//
// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
| 5 | |
| 6 | #include "SpeechRecognitionPipeline.hpp" |
| 7 | #include "ArmnnNetworkExecutor.hpp" |
| 8 | |
George Gekov | 23c2627 | 2021-08-16 11:32:10 +0100 | [diff] [blame^] | 9 | namespace asr |
Éanna Ó Catháin | c6ab02a | 2021-04-07 14:35:25 +0100 | [diff] [blame] | 10 | { |
George Gekov | 23c2627 | 2021-08-16 11:32:10 +0100 | [diff] [blame^] | 11 | |
Éanna Ó Catháin | c6ab02a | 2021-04-07 14:35:25 +0100 | [diff] [blame] | 12 | ASRPipeline::ASRPipeline(std::unique_ptr<common::ArmnnNetworkExecutor<int8_t>> executor, |
George Gekov | 23c2627 | 2021-08-16 11:32:10 +0100 | [diff] [blame^] | 13 | std::unique_ptr<Decoder> decoder, std::unique_ptr<Wav2LetterPreprocessor> preProcessor) : |
Éanna Ó Catháin | c6ab02a | 2021-04-07 14:35:25 +0100 | [diff] [blame] | 14 | m_executor(std::move(executor)), |
George Gekov | 23c2627 | 2021-08-16 11:32:10 +0100 | [diff] [blame^] | 15 | m_decoder(std::move(decoder)), m_preProcessor(std::move(preProcessor)) {} |
Éanna Ó Catháin | c6ab02a | 2021-04-07 14:35:25 +0100 | [diff] [blame] | 16 | |
George Gekov | 23c2627 | 2021-08-16 11:32:10 +0100 | [diff] [blame^] | 17 | int ASRPipeline::getInputSamplesSize() |
Éanna Ó Catháin | c6ab02a | 2021-04-07 14:35:25 +0100 | [diff] [blame] | 18 | { |
George Gekov | 23c2627 | 2021-08-16 11:32:10 +0100 | [diff] [blame^] | 19 | return this->m_preProcessor->m_windowLen + |
| 20 | ((this->m_preProcessor->m_mfcc->m_params.m_numMfccVectors - 1) * this->m_preProcessor->m_windowStride); |
| 21 | } |
Éanna Ó Catháin | c6ab02a | 2021-04-07 14:35:25 +0100 | [diff] [blame] | 22 | |
George Gekov | 23c2627 | 2021-08-16 11:32:10 +0100 | [diff] [blame^] | 23 | int ASRPipeline::getSlidingWindowOffset() |
| 24 | { |
| 25 | // Hardcoded for now until refactor |
| 26 | return ASRPipeline::SLIDING_WINDOW_OFFSET; |
| 27 | } |
Éanna Ó Catháin | c6ab02a | 2021-04-07 14:35:25 +0100 | [diff] [blame] | 28 | |
George Gekov | 23c2627 | 2021-08-16 11:32:10 +0100 | [diff] [blame^] | 29 | std::vector<int8_t> ASRPipeline::PreProcessing(std::vector<float>& audio) |
| 30 | { |
| 31 | int audioDataToPreProcess = m_preProcessor->m_windowLen + |
| 32 | ((m_preProcessor->m_mfcc->m_params.m_numMfccVectors - 1) * |
| 33 | m_preProcessor->m_windowStride); |
| 34 | int outputBufferSize = m_preProcessor->m_mfcc->m_params.m_numMfccVectors |
| 35 | * m_preProcessor->m_mfcc->m_params.m_numMfccFeatures * 3; |
| 36 | std::vector<int8_t> outputBuffer(outputBufferSize); |
| 37 | m_preProcessor->Invoke(audio.data(), audioDataToPreProcess, outputBuffer, m_executor->GetQuantizationOffset(), |
| 38 | m_executor->GetQuantizationScale()); |
| 39 | return outputBuffer; |
| 40 | } |
| 41 | |
| 42 | IPipelinePtr CreatePipeline(common::PipelineOptions& config, std::map<int, std::string>& labels) |
| 43 | { |
| 44 | if (config.m_ModelName == "Wav2Letter") |
| 45 | { |
| 46 | // Wav2Letter ASR SETTINGS |
| 47 | int SAMP_FREQ = 16000; |
| 48 | int FRAME_LEN_MS = 32; |
| 49 | int FRAME_LEN_SAMPLES = SAMP_FREQ * FRAME_LEN_MS * 0.001; |
| 50 | int NUM_MFCC_FEATS = 13; |
| 51 | int MFCC_WINDOW_LEN = 512; |
| 52 | int MFCC_WINDOW_STRIDE = 160; |
| 53 | const int NUM_MFCC_VECTORS = 296; |
| 54 | int SAMPLES_PER_INFERENCE = MFCC_WINDOW_LEN + ((NUM_MFCC_VECTORS - 1) * MFCC_WINDOW_STRIDE); |
| 55 | int MEL_LO_FREQ = 0; |
| 56 | int MEL_HI_FREQ = 8000; |
| 57 | int NUM_FBANK_BIN = 128; |
| 58 | int INPUT_WINDOW_LEFT_CONTEXT = 98; |
| 59 | int INPUT_WINDOW_RIGHT_CONTEXT = 98; |
| 60 | int INPUT_WINDOW_INNER_CONTEXT = NUM_MFCC_VECTORS - |
| 61 | (INPUT_WINDOW_LEFT_CONTEXT + INPUT_WINDOW_RIGHT_CONTEXT); |
| 62 | int SLIDING_WINDOW_OFFSET = INPUT_WINDOW_INNER_CONTEXT * MFCC_WINDOW_STRIDE; |
| 63 | |
| 64 | |
| 65 | MfccParams mfccParams(SAMP_FREQ, NUM_FBANK_BIN, |
| 66 | MEL_LO_FREQ, MEL_HI_FREQ, NUM_MFCC_FEATS, FRAME_LEN_SAMPLES, false, NUM_MFCC_VECTORS); |
| 67 | |
| 68 | std::unique_ptr<Wav2LetterMFCC> mfccInst = std::make_unique<Wav2LetterMFCC>(mfccParams); |
| 69 | |
| 70 | auto executor = std::make_unique<common::ArmnnNetworkExecutor<int8_t>>(config.m_ModelFilePath, |
| 71 | config.m_backends); |
| 72 | |
| 73 | auto decoder = std::make_unique<asr::Decoder>(labels); |
| 74 | |
| 75 | auto preprocessor = std::make_unique<Wav2LetterPreprocessor>(MFCC_WINDOW_LEN, MFCC_WINDOW_STRIDE, |
| 76 | std::move(mfccInst)); |
| 77 | |
| 78 | auto ptr = std::make_unique<asr::ASRPipeline>( |
| 79 | std::move(executor), std::move(decoder), std::move(preprocessor)); |
| 80 | |
| 81 | ptr->SLIDING_WINDOW_OFFSET = SLIDING_WINDOW_OFFSET; |
| 82 | |
| 83 | return ptr; |
| 84 | } |
| 85 | else |
| 86 | { |
| 87 | throw std::invalid_argument("Unknown Model name: " + config.m_ModelName + " ."); |
| 88 | } |
Éanna Ó Catháin | c6ab02a | 2021-04-07 14:35:25 +0100 | [diff] [blame] | 89 | } |
| 90 | |
| 91 | }// namespace asr |