blob: e2d293001f1c04138f11ee1db21193ea7aadcb53 [file] [log] [blame]
//
// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include <algorithm>
#include <cmath>
#include <exception>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>

#include "CmdArgsParser.hpp"
#include "ArmnnNetworkExecutor.hpp"
#include "AudioCapture.hpp"
#include "SpeechRecognitionPipeline.hpp"
#include "Wav2LetterMFCC.hpp"
Éanna Ó Catháinc6ab02a2021-04-07 14:35:25 +010016
// Output of one inference call: a flat buffer of int8_t values.
using InferenceResult = std::vector<int8_t>;
// Collection of InferenceResult buffers, filled by the pipeline's Inference step in main().
using InferenceResults = std::vector<InferenceResult>;
19
// Command line flag names understood by this executable.
const std::string AUDIO_FILE_PATH = "--audio-file-path";
const std::string MODEL_FILE_PATH = "--model-file-path";
const std::string LABEL_PATH = "--label-path";
const std::string PREFERRED_BACKENDS = "--preferred-backends";
const std::string HELP = "--help";
25
/*
 * Lookup table from integer index to the symbol it stands for; handed to asr::CreatePipeline in main().
 * Indices 0-25 are the letters a-z, 26 is the apostrophe, 27 is the space and 28 is "$".
 * NOTE(review): "$" is presumably the blank/no-character symbol of the decoder - confirm.
 */
std::map<int, std::string> labels =
{
    {0, "a"},
    {1, "b"},
    {2, "c"},
    {3, "d"},
    {4, "e"},
    {5, "f"},
    {6, "g"},
    {7, "h"},
    {8, "i"},
    {9, "j"},
    {10, "k"},
    {11, "l"},
    {12, "m"},
    {13, "n"},
    {14, "o"},
    {15, "p"},
    {16, "q"},
    {17, "r"},
    {18, "s"},
    {19, "t"},
    {20, "u"},
    {21, "v"},
    {22, "w"},
    {23, "x"},
    {24, "y"},
    {25, "z"},
    {26, "\'"},
    {27, " "},
    {28, "$"}
};
58
59/*
60 * The accepted options for this Speech Recognition executable
61 */
George Gekov23c26272021-08-16 11:32:10 +010062static std::map<std::string, std::string> CMD_OPTIONS =
63{
64 {AUDIO_FILE_PATH, "[REQUIRED] Path to the Audio file to run speech recognition on"},
65 {MODEL_FILE_PATH, "[REQUIRED] Path to the Speech Recognition model to use"},
66 {PREFERRED_BACKENDS, "[OPTIONAL] Takes the preferred backends in preference order, separated by comma."
67 " For example: CpuAcc,GpuAcc,CpuRef. Accepted options: [CpuAcc, CpuRef, GpuAcc]."
68 " Defaults to CpuAcc,CpuRef"}
Éanna Ó Catháinc6ab02a2021-04-07 14:35:25 +010069};
70
71/*
72 * Reads the user supplied backend preference, splits it by comma, and returns an ordered vector
73 */
George Gekov23c26272021-08-16 11:32:10 +010074std::vector<armnn::BackendId> GetPreferredBackendList(const std::string& preferredBackends)
Éanna Ó Catháinc6ab02a2021-04-07 14:35:25 +010075{
76 std::vector<armnn::BackendId> backends;
77 std::stringstream ss(preferredBackends);
78
George Gekov23c26272021-08-16 11:32:10 +010079 while (ss.good())
Éanna Ó Catháinc6ab02a2021-04-07 14:35:25 +010080 {
81 std::string backend;
George Gekov23c26272021-08-16 11:32:10 +010082 std::getline(ss, backend, ',');
Éanna Ó Catháinc6ab02a2021-04-07 14:35:25 +010083 backends.emplace_back(backend);
84 }
85 return backends;
86}
87
George Gekov23c26272021-08-16 11:32:10 +010088int main(int argc, char* argv[])
Éanna Ó Catháinc6ab02a2021-04-07 14:35:25 +010089{
Éanna Ó Catháinc6ab02a2021-04-07 14:35:25 +010090 bool isFirstWindow = true;
George Gekov23c26272021-08-16 11:32:10 +010091 std::string currentRContext = "";
Éanna Ó Catháinc6ab02a2021-04-07 14:35:25 +010092
George Gekov23c26272021-08-16 11:32:10 +010093 std::map<std::string, std::string> options;
Éanna Ó Catháinc6ab02a2021-04-07 14:35:25 +010094
95 int result = ParseOptions(options, CMD_OPTIONS, argv, argc);
George Gekov23c26272021-08-16 11:32:10 +010096 if (result != 0)
Éanna Ó Catháinc6ab02a2021-04-07 14:35:25 +010097 {
98 return result;
99 }
100
101 // Create the network options
102 common::PipelineOptions pipelineOptions;
103 pipelineOptions.m_ModelFilePath = GetSpecifiedOption(options, MODEL_FILE_PATH);
George Gekov23c26272021-08-16 11:32:10 +0100104 pipelineOptions.m_ModelName = "Wav2Letter";
105 if (CheckOptionSpecified(options, PREFERRED_BACKENDS))
Éanna Ó Catháinc6ab02a2021-04-07 14:35:25 +0100106 {
107 pipelineOptions.m_backends = GetPreferredBackendList((GetSpecifiedOption(options, PREFERRED_BACKENDS)));
George Gekov23c26272021-08-16 11:32:10 +0100108 }
109 else
Éanna Ó Catháinc6ab02a2021-04-07 14:35:25 +0100110 {
111 pipelineOptions.m_backends = {"CpuAcc", "CpuRef"};
112 }
113
114 asr::IPipelinePtr asrPipeline = asr::CreatePipeline(pipelineOptions, labels);
115
George Gekov23c26272021-08-16 11:32:10 +0100116 audio::AudioCapture capture;
117 std::vector<float> audioData = audio::AudioCapture::LoadAudioFile(GetSpecifiedOption(options, AUDIO_FILE_PATH));
118 capture.InitSlidingWindow(audioData.data(), audioData.size(), asrPipeline->getInputSamplesSize(),
119 asrPipeline->getSlidingWindowOffset());
Éanna Ó Catháinc6ab02a2021-04-07 14:35:25 +0100120
George Gekov23c26272021-08-16 11:32:10 +0100121 while (capture.HasNext())
Éanna Ó Catháinc6ab02a2021-04-07 14:35:25 +0100122 {
123 std::vector<float> audioBlock = capture.Next();
124 InferenceResults results;
125
George Gekov23c26272021-08-16 11:32:10 +0100126 std::vector<int8_t> preprocessedData = asrPipeline->PreProcessing(audioBlock);
Éanna Ó Catháinc6ab02a2021-04-07 14:35:25 +0100127 asrPipeline->Inference<int8_t>(preprocessedData, results);
128 asrPipeline->PostProcessing<int8_t>(results, isFirstWindow, !capture.HasNext(), currentRContext);
129 }
130
131 return 0;
132}