//
// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
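// Example invocation (binary name and file paths are illustrative only):
//   ./SpeechRecognitionExample --audio-file-path ./audio/sample.wav \
//       --model-file-path ./models/wav2letter_int8.tflite --preferred-backends CpuAcc,CpuRef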
#include <iostream>
#include <map>
#include <vector>
#include <algorithm>
#include <cmath>
#include <sstream> // std::stringstream used in GetPreferredBackendList
#include <string>

#include "CmdArgsParser.hpp"
#include "ArmnnNetworkExecutor.hpp"
#include "AudioCapture.hpp"
#include "Preprocess.hpp"
#include "Decoder.hpp"
#include "SpeechRecognitionPipeline.hpp"


using InferenceResult = std::vector<int8_t>;
using InferenceResults = std::vector<InferenceResult>;

const std::string AUDIO_FILE_PATH = "--audio-file-path";
const std::string MODEL_FILE_PATH = "--model-file-path";
const std::string LABEL_PATH = "--label-path";
const std::string PREFERRED_BACKENDS = "--preferred-backends";
const std::string HELP = "--help";

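// Output alphabet of the Wav2Letter model: the lowercase letters, an apostrophe, a space,
// and a trailing '$' symbol, which the decoder treats as the blank/separator token
// (assumption based on the usual Wav2Letter label set).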
std::map<int, std::string> labels = {
    {0,  "a"},
    {1,  "b"},
    {2,  "c"},
    {3,  "d"},
    {4,  "e"},
    {5,  "f"},
    {6,  "g"},
    {7,  "h"},
    {8,  "i"},
    {9,  "j"},
    {10, "k"},
    {11, "l"},
    {12, "m"},
    {13, "n"},
    {14, "o"},
    {15, "p"},
    {16, "q"},
    {17, "r"},
    {18, "s"},
    {19, "t"},
    {20, "u"},
    {21, "v"},
    {22, "w"},
    {23, "x"},
    {24, "y"},
    {25, "z"},
    {26, "\'"},
    {27, " "},
    {28, "$"}
};

/*
 * The accepted options for this Speech Recognition executable
 */
static std::map<std::string, std::string> CMD_OPTIONS = {
    {AUDIO_FILE_PATH, "[REQUIRED] Path to the Audio file to run speech recognition on"},
    {MODEL_FILE_PATH, "[REQUIRED] Path to the Speech Recognition model to use"},
    {PREFERRED_BACKENDS, "[OPTIONAL] Takes the preferred backends in preference order, separated by comma."
                         " For example: CpuAcc,GpuAcc,CpuRef. Accepted options: [CpuAcc, CpuRef, GpuAcc]."
                         " Defaults to CpuAcc,CpuRef"}
};

/*
 * Reads the user supplied backend preference, splits it by comma, and returns an ordered vector
 */
std::vector<armnn::BackendId> GetPreferredBackendList(const std::string& preferredBackends)
{
    std::vector<armnn::BackendId> backends;
    std::stringstream ss(preferredBackends);

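    // Tokenise on ',' while preserving the user-given order; each token is passed
    // straight through to armnn::BackendId, e.g. "GpuAcc,CpuAcc" -> { GpuAcc, CpuAcc }.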
    while (ss.good())
    {
        std::string backend;
        std::getline(ss, backend, ',');
        backends.emplace_back(backend);
    }
    return backends;
}

int main(int argc, char *argv[])
{
    // Wav2Letter ASR SETTINGS
    int SAMP_FREQ = 16000;
    int FRAME_LEN_MS = 32;
    int FRAME_LEN_SAMPLES = SAMP_FREQ * FRAME_LEN_MS * 0.001;
    int NUM_MFCC_FEATS = 13;
    int MFCC_WINDOW_LEN = 512;
    int MFCC_WINDOW_STRIDE = 160;
    const int NUM_MFCC_VECTORS = 296;
    int SAMPLES_PER_INFERENCE = MFCC_WINDOW_LEN + ((NUM_MFCC_VECTORS - 1) * MFCC_WINDOW_STRIDE);
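    // With a 512-sample window and a 160-sample stride this is 512 + 295 * 160 = 47,712 samples
    // per inference, i.e. roughly 2.98 s of audio at 16 kHz.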
    int MEL_LO_FREQ = 0;
    int MEL_HI_FREQ = 8000;
    int NUM_FBANK_BIN = 128;
    int INPUT_WINDOW_LEFT_CONTEXT = 98;
    int INPUT_WINDOW_RIGHT_CONTEXT = 98;
    int INPUT_WINDOW_INNER_CONTEXT = NUM_MFCC_VECTORS -
                                     (INPUT_WINDOW_LEFT_CONTEXT + INPUT_WINDOW_RIGHT_CONTEXT);
    int SLIDING_WINDOW_OFFSET = INPUT_WINDOW_INNER_CONTEXT * MFCC_WINDOW_STRIDE;
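    // Inner context = 296 - (98 + 98) = 100 feature vectors, so each new window advances by
    // 100 * 160 = 16,000 samples (exactly 1 s at 16 kHz); the left/right context overlaps the
    // neighbouring windows.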


    MfccParams mfccParams(SAMP_FREQ, NUM_FBANK_BIN,
                          MEL_LO_FREQ, MEL_HI_FREQ, NUM_MFCC_FEATS, FRAME_LEN_SAMPLES, false, NUM_MFCC_VECTORS);

    MFCC mfccInst = MFCC(mfccParams);

    Preprocess preprocessor(MFCC_WINDOW_LEN, MFCC_WINDOW_STRIDE, mfccInst);
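    // The preprocessor slides a MFCC_WINDOW_LEN frame over each audio block with a
    // MFCC_WINDOW_STRIDE hop, producing NUM_MFCC_VECTORS vectors of NUM_MFCC_FEATS
    // coefficients per inference.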

    bool isFirstWindow = true;
    std::string currentRContext = "";

    std::map<std::string, std::string> options;

    int result = ParseOptions(options, CMD_OPTIONS, argv, argc);
    if (result != 0)
    {
        return result;
    }

    // Create the network options
    common::PipelineOptions pipelineOptions;
    pipelineOptions.m_ModelFilePath = GetSpecifiedOption(options, MODEL_FILE_PATH);

    if (CheckOptionSpecified(options, PREFERRED_BACKENDS))
    {
        pipelineOptions.m_backends = GetPreferredBackendList(GetSpecifiedOption(options, PREFERRED_BACKENDS));
    }
    else
    {
        pipelineOptions.m_backends = {"CpuAcc", "CpuRef"};
    }

    asr::IPipelinePtr asrPipeline = asr::CreatePipeline(pipelineOptions, labels);
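    // CreatePipeline wires the Arm NN network executor and the decoder (using the label map
    // above) into a single ASR pipeline for the chosen model and backends.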

    asr::AudioCapture capture;
    std::vector<float> audioData = capture.LoadAudioFile(GetSpecifiedOption(options, AUDIO_FILE_PATH));
    capture.InitSlidingWindow(audioData.data(), audioData.size(), SAMPLES_PER_INFERENCE, SLIDING_WINDOW_OFFSET);
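    // The sliding window yields SAMPLES_PER_INFERENCE samples per block and advances by
    // SLIDING_WINDOW_OFFSET, so consecutive blocks overlap by the left/right context.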

    while (capture.HasNext())
    {
        std::vector<float> audioBlock = capture.Next();
        InferenceResults results;

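        // For each block: extract and quantise MFCC features (float -> int8), run inference,
        // then decode the output; the first/last-window flags and currentRContext let the
        // post-processing stitch together the overlapping context between windows.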
        std::vector<int8_t> preprocessedData = asrPipeline->PreProcessing<float, int8_t>(audioBlock, preprocessor);
        asrPipeline->Inference<int8_t>(preprocessedData, results);
        asrPipeline->PostProcessing<int8_t>(results, isFirstWindow, !capture.HasNext(), currentRContext);
    }

    return 0;
}