Blame - python/pyarmnn/examples/speech_recognition/audio_utils.py - ml/armnn

blob: 1ac78e8074770cf831563ad266a13a9175d04030 [file] [log] [blame]

alexander	f42f568	2021-07-16 11:30:56 +0100	[diff] [blame]	1	# Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
Éanna Ó Catháin	145c88f	2020-11-16 14:12:11 +0000	[diff] [blame]	2	# SPDX-License-Identifier: MIT
				3
				4	"""Utilities for speech recognition apps."""
				5
				6	import numpy as np
Éanna Ó Catháin	145c88f	2020-11-16 14:12:11 +0000	[diff] [blame]	7
				8
				9	def decode(model_output: np.ndarray, labels: dict) -> str:
				10	"""Decodes the integer encoded results from inference into a string.
				11
				12	Args:
				13	model_output: Results from running inference.
				14	labels: Dictionary of labels keyed on the classification index.
				15
				16	Returns:
				17	Decoded string.
				18	"""
Nina Drozd	4018b21	2021-02-02 17:49:17 +0000	[diff] [blame]	19	top1_results = [labels[np.argmax(row)] for row in model_output]
Éanna Ó Catháin	145c88f	2020-11-16 14:12:11 +0000	[diff] [blame]	20	return filter_characters(top1_results)
				21
				22
				23	def filter_characters(results: list) -> str:
				24	"""Filters unwanted and duplicate characters.
				25
				26	Args:
				27	results: List of top 1 results from inference.
				28
				29	Returns:
				30	Final output string to present to user.
				31	"""
				32	text = ""
				33	for i in range(len(results)):
				34	if results[i] == "$":
				35	continue
				36	elif i + 1 < len(results) and results[i] == results[i + 1]:
				37	continue
				38	else:
				39	text += results[i]
				40	return text
				41
				42
				43	def display_text(text: str):
				44	"""Presents the results on the console.
				45
				46	Args:
				47	text: Results of performing ASR on the input audio data.
				48	"""
				49	print(text, sep="", end="", flush=True)
				50
				51
def decode_text(is_first_window: bool, labels: dict, output_result):
    """Decode the part of a wav2letter output window that is new for this inference.

    * First run: the left context and the inner context are decoded.
    * Every other run: only the inner context is decoded.

    The right context is decoded on every call and returned separately, so
    the caller can use the one from the final window after the last
    inference.

    Args:
        is_first_window: Boolean to show if it is the first window we are
            running inference on.
        labels: Dictionary of labels keyed on the classification index.
        output_result: The output from the inference. The per-step rows are
            read from ``output_result[0][0][0]`` (assumed to hold 148 rows
            for wav2letter -- TODO confirm against the caller).

    Returns:
        Tuple ``(current_r_context, text)``:
            current_r_context: the decoded right context of this window.
            text: the decoded text for this window (left + inner context on
                the first window, inner context only afterwards).
    """
    # For wav2letter with 148 output steps. Python slices are half-open, so
    # [0:99] / [49:99] stop at row 98:
    #   left context  rows 0-48, inner context rows 49-98,
    #   right context rows 99-147.
    # The right context must therefore begin exactly where the inner context
    # ends; starting it at row 100 left row 99 outside every slice, so that
    # step was never decoded.
    # NOTE(review): assumes a 49 / 50 / 49 split of the output steps --
    # confirm against the window stride used by the caller.
    inner_context_start = 49
    inner_context_end = 99
    right_context_start = inner_context_end

    rows = output_result[0][0][0]

    # The first inference also keeps the left context; later ones only the
    # inner context, since earlier rows were covered by the previous window.
    first_row = 0 if is_first_window else inner_context_start
    text = decode(rows[first_row:inner_context_end], labels)

    # Store the right context, we will need it after the last inference.
    current_r_context = decode(rows[right_context_start:], labels)
    return current_r_context, text