Blame - python/pyarmnn/examples/speech_recognition/audio_capture.py - ml/armnn

blob: 9f28d1006e59c562074667aaefef0bb89111ed05 [file] [log] [blame]

Éanna Ó Catháin	145c88f	2020-11-16 14:12:11 +0000	[diff] [blame]	1	# Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
				2	# SPDX-License-Identifier: MIT
				3
				4	"""Contains AudioCapture class for capturing chunks of audio data from file."""
				5
				6	from typing import Generator
				7
				8	import numpy as np
				9	import soundfile as sf
				10
				11
				12	class ModelParams:
				13	def __init__(self, model_file_path: str):
				14	"""Defines sampling parameters for model used.
				15
				16	Args:
				17	model_file_path: Path to ASR model to use.
				18	"""
				19	self.path = model_file_path
				20	self.mono = True
				21	self.dtype = np.float32
				22	self.samplerate = 16000
				23	self.min_samples = 167392
				24
				25
				26	class AudioCapture:
				27	def __init__(self, model_params):
				28	"""Sampling parameters for model used."""
				29	self.model_params = model_params
				30
				31	def from_audio_file(self, audio_file_path, overlap=31712) -> Generator[np.ndarray, None, None]:
				32	"""Creates a generator that yields audio data from a file. Data is padded with
				33	zeros if necessary to make up minimum number of samples.
				34
				35	Args:
				36	audio_file_path: Path to audio file provided by user.
				37	overlap: The overlap with previous buffer. We need the offset to be the same as the inner context
				38	of the mfcc output, which is sized as 100 x 39. Each mfcc compute produces 1 x 39 vector,
				39	and consumes 160 audio samples. The default overlap is then calculated to be 47712 - (160 x 100)
				40	where 47712 is the min_samples needed for 1 inference of wav2letter.
				41
				42	Yields:
				43	Blocks of audio data of minimum sample size.
				44	"""
				45	with sf.SoundFile(audio_file_path) as audio_file:
				46	for block in audio_file.blocks(
				47	blocksize=self.model_params.min_samples,
				48	dtype=self.model_params.dtype,
				49	always_2d=True,
				50	fill_value=0,
				51	overlap=overlap
				52	):
				53	# Convert to mono if specified
				54	if self.model_params.mono and block.shape[0] > 1:
				55	block = np.mean(block, axis=1)
				56	yield block