Éanna Ó Catháin | 145c88f | 2020-11-16 14:12:11 +0000 | [diff] [blame] | 1 | # Copyright © 2020 Arm Ltd and Contributors. All rights reserved. |
| 2 | # SPDX-License-Identifier: MIT |
| 3 | |
| 4 | """Contains AudioCapture class for capturing chunks of audio data from file.""" |
| 5 | |
| 6 | from typing import Generator |
| 7 | |
| 8 | import numpy as np |
| 9 | import soundfile as sf |
| 10 | |
| 11 | |
class ModelParams:
    """Sampling parameters describing the audio input expected by an ASR model."""

    def __init__(
        self,
        model_file_path: str,
        *,
        mono: bool = True,
        dtype=np.float32,
        samplerate: int = 16000,
        min_samples: int = 167392,
    ):
        """Defines sampling parameters for model used.

        All parameters other than ``model_file_path`` are keyword-only and
        default to the values that were previously hard-coded, so existing
        ``ModelParams(path)`` calls behave exactly as before.

        Args:
            model_file_path: Path to ASR model to use.
            mono: Whether multi-channel audio should be averaged down to a
                single channel when it is read.
            dtype: Sample data type requested when reading audio blocks.
            samplerate: Sample rate associated with the model (presumably in
                Hz; it is stored here but not enforced by this class).
            min_samples: Number of samples per block of audio read from file.
        """
        self.path = model_file_path
        self.mono = mono
        self.dtype = dtype
        self.samplerate = samplerate
        self.min_samples = min_samples
| 24 | |
| 25 | |
class AudioCapture:
    """Reads an audio file as a stream of fixed-size, overlapping blocks."""

    def __init__(self, model_params):
        """Stores the sampling parameters of the model used.

        Args:
            model_params: Object exposing the ``min_samples``, ``dtype`` and
                ``mono`` attributes read by ``from_audio_file`` (for example
                a ``ModelParams`` instance).
        """
        self.model_params = model_params

    def from_audio_file(self, audio_file_path, overlap=31712) -> Generator[np.ndarray, None, None]:
        """Creates a generator that yields audio data from a file. Data is padded with
        zeros if necessary to make up minimum number of samples.

        Args:
            audio_file_path: Path to audio file provided by user.
            overlap: The overlap with previous buffer. We need the offset to be the same as the inner context
                of the mfcc output, which is sized as 100 x 39. Each mfcc compute produces 1 x 39 vector,
                and consumes 160 audio samples. The default overlap is then calculated to be 47712 - (160 x 100)
                where 47712 is the min_samples needed for 1 inference of wav2letter.
                NOTE(review): the default ``ModelParams.min_samples`` in this file is 167392, not
                47712 -- confirm the default overlap is still the intended one for that block size.

        Yields:
            Blocks of ``model_params.min_samples`` frames. When ``model_params.mono`` is truthy the
            channel axis is averaged away and each block is 1-D ``(frames,)``; otherwise blocks keep
            the 2-D ``(frames, channels)`` layout.
        """
        params = self.model_params
        with sf.SoundFile(audio_file_path) as audio_file:
            for block in audio_file.blocks(
                blocksize=params.min_samples,
                dtype=params.dtype,
                always_2d=True,
                fill_value=0,
                overlap=overlap,
            ):
                # always_2d=True makes every block (frames, channels), so axis 0
                # is the frame count, not the channel count. Collapse the channel
                # axis unconditionally when mono is requested: this downmixes
                # multi-channel audio and flattens single-channel audio to 1-D,
                # regardless of how many frames the block holds.
                if params.mono:
                    block = np.mean(block, axis=1)
                yield block