alexander | 3c79893 | 2021-03-26 21:42:19 +0000 | [diff] [blame] | 1 | #!env/bin/python3 |
| 2 | |
Alex Tawse | daba3cf | 2023-09-29 15:55:38 +0100 | [diff] [blame] | 3 | # SPDX-FileCopyrightText: Copyright 2021, 2023 Arm Limited and/or its affiliates <open-source-office@arm.com> |
alexander | 3c79893 | 2021-03-26 21:42:19 +0000 | [diff] [blame] | 4 | # SPDX-License-Identifier: Apache-2.0 |
| 5 | # |
| 6 | # Licensed under the Apache License, Version 2.0 (the "License"); |
| 7 | # you may not use this file except in compliance with the License. |
| 8 | # You may obtain a copy of the License at |
| 9 | # |
| 10 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 11 | # |
| 12 | # Unless required by applicable law or agreed to in writing, software |
| 13 | # distributed under the License is distributed on an "AS IS" BASIS, |
| 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 15 | # See the License for the specific language governing permissions and |
| 16 | # limitations under the License. |
Alex Tawse | daba3cf | 2023-09-29 15:55:38 +0100 | [diff] [blame] | 17 | """ |
| 18 | Utility functions for .cc + .hpp file generation |
| 19 | """ |
| 20 | import argparse |
| 21 | import datetime |
| 22 | from dataclasses import dataclass |
| 23 | from pathlib import Path |
alexander | 3c79893 | 2021-03-26 21:42:19 +0000 | [diff] [blame] | 24 | |
Alex Tawse | daba3cf | 2023-09-29 15:55:38 +0100 | [diff] [blame] | 25 | import jinja2 |
alexander | 3c79893 | 2021-03-26 21:42:19 +0000 | [diff] [blame] | 26 | import numpy as np |
Alex Tawse | daba3cf | 2023-09-29 15:55:38 +0100 | [diff] [blame] | 27 | import resampy |
| 28 | import soundfile as sf |
alexander | 3c79893 | 2021-03-26 21:42:19 +0000 | [diff] [blame] | 29 | |
| 30 | |
@dataclass
class AudioSample:
    """
    Container pairing raw audio data with the sample rate it was captured at
    """
    # Audio time series held as a NumPy array
    data: np.ndarray
    # Sampling rate associated with `data`
    sample_rate: int
| 38 | |
| 39 | |
class GenUtils:
    """
    Class with utility functions for audio and other .cc + .hpp file generation
    """

    @staticmethod
    def res_data_type(res_type_value):
        """
        Returns the input string if it is one of the valid resample types

        @param res_type_value:  Resample type string to validate
        @return:                The unmodified input string
        @raise argparse.ArgumentTypeError: If the value is not in res_type_list()
        """
        supported = GenUtils.res_type_list()
        if res_type_value not in supported:
            raise argparse.ArgumentTypeError(
                f"{res_type_value} not valid. Supported only {supported}"
            )
        return res_type_value

    @staticmethod
    def res_type_list():
        """
        Returns the resample type list
        """
        return ['kaiser_best', 'kaiser_fast']

    @staticmethod
    def read_audio_file(
            path,
            offset,
            duration
    ) -> "AudioSample":
        """
        Reads an audio file to an array

        @param path:        Path to audio file
        @param offset:      Offset to read from, in seconds (converted to frames
                            using the sample rate of the file); falsy means start
        @param duration:    Duration to read, in seconds; None, zero or negative
                            requests all remaining frames
        @return:            The audio data (float32) and the sample rate of the file
        @raise OSError:     Re-raised, after printing a message, if the file
                            cannot be opened or read
        """
        try:
            with sf.SoundFile(path) as audio_file:
                origin_sr = audio_file.samplerate

                if offset:
                    # Seek to the start of the target read
                    audio_file.seek(int(offset * origin_sr))

                if duration is not None and duration > 0:
                    num_frame_duration = int(duration * origin_sr)
                else:
                    # -1 asks soundfile for every remaining frame
                    num_frame_duration = -1

                # Load the target number of frames; the result is transposed so
                # that multi-channel data is laid out channels first
                y = audio_file.read(
                    frames=num_frame_duration,
                    dtype=np.float32,
                    always_2d=False
                ).T
        except OSError:
            print(f"Failed to open {path} as an audio.")
            raise

        return AudioSample(y, origin_sr)

    @staticmethod
    def _fix_length(y, n_samples, axis=-1):
        """
        Trims or zero-pads an array to exactly n_samples along the given axis

        @param y:           Array to adjust
        @param n_samples:   Required length along `axis`
        @param axis:        Axis to adjust, the last one by default
        @return:            `y` itself when the length already matches, otherwise
                            a trimmed view or a zero-padded copy
        """
        current = y.shape[axis]

        if current > n_samples:
            slices = [slice(None)] * y.ndim
            slices[axis] = slice(0, n_samples)
            return y[tuple(slices)]

        if current < n_samples:
            lengths = [(0, 0)] * y.ndim
            lengths[axis] = (0, n_samples - current)
            return np.pad(y, lengths, 'constant', constant_values=0)

        return y

    @staticmethod
    def _resample_audio(
            y,
            target_sr,
            origin_sr,
            res_type
    ):
        """
        Resamples audio to a different sample rate

        @param y:           Audio to resample
        @param target_sr:   Target sample rate
        @param origin_sr:   Original sample rate
        @param res_type:    Resample type
        @return:            The resampled audio
        """
        ratio = float(target_sr) / origin_sr
        axis = -1
        n_samples = int(np.ceil(y.shape[axis] * ratio))

        # Resample using resampy
        y_rs = resampy.resample(y, origin_sr, target_sr, filter=res_type, axis=axis)

        # Always hand back the resampled signal, also when its length already
        # matches (returning the input here would keep the original sample rate)
        return GenUtils._fix_length(y_rs, n_samples, axis)

    @staticmethod
    def resample_audio_clip(
            audio_sample: "AudioSample",
            target_sr=16000,
            mono=True,
            res_type='kaiser_best',
            min_len=16000
    ) -> "AudioSample":
        """
        Resample an audio clip with the given desired specs.

        Parameters:
        ----------
        audio_sample (AudioSample): Audio data and its sample rate. The input is not
                                    modified, a copy of the data is processed.
        target_sr (int, optional):  Target sampling rate. Positive number are considered valid,
                                    if zero or negative the sampling rate of the input
                                    will be preserved. Default is 16000.
        mono (bool, optional):      Specify if the audio needs to be converted to mono
                                    (mean over the first axis). Default is True.
        res_type (str, optional):   Resample type to use, Default is 'kaiser_best'.
        min_len (int, optional):    Minimum length of the output audio time series,
                                    zero or negative disables padding.
                                    Default is 16000.

        Returns:
        ----------
        AudioSample with:
        data (np.ndarray):          Output audio time series of shape=(n,) or (2, n).
        sample_rate (int):          The sampling rate of `data`
        """
        y = audio_sample.data.copy()

        # Convert to mono if requested and if audio has more than one dimension
        if mono and (y.ndim > 1):
            y = np.mean(y, axis=0)

        if (audio_sample.sample_rate != target_sr) and (target_sr > 0):
            y = GenUtils._resample_audio(y, target_sr, audio_sample.sample_rate, res_type)
            sample_rate = target_sr
        else:
            sample_rate = audio_sample.sample_rate

        # Pad if necessary and a minimum length is set (min_len > 0). Samples are
        # on the last axis, so multi-channel audio keeps its channel count.
        if (min_len > 0) and (y.shape[-1] < min_len):
            y = GenUtils._fix_length(y, min_len, axis=-1)

        return AudioSample(data=y, sample_rate=sample_rate)

    @staticmethod
    def gen_header(
            env: "jinja2.Environment",
            header_template_file: str,
            file_name: str = None
    ) -> str:
        """
        Generate common licence header

        The template is rendered with `script_name` (file name of this module),
        `gen_time`, `file_name` and `year`.

        :param env:                     Jinja2 environment
        :param header_template_file:    Path to the licence header template
        :param file_name:               Optional generating script file name
        :return:                        Generated licence header as a string
        """
        header_template = env.get_template(header_template_file)

        # Single timestamp so that gen_time and year can never disagree
        now = datetime.datetime.now()

        return header_template.render(script_name=Path(__file__).name,
                                      gen_time=now,
                                      file_name=file_name,
                                      year=now.year)