blob: 6bb47603368ea07e9e65a8612f94cab539b2dc60 [file] [log] [blame]
#!env/bin/python3

# SPDX-FileCopyrightText: Copyright 2021, 2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Utility functions for .cc + .hpp file generation
"""
20import argparse
21import datetime
22from dataclasses import dataclass
23from pathlib import Path
alexander3c798932021-03-26 21:42:19 +000024
Alex Tawsedaba3cf2023-09-29 15:55:38 +010025import jinja2
alexander3c798932021-03-26 21:42:19 +000026import numpy as np
Alex Tawsedaba3cf2023-09-29 15:55:38 +010027import resampy
28import soundfile as sf
alexander3c798932021-03-26 21:42:19 +000029
30
@dataclass
class AudioSample:
    """
    Audio data bundled together with the sample rate it is expressed at
    """
    # Audio samples as a NumPy array
    data: np.ndarray
    # Sample rate associated with `data`
    sample_rate: int
38
39
class GenUtils:
    """
    Class with utility functions for audio and other .cc + .hpp file generation
    """

    @staticmethod
    def res_data_type(res_type_value):
        """
        Returns the input string if it is one of the valid resample types

        @param res_type_value: Resample type name to validate
        @return: res_type_value, unchanged, when it is supported
        @raise argparse.ArgumentTypeError: If the value is not in res_type_list()
        """
        supported = GenUtils.res_type_list()
        if res_type_value not in supported:
            raise argparse.ArgumentTypeError(
                f"{res_type_value} not valid. Supported only {supported}"
            )
        return res_type_value

    @staticmethod
    def res_type_list():
        """
        Returns the resample type list
        """
        return ['kaiser_best', 'kaiser_fast']

    @staticmethod
    def read_audio_file(
            path,
            offset,
            duration
    ) -> AudioSample:
        """
        Reads an audio file to an array

        @param path:        Path to audio file
        @param offset:      Offset to read from, multiplied by the file's sample
                            rate to get a frame index; falsy values skip the seek
        @param duration:    Duration to read, multiplied by the file's sample rate
                            to get a frame count; None, zero or negative values
                            read everything after the offset
        @return:            The audio data and the sample rate
        """
        try:
            with sf.SoundFile(path) as audio_file:
                origin_sr = audio_file.samplerate

                if offset:
                    # Seek to the start of the target read
                    audio_file.seek(int(offset * origin_sr))

                # A frame count of -1 asks for every remaining frame.
                # `duration` is checked for truthiness first so that None
                # does not raise a TypeError in the comparison.
                if duration and duration > 0:
                    num_frame_duration = int(duration * origin_sr)
                else:
                    num_frame_duration = -1

                # Load the target number of frames; the transpose is applied
                # to whatever array the read returns.
                y = audio_file.read(
                    frames=num_frame_duration,
                    dtype=np.float32,
                    always_2d=False
                ).T
        except (OSError, RuntimeError):
            # NOTE(review): soundfile appears to report libsndfile failures as
            # RuntimeError subclasses - confirm against the installed version.
            # The exception is re-raised unchanged, so widening the clause
            # only affects whether the diagnostic below is printed.
            print(f"Failed to open {path} as an audio.")
            raise

        return AudioSample(y, origin_sr)

    @staticmethod
    def _resample_audio(
            y,
            target_sr,
            origin_sr,
            res_type
    ):
        """
        Resamples audio to a different sample rate

        @param y:           Audio to resample (resampled along the last axis)
        @param target_sr:   Target sample rate
        @param origin_sr:   Original sample rate
        @param res_type:    Resample type
        @return:            The resampled audio, with a last-axis length of
                            ceil(input_length * target_sr / origin_sr)
        """
        ratio = float(target_sr) / origin_sr
        axis = -1
        n_samples = int(np.ceil(y.shape[axis] * ratio))

        # Resample using resampy
        y_rs = resampy.resample(y, origin_sr, target_sr, filter=res_type, axis=axis)
        n_rs_samples = y_rs.shape[axis]

        # Adjust the size. The result is always derived from y_rs so that the
        # resampled signal is returned even when no trimming or padding is
        # needed (previously the un-resampled input was returned in that case).
        if n_rs_samples > n_samples:
            slices = [slice(None)] * y_rs.ndim
            slices[axis] = slice(0, n_samples)
            y_rs = y_rs[tuple(slices)]
        elif n_rs_samples < n_samples:
            lengths = [(0, 0)] * y_rs.ndim
            lengths[axis] = (0, n_samples - n_rs_samples)
            y_rs = np.pad(y_rs, lengths, 'constant', constant_values=0)

        return y_rs

    @staticmethod
    def resample_audio_clip(
            audio_sample: AudioSample,
            target_sr=16000,
            mono=True,
            res_type='kaiser_best',
            min_len=16000
    ) -> AudioSample:
        """
        Resample an already loaded audio clip to the given desired specs.

        Parameters:
        ----------
        audio_sample (AudioSample): Audio data and its sample rate. The data is
                                    copied, the input object is not modified.
        target_sr (int, optional):  Target sampling rate. Positive number are considered valid,
                                    if zero or negative the sampling rate of the input
                                    will be preserved. Default is 16000.
        mono (bool, optional):      Specify if the audio needs to be converted to mono
                                    (mean over the first axis). Default is True.
        res_type (str, optional):   Resample type to use, Default is 'kaiser_best'.
        min_len (int, optional):    Minimum length of the output audio time series;
                                    shorter audio is zero padded at the end. Zero or
                                    negative disables padding. Default is 16000.

        Returns:
        ----------
        AudioSample holding:
            data (np.ndarray):  Output audio time series
            sample_rate (int):  The sampling rate of `data`
        """
        y = audio_sample.data.copy()

        # Convert to mono if requested and if audio has more than one dimension
        if mono and y.ndim > 1:
            y = np.mean(y, axis=0)

        if target_sr > 0 and audio_sample.sample_rate != target_sr:
            y = GenUtils._resample_audio(y, target_sr, audio_sample.sample_rate, res_type)
            sample_rate = target_sr
        else:
            sample_rate = audio_sample.sample_rate

        # Pad if necessary and a minimum length is set (min_len > 0).
        # The length is taken from the last axis, which is the axis
        # _resample_audio works along, and only that axis is padded, so
        # multi-dimensional (non-mono) data keeps its leading dimensions.
        n_frames = y.shape[-1]
        if min_len > 0 and n_frames < min_len:
            pad_width = [(0, 0)] * y.ndim
            pad_width[-1] = (0, min_len - n_frames)
            y = np.pad(y, pad_width, 'constant', constant_values=0)

        return AudioSample(data=y, sample_rate=sample_rate)

    @staticmethod
    def gen_header(
            env: jinja2.Environment,
            header_template_file: str,
            file_name: str = None
    ) -> str:
        """
        Generate common licence header

        :param env:                     Jinja2 environment
        :param header_template_file:    Path to the licence header template
        :param file_name:               Optional file name passed to the template
        :return:                        Generated licence header as a string
        """
        header_template = env.get_template(header_template_file)
        # Take a single timestamp so gen_time and year always agree
        now = datetime.datetime.now()
        return header_template.render(script_name=Path(__file__).name,
                                      gen_time=now,
                                      file_name=file_name,
                                      year=now.year)