blob: 4a56646f107ad8951c588c9c0e0e0f435acd141d [file] [log] [blame]
#!env/bin/python3

# Copyright (c) 2021 Arm Limited. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

18import soundfile as sf
19import resampy
20import numpy as np
21
22
class AudioUtils:
    """
    Static helpers for validating resample type names and for loading,
    down-mixing, resampling and padding audio clips.
    """

    @staticmethod
    def res_data_type(res_type_value):
        """
        Validate a resample type name.

        Parameters:
        ----------
        res_type_value (string): Candidate resample type name.

        Returns:
        ----------
        res_type_value (string): The input, unchanged, when it is one of the
                                 names returned by `res_type_list()`.

        Raises:
        ----------
        argparse.ArgumentTypeError: If the name is not in `res_type_list()`.
        """
        import argparse
        supported = AudioUtils.res_type_list()
        if res_type_value not in supported:
            raise argparse.ArgumentTypeError(f"{res_type_value} not valid. Supported only {supported}")
        return res_type_value

    @staticmethod
    def res_type_list():
        """
        Returns the list of supported resample type names.
        """
        return ['kaiser_best', 'kaiser_fast']

    @staticmethod
    def _pad_last_axis(y, length):
        """
        Zero-pad the end of the last axis of `y` up to `length` samples.
        Returns `y` unchanged when it is already at least `length` long.
        """
        missing = length - y.shape[-1]
        if missing <= 0:
            return y
        # Only the last (sample) axis grows; any leading channel axis is left untouched.
        pad_width = [(0, 0)] * (y.ndim - 1) + [(0, missing)]
        return np.pad(y, pad_width, 'constant', constant_values=0)

    @staticmethod
    def load_resample_audio_clip(path, target_sr=16000, mono=True, offset=0.0, duration=0, res_type='kaiser_best',
                                 min_len=16000):
        """
        Load and resample an audio clip with the given desired specs.

        Parameters:
        ----------
        path (string):                Path to the input audio clip.
        target_sr (int, optional):    Target sampling rate. Positive numbers are considered valid,
                                      if zero or negative the native sampling rate of the file will be preserved.
                                      Default is 16000.
        mono (bool, optional):        Specify if the audio file needs to be converted to mono
                                      (channels are averaged). Default is True.
        offset (float, optional):     Start position of the read, in seconds from the beginning of the file.
                                      Default is 0.0.
        duration (float, optional):   Target duration in seconds. Positive numbers are considered valid,
                                      if zero or negative everything from `offset` to the end of the file is read.
                                      Default is 0.
        res_type (string, optional):  Resample type to use, passed to resampy as the filter name.
                                      Default is 'kaiser_best'.
        min_len (int, optional):      Minimum length, in samples, of the output audio time series; shorter
                                      outputs are zero-padded at the end. Zero or negative disables padding.
                                      Default is 16000.

        Returns:
        ----------
        y (np.ndarray): Output audio time series of shape (n,) or (channels, n).
        sr (int):       A scalar number > 0 that represents the sampling rate of `y`.

        Raises:
        ----------
        Exception: Any error raised while opening or reading `path` is re-raised
                   after the failure message has been printed.
        """
        try:
            with sf.SoundFile(path) as audio_file:
                origin_sr = audio_file.samplerate

                if offset:
                    # Seek to the start of the target read
                    audio_file.seek(int(offset * origin_sr))

                # A frame count of -1 asks for everything up to the end of the file
                num_frame_duration = int(duration * origin_sr) if duration > 0 else -1

                # Load the target number of frames; transpose so samples lie on the last axis
                y = audio_file.read(frames=num_frame_duration, dtype=np.float32, always_2d=False).T
        except Exception:
            # Nothing was loaded, so there is nothing to process: report and propagate
            # the real cause instead of failing later on an unbound variable.
            print(f"Failed to open {path} as an audio.")
            raise

        # Convert to mono if requested and if audio has more than one dimension
        if mono and y.ndim > 1:
            y = np.mean(y, axis=0)

        if target_sr > 0 and origin_sr != target_sr:
            ratio = float(target_sr) / origin_sr
            n_samples = int(np.ceil(y.shape[-1] * ratio))

            # Resample using resampy
            y_rs = resampy.resample(y, origin_sr, target_sr, filter=res_type, axis=-1)

            # Force the length to exactly n_samples: trim any excess, zero-pad any shortfall.
            # The resampled data is always what gets returned, including when its length
            # already matches n_samples.
            y = AudioUtils._pad_last_axis(y_rs[..., :n_samples], n_samples)
            sr = target_sr
        else:
            sr = origin_sr

        # Pad along the sample axis if a minimum length is set (min_len > 0)
        if min_len > 0:
            y = AudioUtils._pad_last_axis(y, min_len)

        return y, sr