Blame - scripts/py/gen_utils.py - ml/ethos-u/ml-embedded-evaluation-kit

blob: 4a56646f107ad8951c588c9c0e0e0f435acd141d [file] [log] [blame]

alexander	3c79893	2021-03-26 21:42:19 +0000	[diff] [blame^]	1	#!env/bin/python3
				2
				3	# Copyright (c) 2021 Arm Limited. All rights reserved.
				4	# SPDX-License-Identifier: Apache-2.0
				5	#
				6	# Licensed under the Apache License, Version 2.0 (the "License");
				7	# you may not use this file except in compliance with the License.
				8	# You may obtain a copy of the License at
				9	#
				10	# http://www.apache.org/licenses/LICENSE-2.0
				11	#
				12	# Unless required by applicable law or agreed to in writing, software
				13	# distributed under the License is distributed on an "AS IS" BASIS,
				14	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				15	# See the License for the specific language governing permissions and
				16	# limitations under the License.
				17
				18	import soundfile as sf
				19	import resampy
				20	import numpy as np
				21
				22
				23	class AudioUtils:
				24	@staticmethod
				25	def res_data_type(res_type_value):
				26	"""
				27	Returns the input string if is one of the valid resample type
				28	"""
				29	import argparse
				30	if res_type_value not in AudioUtils.res_type_list():
				31	raise argparse.ArgumentTypeError(f"{res_type_value} not valid. Supported only {AudioUtils.res_type_list()}")
				32	return res_type_value
				33
				34	@staticmethod
				35	def res_type_list():
				36	"""
				37	Returns the resample type list
				38	"""
				39	return ['kaiser_best', 'kaiser_fast']
				40
				41	@staticmethod
				42	def load_resample_audio_clip(path, target_sr=16000, mono=True, offset=0.0, duration=0, res_type='kaiser_best',
				43	min_len=16000):
				44	"""
				45	Load and resample an audio clip with the given desired specs.
				46
				47	Parameters:
				48	----------
				49	path (string): Path to the input audio clip.
				50	target_sr (int, optional): Target sampling rate. Positive number are considered valid,
				51	if zero or negative the native sampling rate of the file will be preserved. Default is 16000.
				52	mono (bool, optional): Specify if the audio file needs to be converted to mono. Default is True.
				53	offset (float, optional): Target sampling rate. Default is 0.0.
				54	duration (int, optional): Target duration. Positive number are considered valid,
				55	if zero or negative the duration of the file will be preserved. Default is 0.
				56	res_type (int, optional): Resample type to use, Default is 'kaiser_best'.
				57	min_len (int, optional): Minimun lenght of the output audio time series. Default is 16000.
				58
				59	Returns:
				60	----------
				61	y (np.ndarray): Output audio time series of shape shape=(n,) or (2, n).
				62	sr (int): A scalar number > 0 that represent the sampling rate of `y`
				63	"""
				64	try:
				65	with sf.SoundFile(path) as audio_file:
				66	origin_sr = audio_file.samplerate
				67
				68	if offset:
				69	# Seek to the start of the target read
				70	audio_file.seek(int(offset * origin_sr))
				71
				72	if duration > 0:
				73	num_frame_duration = int(duration * origin_sr)
				74	else:
				75	num_frame_duration = -1
				76
				77	# Load the target number of frames
				78	y = audio_file.read(frames=num_frame_duration, dtype=np.float32, always_2d=False).T
				79
				80	except:
				81	print(f"Failed to open {path} as an audio.")
				82
				83	# Convert to mono if requested and if audio has more than one dimension
				84	if mono and (y.ndim > 1):
				85	y = np.mean(y, axis=0)
				86
				87	if not (origin_sr == target_sr) and (target_sr > 0):
				88	ratio = float(target_sr) / origin_sr
				89	axis = -1
				90	n_samples = int(np.ceil(y.shape[axis] * ratio))
				91
				92	# Resample using resampy
				93	y_rs = resampy.resample(y, origin_sr, target_sr, filter=res_type, axis=axis)
				94	n_rs_samples = y_rs.shape[axis]
				95
				96	# Adjust the size
				97	if n_rs_samples > n_samples:
				98	slices = [slice(None)] * y_rs.ndim
				99	slices[axis] = slice(0, n_samples)
				100	y = y_rs[tuple(slices)]
				101	elif n_rs_samples < n_samples:
				102	lengths = [(0, 0)] * y_rs.ndim
				103	lengths[axis] = (0, n_samples - n_rs_samples)
				104	y = np.pad(y_rs, lengths, 'constant', constant_values=(0))
				105
				106	sr = target_sr
				107	else:
				108	sr = origin_sr
				109
				110	# Pad if necessary and min lenght is setted (min_len> 0)
				111	if (y.shape[0] < min_len) and (min_len > 0):
				112	sample_to_pad = min_len - y.shape[0]
				113	y = np.pad(y, (0, sample_to_pad), 'constant', constant_values=(0))
				114
				115	return y, sr