Blame - scripts/py/gen_utils.py - ml/ethos-u/ml-embedded-evaluation-kit

blob: 6bb47603368ea07e9e65a8612f94cab539b2dc60 [file] [log] [blame]

alexander	3c79893	2021-03-26 21:42:19 +0000	[diff] [blame]	1	#!env/bin/python3
				2
Alex Tawse	daba3cf	2023-09-29 15:55:38 +0100	[diff] [blame]	3	# SPDX-FileCopyrightText: Copyright 2021, 2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
alexander	3c79893	2021-03-26 21:42:19 +0000	[diff] [blame]	4	# SPDX-License-Identifier: Apache-2.0
				5	#
				6	# Licensed under the Apache License, Version 2.0 (the "License");
				7	# you may not use this file except in compliance with the License.
				8	# You may obtain a copy of the License at
				9	#
				10	# http://www.apache.org/licenses/LICENSE-2.0
				11	#
				12	# Unless required by applicable law or agreed to in writing, software
				13	# distributed under the License is distributed on an "AS IS" BASIS,
				14	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				15	# See the License for the specific language governing permissions and
				16	# limitations under the License.
Alex Tawse	daba3cf	2023-09-29 15:55:38 +0100	[diff] [blame]	17	"""
				18	Utility functions for .cc + .hpp file generation
				19	"""
				20	import argparse
				21	import datetime
				22	from dataclasses import dataclass
				23	from pathlib import Path
alexander	3c79893	2021-03-26 21:42:19 +0000	[diff] [blame]	24
Alex Tawse	daba3cf	2023-09-29 15:55:38 +0100	[diff] [blame]	25	import jinja2
alexander	3c79893	2021-03-26 21:42:19 +0000	[diff] [blame]	26	import numpy as np
Alex Tawse	daba3cf	2023-09-29 15:55:38 +0100	[diff] [blame]	27	import resampy
				28	import soundfile as sf
alexander	3c79893	2021-03-26 21:42:19 +0000	[diff] [blame]	29
				30
@dataclass
class AudioSample:
    """
    Bundles audio data together with the sample rate that applies to it
    """
    # Audio samples held as a NumPy array
    data: np.ndarray
    # Sample rate associated with `data`
    sample_rate: int
				38
				39
				40	class GenUtils:
				41	"""
				42	Class with utility functions for audio and other .cc + .hpp file generation
				43	"""
				44
alexander	3c79893	2021-03-26 21:42:19 +0000	[diff] [blame]	45	@staticmethod
				46	def res_data_type(res_type_value):
				47	"""
				48	Returns the input string if is one of the valid resample type
				49	"""
Alex Tawse	daba3cf	2023-09-29 15:55:38 +0100	[diff] [blame]	50	if res_type_value not in GenUtils.res_type_list():
				51	raise argparse.ArgumentTypeError(
				52	f"{res_type_value} not valid. Supported only {GenUtils.res_type_list()}"
				53	)
alexander	3c79893	2021-03-26 21:42:19 +0000	[diff] [blame]	54	return res_type_value
				55
				56	@staticmethod
				57	def res_type_list():
				58	"""
				59	Returns the resample type list
				60	"""
				61	return ['kaiser_best', 'kaiser_fast']
				62
    @staticmethod
    def read_audio_file(
            path,
            offset,
            duration
    ) -> AudioSample:
        """
        Loads (part of) an audio file into an array of float32 samples

        @param path:        Path to audio file
        @param offset:      Position to start reading from; it is multiplied by the
                            file's sample rate to get the frame to seek to. A falsy
                            value (e.g. 0) reads from the start of the file
        @param duration:    Length to read, converted to frames the same way as
                            offset. Zero or negative requests all remaining frames
        @return:            AudioSample holding the transposed data that was read
                            and the file's native sample rate
        @raises OSError:    Re-raised after printing a message if one occurs while
                            opening or reading the file
        """
        try:
            with sf.SoundFile(path) as sound_file:
                native_rate = sound_file.samplerate

                if offset:
                    # Move the read position to the first requested frame
                    start_frame = int(offset * native_rate)
                    sound_file.seek(start_frame)

                # A frame count of -1 requests everything up to the end of the file
                frame_count = int(duration * native_rate) if duration > 0 else -1

                # Read the requested frames, then transpose the result
                frames_read = sound_file.read(
                    frames=frame_count,
                    dtype=np.float32,
                    always_2d=False
                )
                samples = frames_read.T
        except OSError as err:
            print(f"Failed to open {path} as an audio.")
            raise err

        return AudioSample(samples, native_rate)
				97
    @staticmethod
    def _resample_audio(
            y,
            target_sr,
            origin_sr,
            res_type
    ):
        """
        Resamples audio to a different sample rate

        The resampled signal is trimmed or zero-padded along its last axis so that
        it holds exactly ceil(n_input_samples * target_sr / origin_sr) samples.

        @param y:           Audio to resample; samples run along the last axis
        @param target_sr:   Target sample rate
        @param origin_sr:   Original sample rate
        @param res_type:    Resample type, passed to resampy as the filter name
        @return:            The resampled audio
        """
        ratio = float(target_sr) / origin_sr
        axis = -1
        n_samples = int(np.ceil(y.shape[axis] * ratio))

        # Resample using resampy
        y_rs = resampy.resample(y, origin_sr, target_sr, filter=res_type, axis=axis)
        n_rs_samples = y_rs.shape[axis]

        # Adjust the size. The result is always built from y_rs: when the
        # resampled length already equals n_samples no adjustment is needed and
        # y_rs is returned as is (returning the input `y` here would hand back
        # audio that was never resampled).
        if n_rs_samples > n_samples:
            slices = [slice(None)] * y_rs.ndim
            slices[axis] = slice(0, n_samples)
            y_rs = y_rs[tuple(slices)]
        elif n_rs_samples < n_samples:
            lengths = [(0, 0)] * y_rs.ndim
            lengths[axis] = (0, n_samples - n_rs_samples)
            y_rs = np.pad(y_rs, lengths, 'constant', constant_values=0)

        return y_rs
				133
    @staticmethod
    def resample_audio_clip(
            audio_sample: AudioSample,
            target_sr=16000,
            mono=True,
            res_type='kaiser_best',
            min_len=16000
    ) -> AudioSample:
        """
        Resample an audio clip with the given desired specs.

        Parameters:
        ----------
        audio_sample (AudioSample): Input audio data and its sample rate. The data is
                                    copied first, so the input array is not modified.
                                    Multi-channel data is treated as having channels
                                    on axis 0 and samples on the last axis.
        target_sr (int, optional):  Target sampling rate. Positive number are considered valid,
                                    if zero or negative the sampling rate of the input
                                    will be preserved. Default is 16000.
        mono (bool, optional):      Specify if the audio needs to be converted to mono
                                    (by averaging over axis 0). Default is True.
        res_type (str, optional):   Resample type to use, Default is 'kaiser_best'.
        min_len (int, optional):    Minimum length, in samples, of the output audio time
                                    series. Shorter audio is zero-padded at the end;
                                    zero or negative disables padding. Default is 16000.

        Returns:
        ----------
        AudioSample whose `data` is the output audio time series (one-dimensional, or
        with the leading axes of the input kept when mono is False) and whose
        `sample_rate` is the sampling rate of that data.
        """
        y = audio_sample.data.copy()

        # Convert to mono if requested and if audio has more than one dimension
        if mono and y.ndim > 1:
            y = np.mean(y, axis=0)

        # Resample only when a valid target rate differs from the current rate
        if target_sr > 0 and audio_sample.sample_rate != target_sr:
            y = GenUtils._resample_audio(y, target_sr, audio_sample.sample_rate, res_type)
            sample_rate = target_sr
        else:
            sample_rate = audio_sample.sample_rate

        # Pad if necessary and a minimum length is set (min_len > 0).
        # Samples run along the last axis, so measure and pad that axis only;
        # any leading (channel) axis must be left untouched.
        n_samples = y.shape[-1]
        if 0 < min_len and n_samples < min_len:
            pad_widths = [(0, 0)] * y.ndim
            pad_widths[-1] = (0, min_len - n_samples)
            y = np.pad(y, pad_widths, 'constant', constant_values=0)

        return AudioSample(data=y, sample_rate=sample_rate)
				184
    @staticmethod
    def gen_header(
            env: jinja2.Environment,
            header_template_file: str,
            file_name: str = None
    ) -> str:
        """
        Render the common licence header from a Jinja2 template

        The template receives four variables: `script_name` (the file name of
        this module), `gen_time` (the current date and time), `file_name` and
        `year` (the current year).

        :param env:                     Jinja2 environment used to load the template
        :param header_template_file:    Name of the licence header template to load
        :param file_name:               Optional file name, handed to the template as-is
        :return:                        Rendered licence header as a string
        """
        licence_template = env.get_template(header_template_file)

        # Values exposed to the template
        template_context = {
            "script_name": Path(__file__).name,
            "gen_time": datetime.datetime.now(),
            "file_name": file_name,
            "year": datetime.datetime.now().year,
        }

        return licence_template.render(**template_context)