Blame - source/use_case/kws_asr/src/Wav2LetterMfcc.cc - ml/ethos-u/ml-embedded-evaluation-kit

blob: f2c50f3b41e7b01876ef3842acc011358f09ade0 [file] [log] [blame]

alexander	3c79893	2021-03-26 21:42:19 +0000	[diff] [blame]	1	/*
				2	* Copyright (c) 2021 Arm Limited. All rights reserved.
				3	* SPDX-License-Identifier: Apache-2.0
				4	*
				5	* Licensed under the Apache License, Version 2.0 (the "License");
				6	* you may not use this file except in compliance with the License.
				7	* You may obtain a copy of the License at
				8	*
				9	* http://www.apache.org/licenses/LICENSE-2.0
				10	*
				11	* Unless required by applicable law or agreed to in writing, software
				12	* distributed under the License is distributed on an "AS IS" BASIS,
				13	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	* See the License for the specific language governing permissions and
				15	* limitations under the License.
				16	*/
				17	#include "Wav2LetterMfcc.hpp"
				18
				19	#include "PlatformMath.hpp"
alexander	31ae9f0	2022-02-10 16:15:54 +0000	[diff] [blame]	20	#include "log_macros.h"
alexander	3c79893	2021-03-26 21:42:19 +0000	[diff] [blame]	21
				22	#include <cfloat>
				23
				24	namespace arm {
				25	namespace app {
				26	namespace audio {
				27
				28	bool Wav2LetterMFCC::ApplyMelFilterBank(
				29	std::vector<float>& fftVec,
				30	std::vector<std::vector<float>>& melFilterBank,
alexander	c350cdc	2021-04-29 20:36:09 +0100	[diff] [blame]	31	std::vector<uint32_t>& filterBankFilterFirst,
				32	std::vector<uint32_t>& filterBankFilterLast,
alexander	3c79893	2021-03-26 21:42:19 +0000	[diff] [blame]	33	std::vector<float>& melEnergies)
				34	{
				35	const size_t numBanks = melEnergies.size();
				36
				37	if (numBanks != filterBankFilterFirst.size() \|\|
				38	numBanks != filterBankFilterLast.size()) {
				39	printf_err("unexpected filter bank lengths\n");
				40	return false;
				41	}
				42
				43	for (size_t bin = 0; bin < numBanks; ++bin) {
				44	auto filterBankIter = melFilterBank[bin].begin();
alexander	c350cdc	2021-04-29 20:36:09 +0100	[diff] [blame]	45	auto end = melFilterBank[bin].end();
				46	/* Avoid log of zero at later stages, same value used in librosa.
				47	* The number was used during our default wav2letter model training. */
				48	float melEnergy = 1e-10;
				49	const uint32_t firstIndex = filterBankFilterFirst[bin];
				50	const uint32_t lastIndex = std::min<uint32_t>(filterBankFilterLast[bin], fftVec.size() - 1);
alexander	3c79893	2021-03-26 21:42:19 +0000	[diff] [blame]	51
alexander	c350cdc	2021-04-29 20:36:09 +0100	[diff] [blame]	52	for (uint32_t i = firstIndex; i <= lastIndex && filterBankIter != end; ++i) {
alexander	3c79893	2021-03-26 21:42:19 +0000	[diff] [blame]	53	melEnergy += (filterBankIter++ fftVec[i]);
				54	}
				55
				56	melEnergies[bin] = melEnergy;
				57	}
				58
				59	return true;
				60	}
				61
				62	void Wav2LetterMFCC::ConvertToLogarithmicScale(
				63	std::vector<float>& melEnergies)
				64	{
				65	float maxMelEnergy = -FLT_MAX;
				66
				67	/* Container for natural logarithms of mel energies. */
				68	std::vector <float> vecLogEnergies(melEnergies.size(), 0.f);
				69
				70	/* Because we are taking natural logs, we need to multiply by log10(e).
				71	* Also, for wav2letter model, we scale our log10 values by 10. */
				72	constexpr float multiplier = 10.0 * /* Default scalar. */
				73	0.4342944819032518; /* log10f(std::exp(1.0))*/
				74
				75	/* Take log of the whole vector. */
				76	math::MathUtils::VecLogarithmF32(melEnergies, vecLogEnergies);
				77
				78	/* Scale the log values and get the max. */
				79	for (auto iterM = melEnergies.begin(), iterL = vecLogEnergies.begin();
alexander	c350cdc	2021-04-29 20:36:09 +0100	[diff] [blame]	80	iterM != melEnergies.end() && iterL != vecLogEnergies.end(); ++iterM, ++iterL) {
alexander	3c79893	2021-03-26 21:42:19 +0000	[diff] [blame]	81
				82	iterM = iterL * multiplier;
				83
				84	/* Save the max mel energy. */
				85	if (*iterM > maxMelEnergy) {
				86	maxMelEnergy = *iterM;
				87	}
				88	}
				89
				90	/* Clamp the mel energies. */
				91	constexpr float maxDb = 80.0;
				92	const float clampLevelLowdB = maxMelEnergy - maxDb;
alexander	c350cdc	2021-04-29 20:36:09 +0100	[diff] [blame]	93	for (float & melEnergie : melEnergies) {
				94	melEnergie = std::max(melEnergie, clampLevelLowdB);
alexander	3c79893	2021-03-26 21:42:19 +0000	[diff] [blame]	95	}
				96	}
				97
				98	std::vector<float> Wav2LetterMFCC::CreateDCTMatrix(
				99	const int32_t inputLength,
				100	const int32_t coefficientCount)
				101	{
				102	std::vector<float> dctMatix(inputLength * coefficientCount);
				103
				104	/* Orthonormal normalization. */
				105	const float normalizerK0 = 2 * math::MathUtils::SqrtF32(1.0f /
				106	static_cast<float>(4*inputLength));
				107	const float normalizer = 2 * math::MathUtils::SqrtF32(1.0f /
				108	static_cast<float>(2*inputLength));
				109
				110	const float angleIncr = M_PI/inputLength;
				111	float angle = angleIncr; /* We start using it at k = 1 loop. */
				112
				113	/* First row of DCT will use normalizer K0 */
				114	for (int32_t n = 0; n < inputLength; ++n) {
				115	dctMatix[n] = normalizerK0 /* cos(0) = 1 */;
				116	}
				117
				118	/* Second row (index = 1) onwards, we use standard normalizer. */
				119	for (int32_t k = 1, m = inputLength; k < coefficientCount; ++k, m += inputLength) {
				120	for (int32_t n = 0; n < inputLength; ++n) {
				121	dctMatix[m+n] = normalizer *
				122	math::MathUtils::CosineF32((n + 0.5f) * angle);
				123	}
				124	angle += angleIncr;
				125	}
				126	return dctMatix;
				127	}
				128
				129	float Wav2LetterMFCC::GetMelFilterBankNormaliser(
				130	const float& leftMel,
				131	const float& rightMel,
				132	const bool useHTKMethod)
				133	{
				134	/* Slaney normalization for mel weights. */
				135	return (2.0f / (MFCC::InverseMelScale(rightMel, useHTKMethod) -
				136	MFCC::InverseMelScale(leftMel, useHTKMethod)));
				137	}
				138
				139	} /* namespace audio */
				140	} /* namespace app */
				141	} /* namespace arm */