blob: 14b9323ad583aece6b43392686b54f8692bdccf3 [file] [log] [blame]
/*
 * Copyright (c) 2021 Arm Limited. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17#include "AdMelSpectrogram.hpp"
alexander3c798932021-03-26 21:42:19 +000018#include "PlatformMath.hpp"
alexander31ae9f02022-02-10 16:15:54 +000019#include "log_macros.h"
alexander3c798932021-03-26 21:42:19 +000020
alexanderc350cdc2021-04-29 20:36:09 +010021#include <cfloat>
22
alexander3c798932021-03-26 21:42:19 +000023namespace arm {
24namespace app {
25namespace audio {
26
27 bool AdMelSpectrogram::ApplyMelFilterBank(
28 std::vector<float>& fftVec,
29 std::vector<std::vector<float>>& melFilterBank,
alexanderc350cdc2021-04-29 20:36:09 +010030 std::vector<uint32_t>& filterBankFilterFirst,
31 std::vector<uint32_t>& filterBankFilterLast,
alexander3c798932021-03-26 21:42:19 +000032 std::vector<float>& melEnergies)
33 {
34 const size_t numBanks = melEnergies.size();
35
36 if (numBanks != filterBankFilterFirst.size() ||
37 numBanks != filterBankFilterLast.size()) {
38 printf_err("unexpected filter bank lengths\n");
39 return false;
40 }
41
42 for (size_t bin = 0; bin < numBanks; ++bin) {
43 auto filterBankIter = melFilterBank[bin].begin();
alexanderc350cdc2021-04-29 20:36:09 +010044 auto end = melFilterBank[bin].end();
45 float melEnergy = FLT_MIN; /* Avoid log of zero at later stages. */
46 const uint32_t firstIndex = filterBankFilterFirst[bin];
47 const uint32_t lastIndex = std::min<int32_t>(filterBankFilterLast[bin], fftVec.size() - 1);
alexander3c798932021-03-26 21:42:19 +000048
alexanderc350cdc2021-04-29 20:36:09 +010049 for (uint32_t i = firstIndex; i <= lastIndex && filterBankIter != end; ++i) {
alexander3c798932021-03-26 21:42:19 +000050 melEnergy += (*filterBankIter++ * fftVec[i]);
51 }
52
53 melEnergies[bin] = melEnergy;
54 }
55
56 return true;
57 }
58
59 void AdMelSpectrogram::ConvertToLogarithmicScale(
60 std::vector<float>& melEnergies)
61 {
62 /* Container for natural logarithms of mel energies */
63 std::vector <float> vecLogEnergies(melEnergies.size(), 0.f);
64
65 /* Because we are taking natural logs, we need to multiply by log10(e).
66 * Also, for wav2letter model, we scale our log10 values by 10 */
67 constexpr float multiplier = 10.0 * /* default scalar */
68 0.4342944819032518; /* log10f(std::exp(1.0))*/
69
70 /* Take log of the whole vector */
71 math::MathUtils::VecLogarithmF32(melEnergies, vecLogEnergies);
72
73 /* Scale the log values. */
74 for (auto iterM = melEnergies.begin(), iterL = vecLogEnergies.begin();
alexanderc350cdc2021-04-29 20:36:09 +010075 iterM != melEnergies.end() && iterL != vecLogEnergies.end(); ++iterM, ++iterL) {
alexander3c798932021-03-26 21:42:19 +000076
77 *iterM = *iterL * multiplier;
78 }
79 }
80
81 float AdMelSpectrogram::GetMelFilterBankNormaliser(
82 const float& leftMel,
83 const float& rightMel,
84 const bool useHTKMethod)
85 {
86 /* Slaney normalization for mel weights. */
87 return (2.0f / (AdMelSpectrogram::InverseMelScale(rightMel, useHTKMethod) -
88 AdMelSpectrogram::InverseMelScale(leftMel, useHTKMethod)));
89 }
90
91} /* namespace audio */
92} /* namespace app */
93} /* namespace arm */