blob: 80e4a2660ebbd23a5e2c96f9c46006f8b7f05ff6 [file] [log] [blame]
alexander3c798932021-03-26 21:42:19 +00001/*
2 * Copyright (c) 2021 Arm Limited. All rights reserved.
3 * SPDX-License-Identifier: Apache-2.0
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17#include "Wav2LetterMfcc.hpp"
18
19#include "PlatformMath.hpp"
20
21#include <cfloat>
22
23namespace arm {
24namespace app {
25namespace audio {
26
27 bool Wav2LetterMFCC::ApplyMelFilterBank(
28 std::vector<float>& fftVec,
29 std::vector<std::vector<float>>& melFilterBank,
30 std::vector<int32_t>& filterBankFilterFirst,
31 std::vector<int32_t>& filterBankFilterLast,
32 std::vector<float>& melEnergies)
33 {
34 const size_t numBanks = melEnergies.size();
35
36 if (numBanks != filterBankFilterFirst.size() ||
37 numBanks != filterBankFilterLast.size()) {
38 printf_err("unexpected filter bank lengths\n");
39 return false;
40 }
41
42 for (size_t bin = 0; bin < numBanks; ++bin) {
43 auto filterBankIter = melFilterBank[bin].begin();
44 float melEnergy = 1e-10; /* Avoid log of zero at later stages, same value used in librosa. */
45 const int32_t firstIndex = filterBankFilterFirst[bin];
46 const int32_t lastIndex = filterBankFilterLast[bin];
47
48 for (int32_t i = firstIndex; i <= lastIndex; ++i) {
49 melEnergy += (*filterBankIter++ * fftVec[i]);
50 }
51
52 melEnergies[bin] = melEnergy;
53 }
54
55 return true;
56 }
57
58 void Wav2LetterMFCC::ConvertToLogarithmicScale(
59 std::vector<float>& melEnergies)
60 {
61 float maxMelEnergy = -FLT_MAX;
62
63 /* Container for natural logarithms of mel energies. */
64 std::vector <float> vecLogEnergies(melEnergies.size(), 0.f);
65
66 /* Because we are taking natural logs, we need to multiply by log10(e).
67 * Also, for wav2letter model, we scale our log10 values by 10. */
68 constexpr float multiplier = 10.0 * /* Default scalar. */
69 0.4342944819032518; /* log10f(std::exp(1.0))*/
70
71 /* Take log of the whole vector. */
72 math::MathUtils::VecLogarithmF32(melEnergies, vecLogEnergies);
73
74 /* Scale the log values and get the max. */
75 for (auto iterM = melEnergies.begin(), iterL = vecLogEnergies.begin();
76 iterM != melEnergies.end(); ++iterM, ++iterL) {
77
78 *iterM = *iterL * multiplier;
79
80 /* Save the max mel energy. */
81 if (*iterM > maxMelEnergy) {
82 maxMelEnergy = *iterM;
83 }
84 }
85
86 /* Clamp the mel energies. */
87 constexpr float maxDb = 80.0;
88 const float clampLevelLowdB = maxMelEnergy - maxDb;
89 for (auto iter = melEnergies.begin(); iter != melEnergies.end(); ++iter) {
90 *iter = std::max(*iter, clampLevelLowdB);
91 }
92 }
93
94 std::vector<float> Wav2LetterMFCC::CreateDCTMatrix(
95 const int32_t inputLength,
96 const int32_t coefficientCount)
97 {
98 std::vector<float> dctMatix(inputLength * coefficientCount);
99
100 /* Orthonormal normalization. */
101 const float normalizerK0 = 2 * math::MathUtils::SqrtF32(1.0f /
102 static_cast<float>(4*inputLength));
103 const float normalizer = 2 * math::MathUtils::SqrtF32(1.0f /
104 static_cast<float>(2*inputLength));
105
106 const float angleIncr = M_PI/inputLength;
107 float angle = angleIncr; /* We start using it at k = 1 loop. */
108
109 /* First row of DCT will use normalizer K0 */
110 for (int32_t n = 0; n < inputLength; ++n) {
111 dctMatix[n] = normalizerK0 /* cos(0) = 1 */;
112 }
113
114 /* Second row (index = 1) onwards, we use standard normalizer. */
115 for (int32_t k = 1, m = inputLength; k < coefficientCount; ++k, m += inputLength) {
116 for (int32_t n = 0; n < inputLength; ++n) {
117 dctMatix[m+n] = normalizer *
118 math::MathUtils::CosineF32((n + 0.5f) * angle);
119 }
120 angle += angleIncr;
121 }
122 return dctMatix;
123 }
124
125 float Wav2LetterMFCC::GetMelFilterBankNormaliser(
126 const float& leftMel,
127 const float& rightMel,
128 const bool useHTKMethod)
129 {
130 /* Slaney normalization for mel weights. */
131 return (2.0f / (MFCC::InverseMelScale(rightMel, useHTKMethod) -
132 MFCC::InverseMelScale(leftMel, useHTKMethod)));
133 }
134
135} /* namespace audio */
136} /* namespace app */
137} /* namespace arm */