alexander | 3c79893 | 2021-03-26 21:42:19 +0000 | [diff] [blame^] | 1 | /* |
| 2 | * Copyright (c) 2021 Arm Limited. All rights reserved. |
| 3 | * SPDX-License-Identifier: Apache-2.0 |
| 4 | * |
| 5 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 | * you may not use this file except in compliance with the License. |
| 7 | * You may obtain a copy of the License at |
| 8 | * |
| 9 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | * |
| 11 | * Unless required by applicable law or agreed to in writing, software |
| 12 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | * See the License for the specific language governing permissions and |
| 15 | * limitations under the License. |
| 16 | */ |
| 17 | #ifndef ASR_WAV2LETTER_MFCC_HPP |
| 18 | #define ASR_WAV2LETTER_MFCC_HPP |
| 19 | |
| 20 | #include "Mfcc.hpp" |
| 21 | |
| 22 | namespace arm { |
| 23 | namespace app { |
| 24 | namespace audio { |
| 25 | |
| 26 | /* Class to provide Wav2Letter specific MFCC calculation requirements. */ |
| 27 | class Wav2LetterMFCC : public MFCC { |
| 28 | |
| 29 | public: |
| 30 | static constexpr uint32_t ms_defaultSamplingFreq = 16000; |
| 31 | static constexpr uint32_t ms_defaultNumFbankBins = 128; |
| 32 | static constexpr uint32_t ms_defaultMelLoFreq = 0; |
| 33 | static constexpr uint32_t ms_defaultMelHiFreq = 8000; |
| 34 | static constexpr bool ms_defaultUseHtkMethod = false; |
| 35 | |
| 36 | explicit Wav2LetterMFCC(const size_t numFeats, const size_t frameLen) |
| 37 | : MFCC(MfccParams( |
| 38 | ms_defaultSamplingFreq, ms_defaultNumFbankBins, |
| 39 | ms_defaultMelLoFreq, ms_defaultMelHiFreq, |
| 40 | numFeats, frameLen, ms_defaultUseHtkMethod)) |
| 41 | {} |
| 42 | |
| 43 | Wav2LetterMFCC() = delete; |
| 44 | ~Wav2LetterMFCC() = default; |
| 45 | |
| 46 | protected: |
| 47 | |
| 48 | /** |
| 49 | * @brief Overrides base class implementation of this function. |
| 50 | * @param[in] fftVec Vector populated with FFT magnitudes |
| 51 | * @param[in] melFilterBank 2D Vector with filter bank weights |
| 52 | * @param[in] filterBankFilterFirst Vector containing the first indices of filter bank |
| 53 | * to be used for each bin. |
| 54 | * @param[in] filterBankFilterLast Vector containing the last indices of filter bank |
| 55 | * to be used for each bin. |
| 56 | * @param[out] melEnergies Pre-allocated vector of MEL energies to be |
| 57 | * populated. |
| 58 | * @return true if successful, false otherwise |
| 59 | */ |
| 60 | bool ApplyMelFilterBank( |
| 61 | std::vector<float>& fftVec, |
| 62 | std::vector<std::vector<float>>& melFilterBank, |
| 63 | std::vector<int32_t>& filterBankFilterFirst, |
| 64 | std::vector<int32_t>& filterBankFilterLast, |
| 65 | std::vector<float>& melEnergies) override; |
| 66 | |
| 67 | /** |
| 68 | * @brief Override for the base class implementation convert mel |
| 69 | * energies to logarithmic scale. The difference from |
| 70 | * default behaviour is that the power is converted to dB |
| 71 | * and subsequently clamped. |
| 72 | * @param[in,out] melEnergies 1D vector of Mel energies |
| 73 | **/ |
| 74 | void ConvertToLogarithmicScale(std::vector<float>& melEnergies) override; |
| 75 | |
| 76 | /** |
| 77 | * @brief Create a matrix used to calculate Discrete Cosine |
| 78 | * Transform. Override for the base class' default |
| 79 | * implementation as the first and last elements |
| 80 | * use a different normaliser. |
| 81 | * @param[in] inputLength input length of the buffer on which |
| 82 | * DCT will be performed |
| 83 | * @param[in] coefficientCount Total coefficients per input length. |
| 84 | * @return 1D vector with inputLength x coefficientCount elements |
| 85 | * populated with DCT coefficients. |
| 86 | */ |
| 87 | std::vector<float> CreateDCTMatrix(int32_t inputLength, |
| 88 | int32_t coefficientCount) override; |
| 89 | |
| 90 | /** |
| 91 | * @brief Given the low and high Mel values, get the normaliser |
| 92 | * for weights to be applied when populating the filter |
| 93 | * bank. Override for the base class implementation. |
| 94 | * @param[in] leftMel Low Mel frequency value. |
| 95 | * @param[in] rightMel High Mel frequency value. |
| 96 | * @param[in] useHTKMethod bool to signal if HTK method is to be |
| 97 | * used for calculation. |
| 98 | * @return Value to use for normalising. |
| 99 | */ |
| 100 | float GetMelFilterBankNormaliser(const float& leftMel, |
| 101 | const float& rightMel, |
| 102 | bool useHTKMethod) override; |
| 103 | }; |
| 104 | |
| 105 | } /* namespace audio */ |
| 106 | } /* namespace app */ |
| 107 | } /* namespace arm */ |
| 108 | |
| 109 | #endif /* ASR_WAV2LETTER_MFCC_HPP */ |