/*
 * Copyright (c) 2022 Arm Limited. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef AD_PROCESSING_HPP
#define AD_PROCESSING_HPP

#include "BaseProcessing.hpp"
#include "AudioUtils.hpp"
#include "AdMelSpectrogram.hpp"
#include "log_macros.h"

namespace arm {
namespace app {
    /**
     * @brief   Pre-processing class for anomaly detection use case.
     *          Implements methods declared by BasePreProcess and anything else needed
     *          to populate input tensors ready for inference.
     */
    class AdPreProcess : public BasePreProcess {

    public:
        /**
         * @brief Constructor for AdPreProcess class objects.
         * @param[in] inputTensor               Input tensor pointer from the tensor arena.
         * @param[in] melSpectrogramFrameLen    MEL spectrogram's frame length.
         * @param[in] melSpectrogramFrameStride MEL spectrogram's frame stride.
         * @param[in] adModelTrainingMean       Training mean for the anomaly detection model being used.
         */
        explicit AdPreProcess(TfLiteTensor* inputTensor,
                              uint32_t melSpectrogramFrameLen,
                              uint32_t melSpectrogramFrameStride,
                              float adModelTrainingMean);

        ~AdPreProcess() = default;

        /**
         * @brief Function to invoke pre-processing and populate the input vector.
         * @param[in] input      Pointer to the input data. For anomaly detection, this is
         *                       the pointer to the audio data.
         * @param[in] inputSize  Size of the data being passed in for pre-processing.
         * @return True if successful, false otherwise.
         */
        bool DoPreProcess(const void* input, size_t inputSize) override;

        /**
         * @brief  Getter function for the audio window size computed when constructing
         *         the class object.
         * @return Audio window size as a 32 bit unsigned integer.
         */
        uint32_t GetAudioWindowSize();

        /**
         * @brief  Getter function for the audio window stride computed when constructing
         *         the class object.
         * @return Audio window stride as a 32 bit unsigned integer.
         */
        uint32_t GetAudioDataStride();

        /**
         * @brief Setter function for the current audio window index. This is only used
         *        for evaluating if previously computed features can be re-used from the
         *        feature cache.
         * @param[in] idx  Current audio window index from the audio sliding window.
         */
        void SetAudioWindowIndex(uint32_t idx);

    private:
        bool m_validInstance{false};                 /**< Indicates the current object is valid. */
        uint32_t m_melSpectrogramFrameLen{};         /**< MEL spectrogram's window frame length */
        uint32_t m_melSpectrogramFrameStride{};      /**< MEL spectrogram's window frame stride */
        uint8_t m_inputResizeScale{};                /**< Downscaling factor for the MEL energy matrix. */
        uint32_t m_numMelSpecVectorsInAudioStride{}; /**< Number of frames to move across the audio. */
        uint32_t m_audioDataWindowSize{};            /**< Audio window size computed based on other parameters. */
        uint32_t m_audioDataStride{};                /**< Audio window stride computed. */
        uint32_t m_numReusedFeatureVectors{};        /**< Number of MEL vectors that can be re-used */
        uint32_t m_audioWindowIndex{};               /**< Current audio window index (from audio's sliding window) */

        audio::SlidingWindow<const int16_t> m_melWindowSlider; /**< Internal MEL spectrogram window slider */
        audio::AdMelSpectrogram m_melSpec;                     /**< MEL spectrogram computation object */
        std::function<void
            (std::vector<int16_t>&, int, bool, size_t, size_t)> m_featureCalc; /**< Feature calculator object */
    };
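
    /* A minimal usage sketch for AdPreProcess (illustration only). The tensor,
     * model parameters and audio buffer are assumed to come from the calling
     * application, and the sliding window is assumed to expose the HasNext/Next/Index
     * interface declared in AudioUtils.hpp:
     *
     *   AdPreProcess preProcess{inputTensor, frameLen, frameStride, trainingMean};
     *   auto slider = audio::SlidingWindow<const int16_t>(
     *       audioData, audioDataSize,
     *       preProcess.GetAudioWindowSize(), preProcess.GetAudioDataStride());
     *   while (slider.HasNext()) {
     *       const int16_t* window = slider.Next();
     *       preProcess.SetAudioWindowIndex(slider.Index());
     *       preProcess.DoPreProcess(window, preProcess.GetAudioWindowSize());
     *       // ... run inference on the populated input tensor ...
     *   }
     */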

    /**
     * @brief Post-processing class for the anomaly detection use case.
     *        Implements methods declared by BasePostProcess and de-quantises the
     *        model output for the caller.
     */
    class AdPostProcess : public BasePostProcess {
    public:
        /**
         * @brief Constructor for AdPostProcess object.
         * @param[in] outputTensor  Output tensor pointer.
         */
        explicit AdPostProcess(TfLiteTensor* outputTensor);

        ~AdPostProcess() = default;

        /**
         * @brief  Function to do the post-processing on the output tensor.
         * @return True if successful, false otherwise.
         */
        bool DoPostProcess() override;

        /**
         * @brief  Getter function for an element from the de-quantised output vector.
         * @param[in] index  Index of the element to be retrieved.
         * @return De-quantised output value at the given index, as a 32 bit floating
         *         point number.
         */
        float GetOutputValue(uint32_t index);

    private:
        TfLiteTensor* m_outputTensor{};              /**< Output tensor pointer */
        std::vector<float> m_dequantizedOutputVec{}; /**< Internal output vector */

        /**
         * @brief  De-quantizes and flattens the output tensor into a vector.
         * @tparam T  Template parameter to indicate data type.
         * @return True if successful, false otherwise.
         */
        template<typename T>
        bool Dequantize()
        {
            TfLiteTensor* tensor = this->m_outputTensor;
            if (tensor == nullptr) {
                printf_err("Invalid output tensor.\n");
                return false;
            }
            T* tensorData = tflite::GetTensorData<T>(tensor);

            uint32_t totalOutputSize = 1;
            for (int inputDim = 0; inputDim < tensor->dims->size; inputDim++) {
                totalOutputSize *= tensor->dims->data[inputDim];
            }

            /* For getting the floating point values, we need quantization parameters. */
            QuantParams quantParams = GetTensorQuantParams(tensor);

            this->m_dequantizedOutputVec = std::vector<float>(totalOutputSize, 0);

            for (size_t i = 0; i < totalOutputSize; ++i) {
                this->m_dequantizedOutputVec[i] = quantParams.scale * (tensorData[i] - quantParams.offset);
            }

            return true;
        }
    };

    /* Templated instances available: */
    template bool AdPostProcess::Dequantize<int8_t>();
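
    /* A minimal usage sketch for AdPostProcess (illustration only). The output
     * tensor and the anomaly threshold below are assumptions made for the example,
     * not values defined by this header:
     *
     *   AdPostProcess postProcess{outputTensor};
     *   if (postProcess.DoPostProcess()) {
     *       const float score = postProcess.GetOutputValue(0);
     *       const bool anomaly = (score > someThreshold);
     *   }
     */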

    /**
     * @brief Generic feature calculator factory.
     *
     * Returns a lambda function that computes features using a feature cache.
     * The actual feature computation is done by the lambda function provided as a
     * parameter. Features are written directly to the input tensor memory.
     *
     * @tparam T  Feature vector type.
     * @param[in] inputTensor  Model input tensor pointer.
     * @param[in] cacheSize    Number of feature vectors to cache, defined by the sliding window overlap.
     * @param[in] compute      Feature calculator function.
     * @return Lambda function to compute features.
     */
    template<class T>
    std::function<void (std::vector<int16_t>&, size_t, bool, size_t, size_t)>
    FeatureCalc(TfLiteTensor* inputTensor, size_t cacheSize,
                std::function<std::vector<T> (std::vector<int16_t>&)> compute)
    {
        /* Feature cache to be captured by the lambda function. */
        static std::vector<std::vector<T>> featureCache = std::vector<std::vector<T>>(cacheSize);

        return [=](std::vector<int16_t>& audioDataWindow,
                   size_t index,
                   bool useCache,
                   size_t featuresOverlapIndex,
                   size_t resizeScale)
        {
            T* tensorData = tflite::GetTensorData<T>(inputTensor);
            std::vector<T> features;

            /* Reuse features from the cache if the cache is ready and the sliding windows
             * overlap. The overlap is at the beginning of the sliding window and has the
             * size of the feature cache. */
            if (useCache && index < featureCache.size()) {
                features = std::move(featureCache[index]);
            } else {
                features = std::move(compute(audioDataWindow));
            }
            auto size = features.size() / resizeScale;
            auto sizeBytes = sizeof(T);

            /* Input should be transposed and "resized" by skipping elements. */
            for (size_t outIndex = 0; outIndex < size; outIndex++) {
                std::memcpy(tensorData + (outIndex*size) + index, &features[outIndex*resizeScale], sizeBytes);
            }

            /* Start renewing the cache as soon as the iteration goes out of the windows' overlap. */
            if (index >= featuresOverlapIndex / resizeScale) {
                featureCache[index - featuresOverlapIndex / resizeScale] = std::move(features);
            }
        };
    }
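
    /* A sketch of how FeatureCalc is expected to be wired up (illustration only).
     * `computeMelSpec` is a hypothetical lambda that turns an int16 audio window into
     * a quantised feature vector; in this use case that role is played by the MEL
     * spectrogram computation selected via GetFeatureCalculator declared below:
     *
     *   auto featureCalc = FeatureCalc<int8_t>(inputTensor, cacheSize,
     *       [](std::vector<int16_t>& window) { return computeMelSpec(window); });
     *   featureCalc(audioWindow, melWindowIndex, useCache, overlapIndex, resizeScale);
     */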

    template std::function<void (std::vector<int16_t>&, size_t, bool, size_t, size_t)>
    FeatureCalc<int8_t>(TfLiteTensor* inputTensor,
                        size_t cacheSize,
                        std::function<std::vector<int8_t> (std::vector<int16_t>&)> compute);

    template std::function<void (std::vector<int16_t>&, size_t, bool, size_t, size_t)>
    FeatureCalc<float>(TfLiteTensor* inputTensor,
                       size_t cacheSize,
                       std::function<std::vector<float> (std::vector<int16_t>&)> compute);

    std::function<void (std::vector<int16_t>&, int, bool, size_t, size_t)>
    GetFeatureCalculator(audio::AdMelSpectrogram& melSpec,
                         TfLiteTensor* inputTensor,
                         size_t cacheSize,
                         float trainingMean);

} /* namespace app */
} /* namespace arm */

#endif /* AD_PROCESSING_HPP */