blob: cbf0e4e1f0835b95f20767ed6132627fb6b86c58 [file] [log] [blame]
Richard Burton00553462021-11-10 16:27:14 +00001/*
Richard Burton4e002792022-05-04 09:45:02 +01002 * Copyright (c) 2021-2022 Arm Limited. All rights reserved.
Richard Burton00553462021-11-10 16:27:14 +00003 * SPDX-License-Identifier: Apache-2.0
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
Richard Burton4e002792022-05-04 09:45:02 +010017#ifndef RNNOISE_FEATURE_PROCESSOR_HPP
18#define RNNOISE_FEATURE_PROCESSOR_HPP
19
Richard Burton00553462021-11-10 16:27:14 +000020#include "PlatformMath.hpp"
21#include <cstdint>
22#include <vector>
23#include <array>
24#include <tuple>
25
26namespace arm {
27namespace app {
28namespace rnn {
29
30 using vec1D32F = std::vector<float>;
31 using vec2D32F = std::vector<vec1D32F>;
32 using arrHp = std::array<float, 2>;
33 using math::FftInstance;
34 using math::FftType;
35
36 class FrameFeatures {
37 public:
38 bool m_silence{false}; /* If frame contains silence or not. */
39 vec1D32F m_featuresVec{}; /* Calculated feature vector to feed to model. */
40 vec1D32F m_fftX{}; /* Vector of floats arranged to represent complex numbers. */
41 vec1D32F m_fftP{}; /* Vector of floats arranged to represent complex numbers. */
42 vec1D32F m_Ex{}; /* Spectral band energy for audio x. */
43 vec1D32F m_Ep{}; /* Spectral band energy for pitch p. */
44 vec1D32F m_Exp{}; /* Correlated spectral energy between x and p. */
45 };
46
47 /**
48 * @brief RNNoise pre and post processing class based on the 2018 paper from
49 * Jan-Marc Valin. Recommended reading:
50 * - https://jmvalin.ca/demo/rnnoise/
51 * - https://arxiv.org/abs/1709.08243
52 **/
Richard Burton4e002792022-05-04 09:45:02 +010053 class RNNoiseFeatureProcessor {
Richard Burton00553462021-11-10 16:27:14 +000054 /* Public interface */
55 public:
Richard Burton4e002792022-05-04 09:45:02 +010056 RNNoiseFeatureProcessor();
57 ~RNNoiseFeatureProcessor() = default;
Richard Burton00553462021-11-10 16:27:14 +000058
59 /**
60 * @brief Calculates the features from a given audio buffer ready to be sent to RNNoise model.
61 * @param[in] audioData Pointer to the floating point vector
62 * with audio data (within the numerical
63 * limits of int16_t type).
64 * @param[in] audioLen Number of elements in the audio window.
65 * @param[out] features FrameFeatures object reference.
66 **/
67 void PreprocessFrame(const float* audioData,
68 size_t audioLen,
69 FrameFeatures& features);
70
71 /**
72 * @brief Use the RNNoise model output gain values with pre-processing features
73 * to generate audio with noise suppressed.
74 * @param[in] modelOutput Output gain values from model.
75 * @param[in] features Calculated features from pre-processing step.
76 * @param[out] outFrame Output frame to be populated.
77 **/
78 void PostProcessFrame(vec1D32F& modelOutput, FrameFeatures& features, vec1D32F& outFrame);
79
80
81 /* Public constants */
82 public:
83 static constexpr uint32_t FRAME_SIZE_SHIFT{2};
Richard Burton033c9152021-12-07 14:04:44 +000084 static constexpr uint32_t FRAME_SIZE{512};
Richard Burton00553462021-11-10 16:27:14 +000085 static constexpr uint32_t WINDOW_SIZE{2 * FRAME_SIZE};
86 static constexpr uint32_t FREQ_SIZE{FRAME_SIZE + 1};
87
Richard Burton033c9152021-12-07 14:04:44 +000088 static constexpr uint32_t PITCH_MIN_PERIOD{64};
89 static constexpr uint32_t PITCH_MAX_PERIOD{820};
90 static constexpr uint32_t PITCH_FRAME_SIZE{1024};
Richard Burton00553462021-11-10 16:27:14 +000091 static constexpr uint32_t PITCH_BUF_SIZE{PITCH_MAX_PERIOD + PITCH_FRAME_SIZE};
92
93 static constexpr uint32_t NB_BANDS{22};
94 static constexpr uint32_t CEPS_MEM{8};
95 static constexpr uint32_t NB_DELTA_CEPS{6};
96
97 static constexpr uint32_t NB_FEATURES{NB_BANDS + 3*NB_DELTA_CEPS + 2};
98
99 /* Private functions */
100 private:
101
102 /**
103 * @brief Initialises the half window and DCT tables.
104 */
105 void InitTables();
106
107 /**
108 * @brief Applies a bi-quadratic filter over the audio window.
109 * @param[in] bHp Constant coefficient set b (arrHp type).
110 * @param[in] aHp Constant coefficient set a (arrHp type).
111 * @param[in,out] memHpX Coefficients populated by this function.
112 * @param[in,out] audioWindow Floating point vector with audio data.
113 **/
114 void BiQuad(
115 const arrHp& bHp,
116 const arrHp& aHp,
117 arrHp& memHpX,
118 vec1D32F& audioWindow);
119
120 /**
121 * @brief Computes features from the "filtered" audio window.
122 * @param[in] audioWindow Floating point vector with audio data.
123 * @param[out] features FrameFeatures object reference.
124 **/
125 void ComputeFrameFeatures(vec1D32F& audioWindow, FrameFeatures& features);
126
127 /**
128 * @brief Runs analysis on the audio buffer.
129 * @param[in] audioWindow Floating point vector with audio data.
130 * @param[out] fft Floating point FFT vector containing real and
131 * imaginary pairs of elements. NOTE: this vector
132 * does not contain the mirror image (conjugates)
133 * part of the spectrum.
134 * @param[out] energy Computed energy for each band in the Bark scale.
135 * @param[out] analysisMem Buffer sequentially, but partially,
136 * populated with new audio data.
137 **/
138 void FrameAnalysis(
139 const vec1D32F& audioWindow,
140 vec1D32F& fft,
141 vec1D32F& energy,
142 vec1D32F& analysisMem);
143
144 /**
145 * @brief Applies the window function, in-place, over the given
146 * floating point buffer.
147 * @param[in,out] x Buffer the window will be applied to.
148 **/
149 void ApplyWindow(vec1D32F& x);
150
151 /**
152 * @brief Computes the FFT for a given vector.
153 * @param[in] x Vector to compute the FFT from.
154 * @param[out] fft Floating point FFT vector containing real and
155 * imaginary pairs of elements. NOTE: this vector
156 * does not contain the mirror image (conjugates)
157 * part of the spectrum.
158 **/
159 void ForwardTransform(
160 vec1D32F& x,
161 vec1D32F& fft);
162
163 /**
164 * @brief Computes band energy for each of the 22 Bark scale bands.
165 * @param[in] fft_X FFT spectrum (as computed by ForwardTransform).
166 * @param[out] bandE Vector with 22 elements populated with energy for
167 * each band.
168 **/
169 void ComputeBandEnergy(const vec1D32F& fft_X, vec1D32F& bandE);
170
171 /**
172 * @brief Computes band energy correlation.
173 * @param[in] X FFT vector X.
174 * @param[in] P FFT vector P.
175 * @param[out] bandC Vector with 22 elements populated with band energy
176 * correlation for the two input FFT vectors.
177 **/
178 void ComputeBandCorr(const vec1D32F& X, const vec1D32F& P, vec1D32F& bandC);
179
180 /**
181 * @brief Performs pitch auto-correlation for a given vector for
182 * given lag.
183 * @param[in] x Input vector.
184 * @param[out] ac Auto-correlation output vector.
185 * @param[in] lag Lag value.
186 * @param[in] n Number of elements to consider for correlation
187 * computation.
188 **/
189 void AutoCorr(const vec1D32F &x,
190 vec1D32F &ac,
191 size_t lag,
192 size_t n);
193
194 /**
195 * @brief Computes pitch cross-correlation.
196 * @param[in] x Input vector 1.
197 * @param[in] y Input vector 2.
George Gekova2b0fc22021-11-08 16:30:43 +0000198 * @param[out] xCorr Cross-correlation output vector.
Richard Burton00553462021-11-10 16:27:14 +0000199 * @param[in] len Number of elements to consider for correlation.
200 * computation.
201 * @param[in] maxPitch Maximum pitch.
202 **/
203 void PitchXCorr(
204 const vec1D32F& x,
205 const vec1D32F& y,
George Gekova2b0fc22021-11-08 16:30:43 +0000206 vec1D32F& xCorr,
Richard Burton00553462021-11-10 16:27:14 +0000207 size_t len,
208 size_t maxPitch);
209
210 /**
211 * @brief Computes "Linear Predictor Coefficients".
212 * @param[in] ac Correlation vector.
213 * @param[in] p Number of elements of input vector to consider.
214 * @param[out] lpc Output coefficients vector.
215 **/
216 void LPC(const vec1D32F& ac, int32_t p, vec1D32F& lpc);
217
218 /**
219 * @brief Custom FIR implementation.
220 * @param[in] num FIR coefficient vector.
221 * @param[in] N Number of elements.
222 * @param[out] x Vector to be be processed.
223 **/
224 void Fir5(const vec1D32F& num, uint32_t N, vec1D32F& x);
225
226 /**
227 * @brief Down-sample the pitch buffer.
228 * @param[in,out] pitchBuf Pitch buffer.
229 * @param[in] pitchBufSz Buffer size.
230 **/
231 void PitchDownsample(vec1D32F& pitchBuf, size_t pitchBufSz);
232
233 /**
234 * @brief Pitch search function.
235 * @param[in] xLP Shifted pitch buffer input.
236 * @param[in] y Pitch buffer input.
237 * @param[in] len Length to search for.
238 * @param[in] maxPitch Maximum pitch.
239 * @return pitch index.
240 **/
241 int PitchSearch(vec1D32F& xLp, vec1D32F& y, uint32_t len, uint32_t maxPitch);
242
243 /**
244 * @brief Finds the "best" pitch from the buffer.
245 * @param[in] xCorr Pitch correlation vector.
246 * @param[in] y Pitch buffer input.
247 * @param[in] len Length to search for.
248 * @param[in] maxPitch Maximum pitch.
249 * @return pitch array (2 elements).
250 **/
251 arrHp FindBestPitch(vec1D32F& xCorr, vec1D32F& y, uint32_t len, uint32_t maxPitch);
252
253 /**
254 * @brief Remove pitch period doubling errors.
255 * @param[in,out] pitchBuf Pitch buffer vector.
256 * @param[in] maxPeriod Maximum period.
257 * @param[in] minPeriod Minimum period.
258 * @param[in] frameSize Frame size.
259 * @param[in] pitchIdx0_ Pitch index 0.
260 * @return pitch index.
261 **/
262 int RemoveDoubling(
263 vec1D32F& pitchBuf,
264 uint32_t maxPeriod,
265 uint32_t minPeriod,
266 uint32_t frameSize,
267 size_t pitchIdx0_);
268
269 /**
270 * @brief Computes pitch gain.
271 * @param[in] xy Single xy cross correlation value.
272 * @param[in] xx Single xx auto correlation value.
273 * @param[in] yy Single yy auto correlation value.
274 * @return Calculated pitch gain.
275 **/
276 float ComputePitchGain(float xy, float xx, float yy);
277
278 /**
279 * @brief Computes DCT vector from the given input.
280 * @param[in] input Input vector.
281 * @param[out] output Output vector with DCT coefficients.
282 **/
283 void DCT(vec1D32F& input, vec1D32F& output);
284
285 /**
286 * @brief Perform inverse fourier transform on complex spectral vector.
287 * @param[out] out Output vector.
288 * @param[in] fftXIn Vector of floats arranged to represent complex numbers interleaved.
289 **/
290 void InverseTransform(vec1D32F& out, vec1D32F& fftXIn);
291
292 /**
293 * @brief Perform pitch filtering.
294 * @param[in] features Object with pre-processing calculated frame features.
295 * @param[in] g Gain values.
296 **/
297 void PitchFilter(FrameFeatures& features, vec1D32F& g);
298
299 /**
300 * @brief Interpolate the band gain values.
301 * @param[out] g Gain values.
302 * @param[in] bandE Vector with 22 elements populated with energy for
303 * each band.
304 **/
305 void InterpBandGain(vec1D32F& g, vec1D32F& bandE);
306
307 /**
308 * @brief Create de-noised frame.
309 * @param[out] outFrame Output vector for storing the created audio frame.
310 * @param[in] fftY Gain adjusted complex spectral vector.
311 */
312 void FrameSynthesis(vec1D32F& outFrame, vec1D32F& fftY);
313
314 /* Private objects */
315 private:
316 FftInstance m_fftInstReal; /* FFT instance for real numbers */
317 FftInstance m_fftInstCmplx; /* FFT instance for complex numbers */
318 vec1D32F m_halfWindow; /* Window coefficients */
319 vec1D32F m_dctTable; /* DCT table */
320 vec1D32F m_analysisMem; /* Buffer used for frame analysis */
321 vec2D32F m_cepstralMem; /* Cepstral coefficients */
322 size_t m_memId; /* memory ID */
323 vec1D32F m_synthesisMem; /* Synthesis mem (used by post-processing) */
324 vec1D32F m_pitchBuf; /* Pitch buffer */
325 float m_lastGain; /* Last gain calculated */
326 int m_lastPeriod; /* Last period calculated */
327 arrHp m_memHpX; /* HpX coefficients. */
328 vec1D32F m_lastGVec; /* Last gain vector (used by post-processing) */
329
330 /* Constants */
331 const std::array <uint32_t, NB_BANDS> m_eband5ms {
332 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12,
333 14, 16, 20, 24, 28, 34, 40, 48, 60, 78, 100};
Richard Burton00553462021-11-10 16:27:14 +0000334 };
335
336
337} /* namespace rnn */
Richard Burton4e002792022-05-04 09:45:02 +0100338} /* namespace app */
Richard Burton00553462021-11-10 16:27:14 +0000339} /* namespace arm */
Richard Burton4e002792022-05-04 09:45:02 +0100340
341#endif /* RNNOISE_FEATURE_PROCESSOR_HPP */