/*
 * Copyright (c) 2021 Arm Limited. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "PlatformMath.hpp"

#include <array>
#include <cstddef>
#include <cstdint>
#include <tuple>
#include <vector>
22
23namespace arm {
24namespace app {
25namespace rnn {
26
27 using vec1D32F = std::vector<float>;
28 using vec2D32F = std::vector<vec1D32F>;
29 using arrHp = std::array<float, 2>;
30 using math::FftInstance;
31 using math::FftType;
32
33 class FrameFeatures {
34 public:
35 bool m_silence{false}; /* If frame contains silence or not. */
36 vec1D32F m_featuresVec{}; /* Calculated feature vector to feed to model. */
37 vec1D32F m_fftX{}; /* Vector of floats arranged to represent complex numbers. */
38 vec1D32F m_fftP{}; /* Vector of floats arranged to represent complex numbers. */
39 vec1D32F m_Ex{}; /* Spectral band energy for audio x. */
40 vec1D32F m_Ep{}; /* Spectral band energy for pitch p. */
41 vec1D32F m_Exp{}; /* Correlated spectral energy between x and p. */
42 };
43
44 /**
45 * @brief RNNoise pre and post processing class based on the 2018 paper from
46 * Jan-Marc Valin. Recommended reading:
47 * - https://jmvalin.ca/demo/rnnoise/
48 * - https://arxiv.org/abs/1709.08243
49 **/
50 class RNNoiseProcess {
51 /* Public interface */
52 public:
53 RNNoiseProcess();
54 ~RNNoiseProcess() = default;
55
56 /**
57 * @brief Calculates the features from a given audio buffer ready to be sent to RNNoise model.
58 * @param[in] audioData Pointer to the floating point vector
59 * with audio data (within the numerical
60 * limits of int16_t type).
61 * @param[in] audioLen Number of elements in the audio window.
62 * @param[out] features FrameFeatures object reference.
63 **/
64 void PreprocessFrame(const float* audioData,
65 size_t audioLen,
66 FrameFeatures& features);
67
68 /**
69 * @brief Use the RNNoise model output gain values with pre-processing features
70 * to generate audio with noise suppressed.
71 * @param[in] modelOutput Output gain values from model.
72 * @param[in] features Calculated features from pre-processing step.
73 * @param[out] outFrame Output frame to be populated.
74 **/
75 void PostProcessFrame(vec1D32F& modelOutput, FrameFeatures& features, vec1D32F& outFrame);
76
77
78 /* Public constants */
79 public:
80 static constexpr uint32_t FRAME_SIZE_SHIFT{2};
81 static constexpr uint32_t FRAME_SIZE{480};
82 static constexpr uint32_t WINDOW_SIZE{2 * FRAME_SIZE};
83 static constexpr uint32_t FREQ_SIZE{FRAME_SIZE + 1};
84
85 static constexpr uint32_t PITCH_MIN_PERIOD{60};
86 static constexpr uint32_t PITCH_MAX_PERIOD{768};
87 static constexpr uint32_t PITCH_FRAME_SIZE{960};
88 static constexpr uint32_t PITCH_BUF_SIZE{PITCH_MAX_PERIOD + PITCH_FRAME_SIZE};
89
90 static constexpr uint32_t NB_BANDS{22};
91 static constexpr uint32_t CEPS_MEM{8};
92 static constexpr uint32_t NB_DELTA_CEPS{6};
93
94 static constexpr uint32_t NB_FEATURES{NB_BANDS + 3*NB_DELTA_CEPS + 2};
95
96 /* Private functions */
97 private:
98
99 /**
100 * @brief Initialises the half window and DCT tables.
101 */
102 void InitTables();
103
104 /**
105 * @brief Applies a bi-quadratic filter over the audio window.
106 * @param[in] bHp Constant coefficient set b (arrHp type).
107 * @param[in] aHp Constant coefficient set a (arrHp type).
108 * @param[in,out] memHpX Coefficients populated by this function.
109 * @param[in,out] audioWindow Floating point vector with audio data.
110 **/
111 void BiQuad(
112 const arrHp& bHp,
113 const arrHp& aHp,
114 arrHp& memHpX,
115 vec1D32F& audioWindow);
116
117 /**
118 * @brief Computes features from the "filtered" audio window.
119 * @param[in] audioWindow Floating point vector with audio data.
120 * @param[out] features FrameFeatures object reference.
121 **/
122 void ComputeFrameFeatures(vec1D32F& audioWindow, FrameFeatures& features);
123
124 /**
125 * @brief Runs analysis on the audio buffer.
126 * @param[in] audioWindow Floating point vector with audio data.
127 * @param[out] fft Floating point FFT vector containing real and
128 * imaginary pairs of elements. NOTE: this vector
129 * does not contain the mirror image (conjugates)
130 * part of the spectrum.
131 * @param[out] energy Computed energy for each band in the Bark scale.
132 * @param[out] analysisMem Buffer sequentially, but partially,
133 * populated with new audio data.
134 **/
135 void FrameAnalysis(
136 const vec1D32F& audioWindow,
137 vec1D32F& fft,
138 vec1D32F& energy,
139 vec1D32F& analysisMem);
140
141 /**
142 * @brief Applies the window function, in-place, over the given
143 * floating point buffer.
144 * @param[in,out] x Buffer the window will be applied to.
145 **/
146 void ApplyWindow(vec1D32F& x);
147
148 /**
149 * @brief Computes the FFT for a given vector.
150 * @param[in] x Vector to compute the FFT from.
151 * @param[out] fft Floating point FFT vector containing real and
152 * imaginary pairs of elements. NOTE: this vector
153 * does not contain the mirror image (conjugates)
154 * part of the spectrum.
155 **/
156 void ForwardTransform(
157 vec1D32F& x,
158 vec1D32F& fft);
159
160 /**
161 * @brief Computes band energy for each of the 22 Bark scale bands.
162 * @param[in] fft_X FFT spectrum (as computed by ForwardTransform).
163 * @param[out] bandE Vector with 22 elements populated with energy for
164 * each band.
165 **/
166 void ComputeBandEnergy(const vec1D32F& fft_X, vec1D32F& bandE);
167
168 /**
169 * @brief Computes band energy correlation.
170 * @param[in] X FFT vector X.
171 * @param[in] P FFT vector P.
172 * @param[out] bandC Vector with 22 elements populated with band energy
173 * correlation for the two input FFT vectors.
174 **/
175 void ComputeBandCorr(const vec1D32F& X, const vec1D32F& P, vec1D32F& bandC);
176
177 /**
178 * @brief Performs pitch auto-correlation for a given vector for
179 * given lag.
180 * @param[in] x Input vector.
181 * @param[out] ac Auto-correlation output vector.
182 * @param[in] lag Lag value.
183 * @param[in] n Number of elements to consider for correlation
184 * computation.
185 **/
186 void AutoCorr(const vec1D32F &x,
187 vec1D32F &ac,
188 size_t lag,
189 size_t n);
190
191 /**
192 * @brief Computes pitch cross-correlation.
193 * @param[in] x Input vector 1.
194 * @param[in] y Input vector 2.
George Gekova2b0fc22021-11-08 16:30:43 +0000195 * @param[out] xCorr Cross-correlation output vector.
Richard Burton00553462021-11-10 16:27:14 +0000196 * @param[in] len Number of elements to consider for correlation.
197 * computation.
198 * @param[in] maxPitch Maximum pitch.
199 **/
200 void PitchXCorr(
201 const vec1D32F& x,
202 const vec1D32F& y,
George Gekova2b0fc22021-11-08 16:30:43 +0000203 vec1D32F& xCorr,
Richard Burton00553462021-11-10 16:27:14 +0000204 size_t len,
205 size_t maxPitch);
206
207 /**
208 * @brief Computes "Linear Predictor Coefficients".
209 * @param[in] ac Correlation vector.
210 * @param[in] p Number of elements of input vector to consider.
211 * @param[out] lpc Output coefficients vector.
212 **/
213 void LPC(const vec1D32F& ac, int32_t p, vec1D32F& lpc);
214
215 /**
216 * @brief Custom FIR implementation.
217 * @param[in] num FIR coefficient vector.
218 * @param[in] N Number of elements.
219 * @param[out] x Vector to be be processed.
220 **/
221 void Fir5(const vec1D32F& num, uint32_t N, vec1D32F& x);
222
223 /**
224 * @brief Down-sample the pitch buffer.
225 * @param[in,out] pitchBuf Pitch buffer.
226 * @param[in] pitchBufSz Buffer size.
227 **/
228 void PitchDownsample(vec1D32F& pitchBuf, size_t pitchBufSz);
229
230 /**
231 * @brief Pitch search function.
232 * @param[in] xLP Shifted pitch buffer input.
233 * @param[in] y Pitch buffer input.
234 * @param[in] len Length to search for.
235 * @param[in] maxPitch Maximum pitch.
236 * @return pitch index.
237 **/
238 int PitchSearch(vec1D32F& xLp, vec1D32F& y, uint32_t len, uint32_t maxPitch);
239
240 /**
241 * @brief Finds the "best" pitch from the buffer.
242 * @param[in] xCorr Pitch correlation vector.
243 * @param[in] y Pitch buffer input.
244 * @param[in] len Length to search for.
245 * @param[in] maxPitch Maximum pitch.
246 * @return pitch array (2 elements).
247 **/
248 arrHp FindBestPitch(vec1D32F& xCorr, vec1D32F& y, uint32_t len, uint32_t maxPitch);
249
250 /**
251 * @brief Remove pitch period doubling errors.
252 * @param[in,out] pitchBuf Pitch buffer vector.
253 * @param[in] maxPeriod Maximum period.
254 * @param[in] minPeriod Minimum period.
255 * @param[in] frameSize Frame size.
256 * @param[in] pitchIdx0_ Pitch index 0.
257 * @return pitch index.
258 **/
259 int RemoveDoubling(
260 vec1D32F& pitchBuf,
261 uint32_t maxPeriod,
262 uint32_t minPeriod,
263 uint32_t frameSize,
264 size_t pitchIdx0_);
265
266 /**
267 * @brief Computes pitch gain.
268 * @param[in] xy Single xy cross correlation value.
269 * @param[in] xx Single xx auto correlation value.
270 * @param[in] yy Single yy auto correlation value.
271 * @return Calculated pitch gain.
272 **/
273 float ComputePitchGain(float xy, float xx, float yy);
274
275 /**
276 * @brief Computes DCT vector from the given input.
277 * @param[in] input Input vector.
278 * @param[out] output Output vector with DCT coefficients.
279 **/
280 void DCT(vec1D32F& input, vec1D32F& output);
281
282 /**
283 * @brief Perform inverse fourier transform on complex spectral vector.
284 * @param[out] out Output vector.
285 * @param[in] fftXIn Vector of floats arranged to represent complex numbers interleaved.
286 **/
287 void InverseTransform(vec1D32F& out, vec1D32F& fftXIn);
288
289 /**
290 * @brief Perform pitch filtering.
291 * @param[in] features Object with pre-processing calculated frame features.
292 * @param[in] g Gain values.
293 **/
294 void PitchFilter(FrameFeatures& features, vec1D32F& g);
295
296 /**
297 * @brief Interpolate the band gain values.
298 * @param[out] g Gain values.
299 * @param[in] bandE Vector with 22 elements populated with energy for
300 * each band.
301 **/
302 void InterpBandGain(vec1D32F& g, vec1D32F& bandE);
303
304 /**
305 * @brief Create de-noised frame.
306 * @param[out] outFrame Output vector for storing the created audio frame.
307 * @param[in] fftY Gain adjusted complex spectral vector.
308 */
309 void FrameSynthesis(vec1D32F& outFrame, vec1D32F& fftY);
310
311 /* Private objects */
312 private:
313 FftInstance m_fftInstReal; /* FFT instance for real numbers */
314 FftInstance m_fftInstCmplx; /* FFT instance for complex numbers */
315 vec1D32F m_halfWindow; /* Window coefficients */
316 vec1D32F m_dctTable; /* DCT table */
317 vec1D32F m_analysisMem; /* Buffer used for frame analysis */
318 vec2D32F m_cepstralMem; /* Cepstral coefficients */
319 size_t m_memId; /* memory ID */
320 vec1D32F m_synthesisMem; /* Synthesis mem (used by post-processing) */
321 vec1D32F m_pitchBuf; /* Pitch buffer */
322 float m_lastGain; /* Last gain calculated */
323 int m_lastPeriod; /* Last period calculated */
324 arrHp m_memHpX; /* HpX coefficients. */
325 vec1D32F m_lastGVec; /* Last gain vector (used by post-processing) */
326
327 /* Constants */
328 const std::array <uint32_t, NB_BANDS> m_eband5ms {
329 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12,
330 14, 16, 20, 24, 28, 34, 40, 48, 60, 78, 100};
331
332 };
333
334
335} /* namespace rnn */
336} /* namspace app */
337} /* namespace arm */