Blame - source/use_case/noise_reduction/include/RNNoiseProcess.hpp - ml/ethos-u/ml-embedded-evaluation-kit

blob: 0e19c68a8d315716b584f979e6c074d9936cd672 [file] [log] [blame]

Richard Burton	0055346	2021-11-10 16:27:14 +0000	[diff] [blame]	1	/*
				2	* Copyright (c) 2021 Arm Limited. All rights reserved.
				3	* SPDX-License-Identifier: Apache-2.0
				4	*
				5	* Licensed under the Apache License, Version 2.0 (the "License");
				6	* you may not use this file except in compliance with the License.
				7	* You may obtain a copy of the License at
				8	*
				9	* http://www.apache.org/licenses/LICENSE-2.0
				10	*
				11	* Unless required by applicable law or agreed to in writing, software
				12	* distributed under the License is distributed on an "AS IS" BASIS,
				13	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	* See the License for the specific language governing permissions and
				15	* limitations under the License.
				16	*/
				17	#include "PlatformMath.hpp"
				18	#include <cstdint>
				19	#include <vector>
				20	#include <array>
				21	#include <tuple>
				22
				23	namespace arm {
				24	namespace app {
				25	namespace rnn {
				26
				27	using vec1D32F = std::vector<float>; /* 1D vector of floats. */
				28	using vec2D32F = std::vector<vec1D32F>; /* Vector of vec1D32F, i.e. 2D float data. */
				29	using arrHp = std::array<float, 2>; /* Fixed-size array of two floats. */
				30	using math::FftInstance; /* FFT instance type from namespace math. */
				31	using math::FftType; /* FFT type from namespace math. */
				32
				33	class FrameFeatures { /* Per-frame data: output of RNNoiseProcess::PreprocessFrame, input to RNNoiseProcess::PostProcessFrame. */
				34	public:
				35	bool m_silence{false}; /* Whether the frame contains silence (defaults to false). */
				36	vec1D32F m_featuresVec{}; /* Calculated feature vector to feed to model. */
				37	vec1D32F m_fftX{}; /* Vector of floats arranged to represent complex numbers (presumably the FFT of audio x - TODO confirm). */
				38	vec1D32F m_fftP{}; /* Vector of floats arranged to represent complex numbers (presumably the FFT of pitch p - TODO confirm). */
				39	vec1D32F m_Ex{}; /* Spectral band energy for audio x. */
				40	vec1D32F m_Ep{}; /* Spectral band energy for pitch p. */
				41	vec1D32F m_Exp{}; /* Correlated spectral energy between x and p. */
				42	};
				43
				44	/**
				45	* @brief RNNoise pre and post processing class based on the 2018 paper from
				46	* Jean-Marc Valin. Recommended reading:
				47	* - https://jmvalin.ca/demo/rnnoise/
				48	* - https://arxiv.org/abs/1709.08243
				49	**/
				50	class RNNoiseProcess {
				51	/* Public interface */
				52	public:
				53	RNNoiseProcess();
				54	~RNNoiseProcess() = default;
				55
				56	/**
				57	* @brief Calculates the features from a given audio buffer ready to be sent to RNNoise model.
				58	* @param[in] audioData Pointer to the floating point vector
				59	* with audio data (within the numerical
				60	* limits of int16_t type).
				61	* @param[in] audioLen Number of elements in the audio window.
				62	* @param[out] features FrameFeatures object reference.
				63	**/
				64	void PreprocessFrame(const float* audioData,
				65	size_t audioLen,
				66	FrameFeatures& features);
				67
				68	/**
				69	* @brief Use the RNNoise model output gain values with pre-processing features
				70	* to generate audio with noise suppressed.
				71	* @param[in] modelOutput Output gain values from model.
				72	* @param[in] features Calculated features from pre-processing step.
				73	* @param[out] outFrame Output frame to be populated.
				74	**/
				75	void PostProcessFrame(vec1D32F& modelOutput, FrameFeatures& features, vec1D32F& outFrame);
				76
				77
				78	/* Public constants */
				79	public:
				80	static constexpr uint32_t FRAME_SIZE_SHIFT{2};
				81	static constexpr uint32_t FRAME_SIZE{480};
				82	static constexpr uint32_t WINDOW_SIZE{2 * FRAME_SIZE}; /* = 960 */
				83	static constexpr uint32_t FREQ_SIZE{FRAME_SIZE + 1}; /* = 481 */
				84
				85	static constexpr uint32_t PITCH_MIN_PERIOD{60};
				86	static constexpr uint32_t PITCH_MAX_PERIOD{768};
				87	static constexpr uint32_t PITCH_FRAME_SIZE{960};
				88	static constexpr uint32_t PITCH_BUF_SIZE{PITCH_MAX_PERIOD + PITCH_FRAME_SIZE}; /* = 1728 */
				89
				90	static constexpr uint32_t NB_BANDS{22};
				91	static constexpr uint32_t CEPS_MEM{8};
				92	static constexpr uint32_t NB_DELTA_CEPS{6};
				93
				94	static constexpr uint32_t NB_FEATURES{NB_BANDS + 3*NB_DELTA_CEPS + 2}; /* = 22 + 18 + 2 = 42 */
				95
				96	/* Private functions */
				97	private:
				98
				99	/**
				100	* @brief Initialises the half window and DCT tables.
				101	*/
				102	void InitTables();
				103
				104	/**
				105	* @brief Applies a bi-quadratic filter over the audio window.
				106	* @param[in] bHp Constant coefficient set b (arrHp type).
				107	* @param[in] aHp Constant coefficient set a (arrHp type).
				108	* @param[in,out] memHpX Coefficients populated by this function.
				109	* @param[in,out] audioWindow Floating point vector with audio data.
				110	**/
				111	void BiQuad(
				112	const arrHp& bHp,
				113	const arrHp& aHp,
				114	arrHp& memHpX,
				115	vec1D32F& audioWindow);
				116
				117	/**
				118	* @brief Computes features from the "filtered" audio window.
				119	* @param[in] audioWindow Floating point vector with audio data.
				120	* @param[out] features FrameFeatures object reference.
				121	**/
				122	void ComputeFrameFeatures(vec1D32F& audioWindow, FrameFeatures& features);
				123
				124	/**
				125	* @brief Runs analysis on the audio buffer.
				126	* @param[in] audioWindow Floating point vector with audio data.
				127	* @param[out] fft Floating point FFT vector containing real and
				128	* imaginary pairs of elements. NOTE: this vector
				129	* does not contain the mirror image (conjugates)
				130	* part of the spectrum.
				131	* @param[out] energy Computed energy for each band in the Bark scale.
				132	* @param[out] analysisMem Buffer sequentially, but partially,
				133	* populated with new audio data.
				134	**/
				135	void FrameAnalysis(
				136	const vec1D32F& audioWindow,
				137	vec1D32F& fft,
				138	vec1D32F& energy,
				139	vec1D32F& analysisMem);
				140
				141	/**
				142	* @brief Applies the window function, in-place, over the given
				143	* floating point buffer.
				144	* @param[in,out] x Buffer the window will be applied to.
				145	**/
				146	void ApplyWindow(vec1D32F& x);
				147
				148	/**
				149	* @brief Computes the FFT for a given vector.
				150	* @param[in] x Vector to compute the FFT from.
				151	* @param[out] fft Floating point FFT vector containing real and
				152	* imaginary pairs of elements. NOTE: this vector
				153	* does not contain the mirror image (conjugates)
				154	* part of the spectrum.
				155	**/
				156	void ForwardTransform(
				157	vec1D32F& x,
				158	vec1D32F& fft);
				159
				160	/**
				161	* @brief Computes band energy for each of the 22 Bark scale bands.
				162	* @param[in] fft_X FFT spectrum (as computed by ForwardTransform).
				163	* @param[out] bandE Vector with 22 elements populated with energy for
				164	* each band.
				165	**/
				166	void ComputeBandEnergy(const vec1D32F& fft_X, vec1D32F& bandE);
				167
				168	/**
				169	* @brief Computes band energy correlation.
				170	* @param[in] X FFT vector X.
				171	* @param[in] P FFT vector P.
				172	* @param[out] bandC Vector with 22 elements populated with band energy
				173	* correlation for the two input FFT vectors.
				174	**/
				175	void ComputeBandCorr(const vec1D32F& X, const vec1D32F& P, vec1D32F& bandC);
				176
				177	/**
				178	* @brief Performs pitch auto-correlation for a given vector for
				179	* given lag.
				180	* @param[in] x Input vector.
				181	* @param[out] ac Auto-correlation output vector.
				182	* @param[in] lag Lag value.
				183	* @param[in] n Number of elements to consider for correlation
				184	* computation.
				185	**/
				186	void AutoCorr(const vec1D32F &x,
				187	vec1D32F &ac,
				188	size_t lag,
				189	size_t n);
				190
				191	/**
				192	* @brief Computes pitch cross-correlation.
				193	* @param[in] x Input vector 1.
				194	* @param[in] y Input vector 2.
George Gekov	a2b0fc2	2021-11-08 16:30:43 +0000	[diff] [blame^]	195	* @param[out] xCorr Cross-correlation output vector.
Richard Burton	0055346	2021-11-10 16:27:14 +0000	[diff] [blame]	196	* @param[in] len Number of elements to consider for correlation
				197	* computation.
				198	* @param[in] maxPitch Maximum pitch.
				199	**/
				200	void PitchXCorr(
				201	const vec1D32F& x,
				202	const vec1D32F& y,
George Gekov	a2b0fc2	2021-11-08 16:30:43 +0000	[diff] [blame^]	203	vec1D32F& xCorr,
Richard Burton	0055346	2021-11-10 16:27:14 +0000	[diff] [blame]	204	size_t len,
				205	size_t maxPitch);
				206
				207	/**
				208	* @brief Computes "Linear Predictor Coefficients".
				209	* @param[in] ac Correlation vector.
				210	* @param[in] p Number of elements of input vector to consider.
				211	* @param[out] lpc Output coefficients vector.
				212	**/
				213	void LPC(const vec1D32F& ac, int32_t p, vec1D32F& lpc);
				214
				215	/**
				216	* @brief Custom FIR implementation.
				217	* @param[in] num FIR coefficient vector.
				218	* @param[in] N Number of elements.
				219	* @param[out] x Vector to be processed.
				220	**/
				221	void Fir5(const vec1D32F& num, uint32_t N, vec1D32F& x);
				222
				223	/**
				224	* @brief Down-sample the pitch buffer.
				225	* @param[in,out] pitchBuf Pitch buffer.
				226	* @param[in] pitchBufSz Buffer size.
				227	**/
				228	void PitchDownsample(vec1D32F& pitchBuf, size_t pitchBufSz);
				229
				230	/**
				231	* @brief Pitch search function.
				232	* @param[in] xLp Shifted pitch buffer input.
				233	* @param[in] y Pitch buffer input.
				234	* @param[in] len Length to search for.
				235	* @param[in] maxPitch Maximum pitch.
				236	* @return pitch index.
				237	**/
				238	int PitchSearch(vec1D32F& xLp, vec1D32F& y, uint32_t len, uint32_t maxPitch);
				239
				240	/**
				241	* @brief Finds the "best" pitch from the buffer.
				242	* @param[in] xCorr Pitch correlation vector.
				243	* @param[in] y Pitch buffer input.
				244	* @param[in] len Length to search for.
				245	* @param[in] maxPitch Maximum pitch.
				246	* @return pitch array (2 elements).
				247	**/
				248	arrHp FindBestPitch(vec1D32F& xCorr, vec1D32F& y, uint32_t len, uint32_t maxPitch);
				249
				250	/**
				251	* @brief Remove pitch period doubling errors.
				252	* @param[in,out] pitchBuf Pitch buffer vector.
				253	* @param[in] maxPeriod Maximum period.
				254	* @param[in] minPeriod Minimum period.
				255	* @param[in] frameSize Frame size.
				256	* @param[in] pitchIdx0_ Pitch index 0.
				257	* @return pitch index.
				258	**/
				259	int RemoveDoubling(
				260	vec1D32F& pitchBuf,
				261	uint32_t maxPeriod,
				262	uint32_t minPeriod,
				263	uint32_t frameSize,
				264	size_t pitchIdx0_);
				265
				266	/**
				267	* @brief Computes pitch gain.
				268	* @param[in] xy Single xy cross correlation value.
				269	* @param[in] xx Single xx auto correlation value.
				270	* @param[in] yy Single yy auto correlation value.
				271	* @return Calculated pitch gain.
				272	**/
				273	float ComputePitchGain(float xy, float xx, float yy);
				274
				275	/**
				276	* @brief Computes DCT vector from the given input.
				277	* @param[in] input Input vector.
				278	* @param[out] output Output vector with DCT coefficients.
				279	**/
				280	void DCT(vec1D32F& input, vec1D32F& output);
				281
				282	/**
				283	* @brief Perform inverse fourier transform on complex spectral vector.
				284	* @param[out] out Output vector.
				285	* @param[in] fftXIn Vector of floats arranged to represent complex numbers interleaved.
				286	**/
				287	void InverseTransform(vec1D32F& out, vec1D32F& fftXIn);
				288
				289	/**
				290	* @brief Perform pitch filtering.
				291	* @param[in] features Object with pre-processing calculated frame features.
				292	* @param[in] g Gain values.
				293	**/
				294	void PitchFilter(FrameFeatures& features, vec1D32F& g);
				295
				296	/**
				297	* @brief Interpolate the band gain values.
				298	* @param[out] g Gain values.
				299	* @param[in] bandE Vector with 22 elements populated with energy for
				300	* each band.
				301	**/
				302	void InterpBandGain(vec1D32F& g, vec1D32F& bandE);
				303
				304	/**
				305	* @brief Create de-noised frame.
				306	* @param[out] outFrame Output vector for storing the created audio frame.
				307	* @param[in] fftY Gain adjusted complex spectral vector.
				308	*/
				309	void FrameSynthesis(vec1D32F& outFrame, vec1D32F& fftY);
				310
				311	/* Private objects */
				312	private:
				313	FftInstance m_fftInstReal; /* FFT instance for real numbers */
				314	FftInstance m_fftInstCmplx; /* FFT instance for complex numbers */
				315	vec1D32F m_halfWindow; /* Window coefficients */
				316	vec1D32F m_dctTable; /* DCT table */
				317	vec1D32F m_analysisMem; /* Buffer used for frame analysis */
				318	vec2D32F m_cepstralMem; /* Cepstral coefficients */
				319	size_t m_memId; /* memory ID (presumably an index into m_cepstralMem - TODO confirm) */
				320	vec1D32F m_synthesisMem; /* Synthesis mem (used by post-processing) */
				321	vec1D32F m_pitchBuf; /* Pitch buffer */
				322	float m_lastGain; /* Last gain calculated */
				323	int m_lastPeriod; /* Last period calculated */
				324	arrHp m_memHpX; /* HpX coefficients. */
				325	vec1D32F m_lastGVec; /* Last gain vector (used by post-processing) */
				326
				327	/* Constants: table with one entry per band (NB_BANDS); presumably band edge indices - TODO confirm */
				328	const std::array <uint32_t, NB_BANDS> m_eband5ms {
				329	0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12,
				330	14, 16, 20, 24, 28, 34, 40, 48, 60, 78, 100};
				331
				332	};
				333
				334
				335	} /* namespace rnn */
				336	} /* namespace app */
				337	} /* namespace arm */