Blame - source/application/api/use_case/noise_reduction/include/RNNoiseFeatureProcessor.hpp - ml/ethos-u/ml-embedded-evaluation-kit

blob: cbf0e4e1f0835b95f20767ed6132627fb6b86c58 [file] [log] [blame]

Richard Burton	0055346	2021-11-10 16:27:14 +0000	[diff] [blame]	1	/*
Richard Burton	4e00279	2022-05-04 09:45:02 +0100	[diff] [blame]	2	* Copyright (c) 2021-2022 Arm Limited. All rights reserved.
Richard Burton	0055346	2021-11-10 16:27:14 +0000	[diff] [blame]	3	* SPDX-License-Identifier: Apache-2.0
				4	*
				5	* Licensed under the Apache License, Version 2.0 (the "License");
				6	* you may not use this file except in compliance with the License.
				7	* You may obtain a copy of the License at
				8	*
				9	* http://www.apache.org/licenses/LICENSE-2.0
				10	*
				11	* Unless required by applicable law or agreed to in writing, software
				12	* distributed under the License is distributed on an "AS IS" BASIS,
				13	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	* See the License for the specific language governing permissions and
				15	* limitations under the License.
				16	*/
Richard Burton	4e00279	2022-05-04 09:45:02 +0100	[diff] [blame]	17	#ifndef RNNOISE_FEATURE_PROCESSOR_HPP
				18	#define RNNOISE_FEATURE_PROCESSOR_HPP
				19
Richard Burton	0055346	2021-11-10 16:27:14 +0000	[diff] [blame]	20	#include "PlatformMath.hpp"
				21	#include <cstdint>
				22	#include <vector>
				23	#include <array>
				24	#include <tuple>
				25
				26	namespace arm {
				27	namespace app {
				28	namespace rnn {
				29
				30	using vec1D32F = std::vector<float>; /* 1D vector of floats. */
				31	using vec2D32F = std::vector<vec1D32F>; /* Vector of vec1D32F (2D float data). */
				32	using arrHp = std::array<float, 2>; /* Fixed pair of floats (see BiQuad, FindBestPitch). */
				33	using math::FftInstance;
				34	using math::FftType;
				35
				36	class FrameFeatures {
				37	public:
				38	bool m_silence{false}; /* True if the frame contains silence; defaults to false. */
				39	vec1D32F m_featuresVec{}; /* Calculated feature vector to feed to model. */
				40	vec1D32F m_fftX{}; /* Vector of floats arranged to represent complex numbers (presumably the spectrum of audio x; confirm). */
				41	vec1D32F m_fftP{}; /* Vector of floats arranged to represent complex numbers (presumably the spectrum of pitch p; confirm). */
				42	vec1D32F m_Ex{}; /* Spectral band energy for audio x. */
				43	vec1D32F m_Ep{}; /* Spectral band energy for pitch p. */
				44	vec1D32F m_Exp{}; /* Correlated spectral energy between x and p. */
				45	};
				46
				47	/**
				48	* @brief RNNoise pre and post processing class based on the 2018 paper from
				49	* Jean-Marc Valin. Recommended reading:
				50	* - https://jmvalin.ca/demo/rnnoise/
				51	* - https://arxiv.org/abs/1709.08243
				52	**/
Richard Burton	4e00279	2022-05-04 09:45:02 +0100	[diff] [blame]	53	class RNNoiseFeatureProcessor {
Richard Burton	0055346	2021-11-10 16:27:14 +0000	[diff] [blame]	54	/* Public interface */
				55	public:
Richard Burton	4e00279	2022-05-04 09:45:02 +0100	[diff] [blame]	56	RNNoiseFeatureProcessor();
				57	~RNNoiseFeatureProcessor() = default;
Richard Burton	0055346	2021-11-10 16:27:14 +0000	[diff] [blame]	58
				59	/**
				60	* @brief Calculates the features from a given audio buffer ready to be sent to RNNoise model.
				61	* @param[in] audioData Pointer to the floating point vector
				62	* with audio data (within the numerical
				63	* limits of int16_t type).
				64	* @param[in] audioLen Number of elements in the audio window.
				65	* @param[out] features FrameFeatures object reference.
				66	**/
				67	void PreprocessFrame(const float* audioData,
				68	size_t audioLen,
				69	FrameFeatures& features);
				70
				71	/**
				72	* @brief Use the RNNoise model output gain values with pre-processing features
				73	* to generate audio with noise suppressed.
				74	* @param[in] modelOutput Output gain values from model.
				75	* @param[in] features Calculated features from pre-processing step.
				76	* @param[out] outFrame Output frame to be populated.
				77	**/
				78	void PostProcessFrame(vec1D32F& modelOutput, FrameFeatures& features, vec1D32F& outFrame);
				79
				80
				81	/* Public constants */
				82	public:
				83	static constexpr uint32_t FRAME_SIZE_SHIFT{2};
Richard Burton	033c915	2021-12-07 14:04:44 +0000	[diff] [blame]	84	static constexpr uint32_t FRAME_SIZE{512};
Richard Burton	0055346	2021-11-10 16:27:14 +0000	[diff] [blame]	85	static constexpr uint32_t WINDOW_SIZE{2 * FRAME_SIZE};
				86	static constexpr uint32_t FREQ_SIZE{FRAME_SIZE + 1};
				87
Richard Burton	033c915	2021-12-07 14:04:44 +0000	[diff] [blame]	88	static constexpr uint32_t PITCH_MIN_PERIOD{64};
				89	static constexpr uint32_t PITCH_MAX_PERIOD{820};
				90	static constexpr uint32_t PITCH_FRAME_SIZE{1024};
Richard Burton	0055346	2021-11-10 16:27:14 +0000	[diff] [blame]	91	static constexpr uint32_t PITCH_BUF_SIZE{PITCH_MAX_PERIOD + PITCH_FRAME_SIZE};
				92
				93	static constexpr uint32_t NB_BANDS{22};
				94	static constexpr uint32_t CEPS_MEM{8};
				95	static constexpr uint32_t NB_DELTA_CEPS{6};
				96
				97	static constexpr uint32_t NB_FEATURES{NB_BANDS + 3*NB_DELTA_CEPS + 2};
				98
				99	/* Private functions */
				100	private:
				101
				102	/**
				103	* @brief Initialises the half window and DCT tables.
				104	*/
				105	void InitTables();
				106
				107	/**
				108	* @brief Applies a bi-quadratic filter over the audio window.
				109	* @param[in] bHp Constant coefficient set b (arrHp type).
				110	* @param[in] aHp Constant coefficient set a (arrHp type).
				111	* @param[in,out] memHpX Coefficients populated by this function.
				112	* @param[in,out] audioWindow Floating point vector with audio data.
				113	**/
				114	void BiQuad(
				115	const arrHp& bHp,
				116	const arrHp& aHp,
				117	arrHp& memHpX,
				118	vec1D32F& audioWindow);
				119
				120	/**
				121	* @brief Computes features from the "filtered" audio window.
				122	* @param[in] audioWindow Floating point vector with audio data.
				123	* @param[out] features FrameFeatures object reference.
				124	**/
				125	void ComputeFrameFeatures(vec1D32F& audioWindow, FrameFeatures& features);
				126
				127	/**
				128	* @brief Runs analysis on the audio buffer.
				129	* @param[in] audioWindow Floating point vector with audio data.
				130	* @param[out] fft Floating point FFT vector containing real and
				131	* imaginary pairs of elements. NOTE: this vector
				132	* does not contain the mirror image (conjugates)
				133	* part of the spectrum.
				134	* @param[out] energy Computed energy for each band in the Bark scale.
				135	* @param[out] analysisMem Buffer sequentially, but partially,
				136	* populated with new audio data.
				137	**/
				138	void FrameAnalysis(
				139	const vec1D32F& audioWindow,
				140	vec1D32F& fft,
				141	vec1D32F& energy,
				142	vec1D32F& analysisMem);
				143
				144	/**
				145	* @brief Applies the window function, in-place, over the given
				146	* floating point buffer.
				147	* @param[in,out] x Buffer the window will be applied to.
				148	**/
				149	void ApplyWindow(vec1D32F& x);
				150
				151	/**
				152	* @brief Computes the FFT for a given vector.
				153	* @param[in] x Vector to compute the FFT from.
				154	* @param[out] fft Floating point FFT vector containing real and
				155	* imaginary pairs of elements. NOTE: this vector
				156	* does not contain the mirror image (conjugates)
				157	* part of the spectrum.
				158	**/
				159	void ForwardTransform(
				160	vec1D32F& x,
				161	vec1D32F& fft);
				162
				163	/**
				164	* @brief Computes band energy for each of the 22 Bark scale bands.
				165	* @param[in] fft_X FFT spectrum (as computed by ForwardTransform).
				166	* @param[out] bandE Vector with 22 elements populated with energy for
				167	* each band.
				168	**/
				169	void ComputeBandEnergy(const vec1D32F& fft_X, vec1D32F& bandE);
				170
				171	/**
				172	* @brief Computes band energy correlation.
				173	* @param[in] X FFT vector X.
				174	* @param[in] P FFT vector P.
				175	* @param[out] bandC Vector with 22 elements populated with band energy
				176	* correlation for the two input FFT vectors.
				177	**/
				178	void ComputeBandCorr(const vec1D32F& X, const vec1D32F& P, vec1D32F& bandC);
				179
				180	/**
				181	* @brief Performs pitch auto-correlation for a given vector for
				182	* a given lag.
				183	* @param[in] x Input vector.
				184	* @param[out] ac Auto-correlation output vector.
				185	* @param[in] lag Lag value.
				186	* @param[in] n Number of elements to consider for correlation
				187	* computation.
				188	**/
				189	void AutoCorr(const vec1D32F &x,
				190	vec1D32F &ac,
				191	size_t lag,
				192	size_t n);
				193
				194	/**
				195	* @brief Computes pitch cross-correlation.
				196	* @param[in] x Input vector 1.
				197	* @param[in] y Input vector 2.
George Gekov	a2b0fc2	2021-11-08 16:30:43 +0000	[diff] [blame]	198	* @param[out] xCorr Cross-correlation output vector.
Richard Burton	0055346	2021-11-10 16:27:14 +0000	[diff] [blame]	199	* @param[in] len Number of elements to consider for correlation
				200	* computation.
				201	* @param[in] maxPitch Maximum pitch.
				202	**/
				203	void PitchXCorr(
				204	const vec1D32F& x,
				205	const vec1D32F& y,
George Gekov	a2b0fc2	2021-11-08 16:30:43 +0000	[diff] [blame]	206	vec1D32F& xCorr,
Richard Burton	0055346	2021-11-10 16:27:14 +0000	[diff] [blame]	207	size_t len,
				208	size_t maxPitch);
				209
				210	/**
				211	* @brief Computes "Linear Predictor Coefficients".
				212	* @param[in] ac Correlation vector.
				213	* @param[in] p Number of elements of input vector to consider.
				214	* @param[out] lpc Output coefficients vector.
				215	**/
				216	void LPC(const vec1D32F& ac, int32_t p, vec1D32F& lpc);
				217
				218	/**
				219	* @brief Custom FIR implementation.
				220	* @param[in] num FIR coefficient vector.
				221	* @param[in] N Number of elements.
				222	* @param[out] x Vector to be processed.
				223	**/
				224	void Fir5(const vec1D32F& num, uint32_t N, vec1D32F& x);
				225
				226	/**
				227	* @brief Down-sample the pitch buffer.
				228	* @param[in,out] pitchBuf Pitch buffer.
				229	* @param[in] pitchBufSz Buffer size.
				230	**/
				231	void PitchDownsample(vec1D32F& pitchBuf, size_t pitchBufSz);
				232
				233	/**
				234	* @brief Pitch search function.
				235	* @param[in] xLp Shifted pitch buffer input.
				236	* @param[in] y Pitch buffer input.
				237	* @param[in] len Length to search for.
				238	* @param[in] maxPitch Maximum pitch.
				239	* @return pitch index.
				240	**/
				241	int PitchSearch(vec1D32F& xLp, vec1D32F& y, uint32_t len, uint32_t maxPitch);
				242
				243	/**
				244	* @brief Finds the "best" pitch from the buffer.
				245	* @param[in] xCorr Pitch correlation vector.
				246	* @param[in] y Pitch buffer input.
				247	* @param[in] len Length to search for.
				248	* @param[in] maxPitch Maximum pitch.
				249	* @return pitch array (2 elements).
				250	**/
				251	arrHp FindBestPitch(vec1D32F& xCorr, vec1D32F& y, uint32_t len, uint32_t maxPitch);
				252
				253	/**
				254	* @brief Remove pitch period doubling errors.
				255	* @param[in,out] pitchBuf Pitch buffer vector.
				256	* @param[in] maxPeriod Maximum period.
				257	* @param[in] minPeriod Minimum period.
				258	* @param[in] frameSize Frame size.
				259	* @param[in] pitchIdx0_ Pitch index 0.
				260	* @return pitch index.
				261	**/
				262	int RemoveDoubling(
				263	vec1D32F& pitchBuf,
				264	uint32_t maxPeriod,
				265	uint32_t minPeriod,
				266	uint32_t frameSize,
				267	size_t pitchIdx0_);
				268
				269	/**
				270	* @brief Computes pitch gain.
				271	* @param[in] xy Single xy cross correlation value.
				272	* @param[in] xx Single xx auto correlation value.
				273	* @param[in] yy Single yy auto correlation value.
				274	* @return Calculated pitch gain.
				275	**/
				276	float ComputePitchGain(float xy, float xx, float yy);
				277
				278	/**
				279	* @brief Computes DCT vector from the given input.
				280	* @param[in] input Input vector.
				281	* @param[out] output Output vector with DCT coefficients.
				282	**/
				283	void DCT(vec1D32F& input, vec1D32F& output);
				284
				285	/**
				286	* @brief Perform inverse fourier transform on complex spectral vector.
				287	* @param[out] out Output vector.
				288	* @param[in] fftXIn Vector of floats arranged to represent complex numbers interleaved.
				289	**/
				290	void InverseTransform(vec1D32F& out, vec1D32F& fftXIn);
				291
				292	/**
				293	* @brief Perform pitch filtering.
				294	* @param[in] features Object with pre-processing calculated frame features.
				295	* @param[in] g Gain values.
				296	**/
				297	void PitchFilter(FrameFeatures& features, vec1D32F& g);
				298
				299	/**
				300	* @brief Interpolate the band gain values.
				301	* @param[out] g Gain values.
				302	* @param[in] bandE Vector with 22 elements populated with energy for
				303	* each band.
				304	**/
				305	void InterpBandGain(vec1D32F& g, vec1D32F& bandE);
				306
				307	/**
				308	* @brief Create de-noised frame.
				309	* @param[out] outFrame Output vector for storing the created audio frame.
				310	* @param[in] fftY Gain adjusted complex spectral vector.
				311	*/
				312	void FrameSynthesis(vec1D32F& outFrame, vec1D32F& fftY);
				313
				314	/* Private objects */
				315	private:
				316	FftInstance m_fftInstReal; /* FFT instance for real numbers */
				317	FftInstance m_fftInstCmplx; /* FFT instance for complex numbers */
				318	vec1D32F m_halfWindow; /* Window coefficients */
				319	vec1D32F m_dctTable; /* DCT table */
				320	vec1D32F m_analysisMem; /* Buffer used for frame analysis */
				321	vec2D32F m_cepstralMem; /* Cepstral coefficients */
				322	size_t m_memId; /* memory ID */
				323	vec1D32F m_synthesisMem; /* Synthesis mem (used by post-processing) */
				324	vec1D32F m_pitchBuf; /* Pitch buffer */
				325	float m_lastGain; /* Last gain calculated */
				326	int m_lastPeriod; /* Last period calculated */
				327	arrHp m_memHpX; /* HpX coefficients. */
				328	vec1D32F m_lastGVec; /* Last gain vector (used by post-processing) */
				329
				330	/* Constants: table with one entry per band (NB_BANDS entries). */
				331	const std::array <uint32_t, NB_BANDS> m_eband5ms {
				332	0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12,
				333	14, 16, 20, 24, 28, 34, 40, 48, 60, 78, 100};
Richard Burton	0055346	2021-11-10 16:27:14 +0000	[diff] [blame]	334	};
				335
				336
				337	} /* namespace rnn */
Richard Burton	4e00279	2022-05-04 09:45:02 +0100	[diff] [blame]	338	} /* namespace app */
Richard Burton	0055346	2021-11-10 16:27:14 +0000	[diff] [blame]	339	} /* namespace arm */
Richard Burton	4e00279	2022-05-04 09:45:02 +0100	[diff] [blame]	340
				341	#endif /* RNNOISE_FEATURE_PROCESSOR_HPP */