blob: 505d357c9a0b1401ed542b11a4480115e4c34f17 [file] [log] [blame]
alexander3c798932021-03-26 21:42:19 +00001/*
2 * Copyright (c) 2021 Arm Limited. All rights reserved.
3 * SPDX-License-Identifier: Apache-2.0
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17#include "PlatformMath.hpp"
18
19#if 0 == ARM_DSP_AVAILABLE
20 #include <cmath>
21 #include <numeric>
22#endif /* 0 == ARM_DSP_AVAILABLE */
23
24namespace arm {
25namespace app {
26namespace math {
27
28 float MathUtils::CosineF32(float radians)
29 {
30#if ARM_DSP_AVAILABLE
31 return arm_cos_f32(radians);
32#else /* ARM_DSP_AVAILABLE */
33 return cos(radians);
34#endif /* ARM_DSP_AVAILABLE */
35 }
36
Kshitij Sisodia14ab8d42021-10-22 17:35:01 +010037 float MathUtils::SineF32(float radians)
38 {
39#if ARM_DSP_AVAILABLE
40 return arm_sin_f32(radians);
41#else /* ARM_DSP_AVAILABLE */
42 return sin(radians);
43#endif /* ARM_DSP_AVAILABLE */
44 }
45
alexander3c798932021-03-26 21:42:19 +000046 float MathUtils::SqrtF32(float input)
47 {
48#if ARM_DSP_AVAILABLE
49 float output = 0.f;
50 arm_sqrt_f32(input, &output);
51 return output;
52#else /* ARM_DSP_AVAILABLE */
53 return sqrtf(input);
54#endif /* ARM_DSP_AVAILABLE */
55 }
56
57 float MathUtils::MeanF32(float* ptrSrc, const uint32_t srcLen)
58 {
59 if (!srcLen) {
60 return 0.f;
61 }
62
63#if ARM_DSP_AVAILABLE
64 float result = 0.f;
65 arm_mean_f32(ptrSrc, srcLen, &result);
66 return result;
67#else /* ARM_DSP_AVAILABLE */
68 float acc = std::accumulate(ptrSrc, ptrSrc + srcLen, 0.0);
69 return acc/srcLen;
70#endif /* ARM_DSP_AVAILABLE */
71 }
72
73 float MathUtils::StdDevF32(float* ptrSrc, const uint32_t srcLen,
74 const float mean)
75 {
76 if (!srcLen) {
77 return 0.f;
78 }
79#if ARM_DSP_AVAILABLE
80 /**
81 * Note Standard deviation calculation can be off
82 * by > 0.01 but less than < 0.1, according to
83 * preliminary findings.
84 **/
85 UNUSED(mean);
86 float stdDev = 0;
87 arm_std_f32(ptrSrc, srcLen, &stdDev);
88 return stdDev;
89#else /* ARM_DSP_AVAILABLE */
90 auto VarianceFunction = [=](float acc, const float value) {
91 return acc + (((value - mean) * (value - mean))/ srcLen);
92 };
93
94 float acc = std::accumulate(ptrSrc, ptrSrc + srcLen, 0.0,
95 VarianceFunction);
96
97 return sqrtf(acc);
98#endif /* ARM_DSP_AVAILABLE */
99 }
100
Kshitij Sisodia14ab8d42021-10-22 17:35:01 +0100101 void MathUtils::FftInitF32(const uint16_t fftLen,
102 FftInstance& fftInstance,
103 const FftType type)
alexander3c798932021-03-26 21:42:19 +0000104 {
Kshitij Sisodia14ab8d42021-10-22 17:35:01 +0100105 fftInstance.m_fftLen = fftLen;
106 fftInstance.m_initialised = false;
107 fftInstance.m_optimisedOptionAvailable = false;
108 fftInstance.m_type = type;
alexander3c798932021-03-26 21:42:19 +0000109
Kshitij Sisodia14ab8d42021-10-22 17:35:01 +0100110#if ARM_DSP_AVAILABLE
111 arm_status status = ARM_MATH_ARGUMENT_ERROR;
112 switch (fftInstance.m_type) {
113 case FftType::real:
114 status = arm_rfft_fast_init_f32(&fftInstance.m_instanceReal, fftLen);
115 break;
116
117 case FftType::complex:
118 status = arm_cfft_init_f32(&fftInstance.m_instanceComplex, fftLen);
119 break;
120
121 default:
122 printf_err("Invalid FFT type\n");
123 return;
alexander3c798932021-03-26 21:42:19 +0000124 }
Kshitij Sisodia14ab8d42021-10-22 17:35:01 +0100125
126 if (ARM_MATH_SUCCESS != status) {
127 printf_err("Failed to initialise FFT for len %d\n", fftLen);
128 } else {
129 fftInstance.m_optimisedOptionAvailable = true;
130 }
alexander3c798932021-03-26 21:42:19 +0000131#endif /* ARM_DSP_AVAILABLE */
Kshitij Sisodia14ab8d42021-10-22 17:35:01 +0100132
133 if (!fftInstance.m_optimisedOptionAvailable) {
134 debug("Non optimised FFT will be used\n.");
135 }
136
137 fftInstance.m_initialised = true;
138 }
139
140 static void FftRealF32(std::vector<float>& input,
141 std::vector<float>& fftOutput)
142 {
143 const size_t inputLength = input.size();
144 const size_t halfLength = input.size() / 2;
145
146 fftOutput[0] = 0;
147 fftOutput[1] = 0;
148 for (size_t t = 0; t < inputLength; t++) {
149 fftOutput[0] += input[t];
150 fftOutput[1] += input[t] *
151 MathUtils::CosineF32(2 * M_PI * halfLength * t / inputLength);
152 }
153
154 for (size_t k = 1, j = 2; k < halfLength; ++k, j += 2) {
155 float sumReal = 0;
156 float sumImag = 0;
157
158 const float theta = static_cast<float>(2 * M_PI * k / inputLength);
159
160 for (size_t t = 0; t < inputLength; t++) {
161 const auto angle = static_cast<float>(t * theta);
162 sumReal += input[t] * MathUtils::CosineF32(angle);
163 sumImag += -input[t]* MathUtils::SineF32(angle);
164 }
165
166 /* Arrange output to [real0, realN/2, real1, im1, real2, im2, ...] */
167 fftOutput[j] = sumReal;
168 fftOutput[j + 1] = sumImag;
169 }
170 }
171
172 static void FftComplexF32(std::vector<float>& input,
173 std::vector<float>& fftOutput)
174 {
175 const size_t fftLen = input.size() / 2;
176 for (size_t k = 0; k < fftLen; k++) {
177 float sumReal = 0;
178 float sumImag = 0;
179 const auto theta = static_cast<float>(2 * M_PI * k / fftLen);
180 for (size_t t = 0; t < fftLen; t++) {
181 const auto angle = theta * t;
182 const auto cosine = MathUtils::CosineF32(angle);
183 const auto sine = MathUtils::SineF32(angle);
184 sumReal += input[t*2] * cosine + input[t*2 + 1] * sine;
185 sumImag += -input[t*2] * sine + input[t*2 + 1] * cosine;
186 }
187 fftOutput[k*2] = sumReal;
188 fftOutput[k*2 + 1] = sumImag;
189 }
alexander3c798932021-03-26 21:42:19 +0000190 }
191
192 void MathUtils::FftF32(std::vector<float>& input,
193 std::vector<float>& fftOutput,
194 arm::app::math::FftInstance& fftInstance)
195 {
Kshitij Sisodia14ab8d42021-10-22 17:35:01 +0100196 if (!fftInstance.m_initialised) {
197 printf_err("FFT uninitialised\n");
198 return;
199 } else if (input.size() < fftInstance.m_fftLen) {
200 printf_err("FFT len: %" PRIu16 "; input len: %zu\n",
201 fftInstance.m_fftLen, input.size());
202 return;
203 } else if (fftOutput.size() < input.size()) {
204 printf_err("Output vector len insufficient to hold FFTs\n");
205 return;
alexander3c798932021-03-26 21:42:19 +0000206 }
Kshitij Sisodia14ab8d42021-10-22 17:35:01 +0100207
208 switch (fftInstance.m_type) {
209 case FftType::real:
210
211#if ARM_DSP_AVAILABLE
212 if (fftInstance.m_optimisedOptionAvailable) {
213 arm_rfft_fast_f32(&fftInstance.m_instanceReal, input.data(), fftOutput.data(), 0);
214 return;
215 }
alexander3c798932021-03-26 21:42:19 +0000216#endif /* ARM_DSP_AVAILABLE */
Kshitij Sisodia14ab8d42021-10-22 17:35:01 +0100217 FftRealF32(input, fftOutput);
218 return;
219
220 case FftType::complex:
221 if (input.size() < fftInstance.m_fftLen * 2) {
222 printf_err("Complex FFT instance should have input size >= (FFT len x 2)");
223 return;
224 }
225#if ARM_DSP_AVAILABLE
226 if (fftInstance.m_optimisedOptionAvailable) {
227 fftOutput = input; /* Complex function works in-place */
228 arm_cfft_f32(&fftInstance.m_instanceComplex, fftOutput.data(), 0, 0);
229 return;
230 }
231#endif /* ARM_DSP_AVAILABLE */
232 FftComplexF32(input, fftOutput);
233 return;
234
235 default:
236 printf_err("Invalid FFT type\n");
237 return;
238 }
alexander3c798932021-03-26 21:42:19 +0000239 }
240
241 void MathUtils::VecLogarithmF32(std::vector <float>& input,
242 std::vector <float>& output)
243 {
244#if ARM_DSP_AVAILABLE
245 arm_vlog_f32(input.data(), output.data(),
246 output.size());
247#else /* ARM_DSP_AVAILABLE */
248 for (auto in = input.begin(), out = output.begin();
alexanderc350cdc2021-04-29 20:36:09 +0100249 in != input.end() && out != output.end(); ++in, ++out) {
alexander3c798932021-03-26 21:42:19 +0000250 *out = logf(*in);
251 }
252#endif /* ARM_DSP_AVAILABLE */
253 }
254
255 float MathUtils::DotProductF32(float* srcPtrA, float* srcPtrB,
256 const uint32_t srcLen)
257 {
258 float output = 0.f;
259
260#if ARM_DSP_AVAILABLE
261 arm_dot_prod_f32(srcPtrA, srcPtrB, srcLen, &output);
262#else /* ARM_DSP_AVAILABLE */
263 for (uint32_t i = 0; i < srcLen; ++i) {
264 output += *srcPtrA++ * *srcPtrB++;
265 }
266#endif /* ARM_DSP_AVAILABLE */
267
268 return output;
269 }
270
271 bool MathUtils::ComplexMagnitudeSquaredF32(float* ptrSrc,
272 const uint32_t srcLen,
273 float* ptrDst,
274 const uint32_t dstLen)
275 {
276 if (dstLen < srcLen/2) {
277 printf_err("dstLen must be greater than srcLen/2");
278 return false;
279 }
280
281#if ARM_DSP_AVAILABLE
282 arm_cmplx_mag_squared_f32(ptrSrc, ptrDst, srcLen/2);
283#else /* ARM_DSP_AVAILABLE */
Éanna Ó Catháin036f8c72021-09-01 15:44:56 +0100284 for (uint32_t j = 0; j < srcLen/2; ++j) {
alexander3c798932021-03-26 21:42:19 +0000285 const float real = *ptrSrc++;
286 const float im = *ptrSrc++;
287 *ptrDst++ = real*real + im*im;
288 }
289#endif /* ARM_DSP_AVAILABLE */
290 return true;
291 }
292
293} /* namespace math */
294} /* namespace app */
Kshitij Sisodia14ab8d42021-10-22 17:35:01 +0100295} /* namespace arm */