blob: 89509416288c4aea82d2eb6dfe1fdb4db8286d34 [file] [log] [blame]
alexander3c798932021-03-26 21:42:19 +00001/*
Richard Burtoned35a6f2022-02-14 11:55:35 +00002 * Copyright (c) 2021-2022 Arm Limited. All rights reserved.
alexander3c798932021-03-26 21:42:19 +00003 * SPDX-License-Identifier: Apache-2.0
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17#include "PlatformMath.hpp"
alexander31ae9f02022-02-10 16:15:54 +000018#include "log_macros.h"
Kshitij Sisodia76a15802021-12-24 11:05:11 +000019#include <algorithm>
alexander3c798932021-03-26 21:42:19 +000020
21namespace arm {
22namespace app {
23namespace math {
24
25 float MathUtils::CosineF32(float radians)
26 {
Kshitij Sisodia9c6f9f82022-05-20 14:30:02 +010027#if (defined(__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1))
alexander3c798932021-03-26 21:42:19 +000028 return arm_cos_f32(radians);
Kshitij Sisodia9c6f9f82022-05-20 14:30:02 +010029#else /* __ARM_FEATURE_DSP */
alexander31ae9f02022-02-10 16:15:54 +000030 return cosf(radians);
Kshitij Sisodia9c6f9f82022-05-20 14:30:02 +010031#endif /* __ARM_FEATURE_DSP */
alexander3c798932021-03-26 21:42:19 +000032 }
33
Kshitij Sisodia14ab8d42021-10-22 17:35:01 +010034 float MathUtils::SineF32(float radians)
35 {
Kshitij Sisodia9c6f9f82022-05-20 14:30:02 +010036#if (defined(__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1))
Kshitij Sisodia14ab8d42021-10-22 17:35:01 +010037 return arm_sin_f32(radians);
Kshitij Sisodia9c6f9f82022-05-20 14:30:02 +010038#else /* __ARM_FEATURE_DSP */
alexander31ae9f02022-02-10 16:15:54 +000039 return sinf(radians);
Kshitij Sisodia9c6f9f82022-05-20 14:30:02 +010040#endif /* __ARM_FEATURE_DSP */
Kshitij Sisodia14ab8d42021-10-22 17:35:01 +010041 }
42
alexander3c798932021-03-26 21:42:19 +000043 float MathUtils::SqrtF32(float input)
44 {
Kshitij Sisodia9c6f9f82022-05-20 14:30:02 +010045#if (defined(__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1))
alexander3c798932021-03-26 21:42:19 +000046 float output = 0.f;
47 arm_sqrt_f32(input, &output);
48 return output;
Kshitij Sisodia9c6f9f82022-05-20 14:30:02 +010049#else /* __ARM_FEATURE_DSP */
alexander3c798932021-03-26 21:42:19 +000050 return sqrtf(input);
Kshitij Sisodia9c6f9f82022-05-20 14:30:02 +010051#endif /* __ARM_FEATURE_DSP */
alexander3c798932021-03-26 21:42:19 +000052 }
53
54 float MathUtils::MeanF32(float* ptrSrc, const uint32_t srcLen)
55 {
56 if (!srcLen) {
57 return 0.f;
58 }
59
Kshitij Sisodia9c6f9f82022-05-20 14:30:02 +010060#if (defined(__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1))
alexander3c798932021-03-26 21:42:19 +000061 float result = 0.f;
62 arm_mean_f32(ptrSrc, srcLen, &result);
63 return result;
Kshitij Sisodia9c6f9f82022-05-20 14:30:02 +010064#else /* __ARM_FEATURE_DSP */
alexander3c798932021-03-26 21:42:19 +000065 float acc = std::accumulate(ptrSrc, ptrSrc + srcLen, 0.0);
66 return acc/srcLen;
Kshitij Sisodia9c6f9f82022-05-20 14:30:02 +010067#endif /* __ARM_FEATURE_DSP */
alexander3c798932021-03-26 21:42:19 +000068 }
69
70 float MathUtils::StdDevF32(float* ptrSrc, const uint32_t srcLen,
71 const float mean)
72 {
73 if (!srcLen) {
74 return 0.f;
75 }
Kshitij Sisodia9c6f9f82022-05-20 14:30:02 +010076#if (defined(__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1))
alexander3c798932021-03-26 21:42:19 +000077 /**
78 * Note Standard deviation calculation can be off
79 * by > 0.01 but less than < 0.1, according to
80 * preliminary findings.
81 **/
82 UNUSED(mean);
83 float stdDev = 0;
84 arm_std_f32(ptrSrc, srcLen, &stdDev);
85 return stdDev;
Kshitij Sisodia9c6f9f82022-05-20 14:30:02 +010086#else /* __ARM_FEATURE_DSP */
alexander3c798932021-03-26 21:42:19 +000087 auto VarianceFunction = [=](float acc, const float value) {
88 return acc + (((value - mean) * (value - mean))/ srcLen);
89 };
90
91 float acc = std::accumulate(ptrSrc, ptrSrc + srcLen, 0.0,
92 VarianceFunction);
93
94 return sqrtf(acc);
Kshitij Sisodia9c6f9f82022-05-20 14:30:02 +010095#endif /* __ARM_FEATURE_DSP */
alexander3c798932021-03-26 21:42:19 +000096 }
97
Kshitij Sisodia14ab8d42021-10-22 17:35:01 +010098 void MathUtils::FftInitF32(const uint16_t fftLen,
99 FftInstance& fftInstance,
100 const FftType type)
alexander3c798932021-03-26 21:42:19 +0000101 {
Kshitij Sisodia14ab8d42021-10-22 17:35:01 +0100102 fftInstance.m_fftLen = fftLen;
103 fftInstance.m_initialised = false;
104 fftInstance.m_optimisedOptionAvailable = false;
105 fftInstance.m_type = type;
alexander3c798932021-03-26 21:42:19 +0000106
Kshitij Sisodia9c6f9f82022-05-20 14:30:02 +0100107#if (defined(__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1))
Kshitij Sisodia14ab8d42021-10-22 17:35:01 +0100108 arm_status status = ARM_MATH_ARGUMENT_ERROR;
109 switch (fftInstance.m_type) {
110 case FftType::real:
111 status = arm_rfft_fast_init_f32(&fftInstance.m_instanceReal, fftLen);
112 break;
113
114 case FftType::complex:
115 status = arm_cfft_init_f32(&fftInstance.m_instanceComplex, fftLen);
116 break;
117
118 default:
119 printf_err("Invalid FFT type\n");
120 return;
alexander3c798932021-03-26 21:42:19 +0000121 }
Kshitij Sisodia14ab8d42021-10-22 17:35:01 +0100122
123 if (ARM_MATH_SUCCESS != status) {
124 printf_err("Failed to initialise FFT for len %d\n", fftLen);
125 } else {
126 fftInstance.m_optimisedOptionAvailable = true;
127 }
Kshitij Sisodia9c6f9f82022-05-20 14:30:02 +0100128#endif /* __ARM_FEATURE_DSP */
Kshitij Sisodia14ab8d42021-10-22 17:35:01 +0100129
alexander31ae9f02022-02-10 16:15:54 +0000130 debug("Optimised FFT will be used: %s.\n", fftInstance.m_optimisedOptionAvailable? "yes": "no");
Kshitij Sisodia14ab8d42021-10-22 17:35:01 +0100131
132 fftInstance.m_initialised = true;
133 }
134
135 static void FftRealF32(std::vector<float>& input,
136 std::vector<float>& fftOutput)
137 {
138 const size_t inputLength = input.size();
139 const size_t halfLength = input.size() / 2;
140
141 fftOutput[0] = 0;
142 fftOutput[1] = 0;
143 for (size_t t = 0; t < inputLength; t++) {
144 fftOutput[0] += input[t];
145 fftOutput[1] += input[t] *
146 MathUtils::CosineF32(2 * M_PI * halfLength * t / inputLength);
147 }
148
149 for (size_t k = 1, j = 2; k < halfLength; ++k, j += 2) {
150 float sumReal = 0;
151 float sumImag = 0;
152
alexander31ae9f02022-02-10 16:15:54 +0000153 const auto theta = static_cast<float>(2 * M_PI * k / inputLength);
Kshitij Sisodia14ab8d42021-10-22 17:35:01 +0100154
155 for (size_t t = 0; t < inputLength; t++) {
156 const auto angle = static_cast<float>(t * theta);
157 sumReal += input[t] * MathUtils::CosineF32(angle);
158 sumImag += -input[t]* MathUtils::SineF32(angle);
159 }
160
161 /* Arrange output to [real0, realN/2, real1, im1, real2, im2, ...] */
162 fftOutput[j] = sumReal;
163 fftOutput[j + 1] = sumImag;
164 }
165 }
166
167 static void FftComplexF32(std::vector<float>& input,
168 std::vector<float>& fftOutput)
169 {
170 const size_t fftLen = input.size() / 2;
171 for (size_t k = 0; k < fftLen; k++) {
172 float sumReal = 0;
173 float sumImag = 0;
174 const auto theta = static_cast<float>(2 * M_PI * k / fftLen);
175 for (size_t t = 0; t < fftLen; t++) {
176 const auto angle = theta * t;
177 const auto cosine = MathUtils::CosineF32(angle);
178 const auto sine = MathUtils::SineF32(angle);
179 sumReal += input[t*2] * cosine + input[t*2 + 1] * sine;
180 sumImag += -input[t*2] * sine + input[t*2 + 1] * cosine;
181 }
182 fftOutput[k*2] = sumReal;
183 fftOutput[k*2 + 1] = sumImag;
184 }
alexander3c798932021-03-26 21:42:19 +0000185 }
186
187 void MathUtils::FftF32(std::vector<float>& input,
188 std::vector<float>& fftOutput,
189 arm::app::math::FftInstance& fftInstance)
190 {
Kshitij Sisodia14ab8d42021-10-22 17:35:01 +0100191 if (!fftInstance.m_initialised) {
192 printf_err("FFT uninitialised\n");
193 return;
194 } else if (input.size() < fftInstance.m_fftLen) {
195 printf_err("FFT len: %" PRIu16 "; input len: %zu\n",
196 fftInstance.m_fftLen, input.size());
197 return;
198 } else if (fftOutput.size() < input.size()) {
199 printf_err("Output vector len insufficient to hold FFTs\n");
200 return;
alexander3c798932021-03-26 21:42:19 +0000201 }
Kshitij Sisodia14ab8d42021-10-22 17:35:01 +0100202
203 switch (fftInstance.m_type) {
204 case FftType::real:
205
Kshitij Sisodia9c6f9f82022-05-20 14:30:02 +0100206#if (defined(__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1))
Kshitij Sisodia14ab8d42021-10-22 17:35:01 +0100207 if (fftInstance.m_optimisedOptionAvailable) {
208 arm_rfft_fast_f32(&fftInstance.m_instanceReal, input.data(), fftOutput.data(), 0);
209 return;
210 }
Kshitij Sisodia9c6f9f82022-05-20 14:30:02 +0100211#endif /* __ARM_FEATURE_DSP */
Kshitij Sisodia14ab8d42021-10-22 17:35:01 +0100212 FftRealF32(input, fftOutput);
213 return;
214
215 case FftType::complex:
216 if (input.size() < fftInstance.m_fftLen * 2) {
217 printf_err("Complex FFT instance should have input size >= (FFT len x 2)");
218 return;
219 }
Kshitij Sisodia9c6f9f82022-05-20 14:30:02 +0100220#if (defined(__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1))
Kshitij Sisodia14ab8d42021-10-22 17:35:01 +0100221 if (fftInstance.m_optimisedOptionAvailable) {
222 fftOutput = input; /* Complex function works in-place */
Richard Burton033c9152021-12-07 14:04:44 +0000223 arm_cfft_f32(&fftInstance.m_instanceComplex, fftOutput.data(), 0, 1);
Kshitij Sisodia14ab8d42021-10-22 17:35:01 +0100224 return;
225 }
Kshitij Sisodia9c6f9f82022-05-20 14:30:02 +0100226#endif /* __ARM_FEATURE_DSP */
Kshitij Sisodia14ab8d42021-10-22 17:35:01 +0100227 FftComplexF32(input, fftOutput);
228 return;
229
230 default:
231 printf_err("Invalid FFT type\n");
232 return;
233 }
alexander3c798932021-03-26 21:42:19 +0000234 }
235
236 void MathUtils::VecLogarithmF32(std::vector <float>& input,
237 std::vector <float>& output)
238 {
Kshitij Sisodia9c6f9f82022-05-20 14:30:02 +0100239#if (defined(__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1))
alexander3c798932021-03-26 21:42:19 +0000240 arm_vlog_f32(input.data(), output.data(),
241 output.size());
Kshitij Sisodia9c6f9f82022-05-20 14:30:02 +0100242#else /* __ARM_FEATURE_DSP */
alexander3c798932021-03-26 21:42:19 +0000243 for (auto in = input.begin(), out = output.begin();
alexanderc350cdc2021-04-29 20:36:09 +0100244 in != input.end() && out != output.end(); ++in, ++out) {
alexander3c798932021-03-26 21:42:19 +0000245 *out = logf(*in);
246 }
Kshitij Sisodia9c6f9f82022-05-20 14:30:02 +0100247#endif /* __ARM_FEATURE_DSP */
alexander3c798932021-03-26 21:42:19 +0000248 }
249
250 float MathUtils::DotProductF32(float* srcPtrA, float* srcPtrB,
251 const uint32_t srcLen)
252 {
253 float output = 0.f;
254
Kshitij Sisodia9c6f9f82022-05-20 14:30:02 +0100255#if (defined(__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1))
alexander3c798932021-03-26 21:42:19 +0000256 arm_dot_prod_f32(srcPtrA, srcPtrB, srcLen, &output);
Kshitij Sisodia9c6f9f82022-05-20 14:30:02 +0100257#else /* __ARM_FEATURE_DSP */
alexander3c798932021-03-26 21:42:19 +0000258 for (uint32_t i = 0; i < srcLen; ++i) {
259 output += *srcPtrA++ * *srcPtrB++;
260 }
Kshitij Sisodia9c6f9f82022-05-20 14:30:02 +0100261#endif /* __ARM_FEATURE_DSP */
alexander3c798932021-03-26 21:42:19 +0000262
263 return output;
264 }
265
266 bool MathUtils::ComplexMagnitudeSquaredF32(float* ptrSrc,
267 const uint32_t srcLen,
268 float* ptrDst,
269 const uint32_t dstLen)
270 {
271 if (dstLen < srcLen/2) {
272 printf_err("dstLen must be greater than srcLen/2");
273 return false;
274 }
275
Kshitij Sisodia9c6f9f82022-05-20 14:30:02 +0100276#if (defined(__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1))
alexander3c798932021-03-26 21:42:19 +0000277 arm_cmplx_mag_squared_f32(ptrSrc, ptrDst, srcLen/2);
Kshitij Sisodia9c6f9f82022-05-20 14:30:02 +0100278#else /* __ARM_FEATURE_DSP */
Éanna Ó Catháin036f8c72021-09-01 15:44:56 +0100279 for (uint32_t j = 0; j < srcLen/2; ++j) {
alexander3c798932021-03-26 21:42:19 +0000280 const float real = *ptrSrc++;
281 const float im = *ptrSrc++;
282 *ptrDst++ = real*real + im*im;
283 }
Kshitij Sisodia9c6f9f82022-05-20 14:30:02 +0100284#endif /* __ARM_FEATURE_DSP */
alexander3c798932021-03-26 21:42:19 +0000285 return true;
286 }
287
Kshitij Sisodia76a15802021-12-24 11:05:11 +0000288 void MathUtils::SoftmaxF32(std::vector<float>& vec)
289 {
290 /* Fix for numerical stability and apply exp. */
291 auto start = vec.begin();
292 auto end = vec.end();
293
294 float maxValue = *std::max_element(start, end);
295 for (auto it = start; it != end; ++it) {
296 *it = std::exp((*it) - maxValue);
297 }
298
299 float sumExp = std::accumulate(start, end, 0.0f);
300
301 for (auto it = start; it != end; ++it) {
302 *it = (*it)/sumExp;
303 }
304 }
305
Richard Burtoned35a6f2022-02-14 11:55:35 +0000306 float MathUtils::SigmoidF32(float x)
307 {
308 return 1.f/(1.f + std::exp(-x));
309 }
310
alexander3c798932021-03-26 21:42:19 +0000311} /* namespace math */
312} /* namespace app */
Kshitij Sisodia14ab8d42021-10-22 17:35:01 +0100313} /* namespace arm */