blob: 2b03c270acb61941304cbae02c375573d07bbbc5 [file] [log] [blame]
Georgios Pinitasdef2a852019-02-21 14:47:56 +00001/*
Adnan AlSinanbdcb4c12023-09-18 14:49:45 +01002 * Copyright (c) 2019-2020, 2023 Arm Limited.
Georgios Pinitasdef2a852019-02-21 14:47:56 +00003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "DFT.h"
25
26#include "PadLayer.h"
27#include "Permute.h"
28#include "Reverse.h"
29#include "SliceOperations.h"
SiCongLi410e21e2020-12-11 15:07:53 +000030#include "support/ToolchainSupport.h"
Georgios Pinitasdef2a852019-02-21 14:47:56 +000031
32#include <cmath>
33
34namespace arm_compute
35{
36namespace test
37{
38namespace validation
39{
40namespace reference
41{
42namespace
43{
44/** Performs an one dimensional DFT on a given real sequence.
45 *
46 * @param[in] src_ptr Pointer to the real input sequence.
47 * @param[in] N Size of input sequence.
48 * @param[out] dst_ptr Pointer to the complex output sequence.
49 * @param[out] K Size of the output sequence
50 */
51template <typename T>
52void rdft_1d_step(const T *src_ptr, size_t N, T *dst_ptr, size_t K)
53{
Michalis Spyroud1d77222020-04-08 14:10:15 +010054#if defined(_OPENMP)
55 #pragma omp parallel for
56#endif /* _OPENMP */
Georgios Pinitasdef2a852019-02-21 14:47:56 +000057 for(unsigned int k = 0; k < K; ++k)
58 {
59 float Xr = 0;
60 float Xi = 0;
61 for(unsigned int n = 0; n < N; ++n)
62 {
63 const float alpha = (2 * M_PI * k * n) / N;
64 const float val_r = src_ptr[n];
65 // Assuming DFT from the R domain thus skipping imaginary calculations
66 Xr += val_r * cos(alpha);
67 Xi -= val_r * sin(alpha);
68 }
69
70 dst_ptr[k * 2] = Xr;
71 dst_ptr[k * 2 + 1] = Xi;
72 }
73}
74
75/** Performs an one dimensional DFT on a given complex sequence.
76 *
77 * @param[in] src_ptr Pointer to the complex input sequence.
78 * @param[out] dst_ptr Pointer to the complex output sequence.
79 * @param[in] N Size of the sequences
80 */
81template <typename T>
82void dft_1d_step(const T *src_ptr, T *dst_ptr, size_t N)
83{
Michalis Spyroud1d77222020-04-08 14:10:15 +010084#if defined(_OPENMP)
85 #pragma omp parallel for
86#endif /* _OPENMP */
Georgios Pinitasdef2a852019-02-21 14:47:56 +000087 for(unsigned int k = 0; k < N; ++k)
88 {
89 float Xr = 0;
90 float Xi = 0;
91 for(unsigned int n = 0; n < N; ++n)
92 {
93 const float alpha = (2 * M_PI * k * n) / N;
94 const float val_r = src_ptr[2 * n];
95 const float val_i = src_ptr[2 * n + 1];
96 const float cos_alpha = cos(alpha);
97 const float sin_alpha = sin(alpha);
98
99 Xr += val_r * cos_alpha + val_i * sin_alpha;
100 Xi += val_i * cos_alpha - val_r * sin_alpha;
101 }
102
103 dst_ptr[k * 2] = Xr;
104 dst_ptr[k * 2 + 1] = Xi;
105 }
106}
107
108/** Performs an one dimensional inverse DFT on a given real sequence.
109 *
110 * @param[in] src_ptr Pointer to the real input sequence.
111 * @param[in] K Size of input sequence.
112 * @param[out] dst_ptr Pointer to the complex output sequence.
113 * @param[out] N Size of the output sequence
114 */
115template <typename T>
116void irdft_1d_step(const T *src_ptr, size_t K, T *dst_ptr, size_t N)
117{
118 const bool is_odd = N % 2;
119 const unsigned int Nleft = N - K;
120 const int tail_start = is_odd ? K - 1 : K - 2;
Michalis Spyroud1d77222020-04-08 14:10:15 +0100121#if defined(_OPENMP)
122 #pragma omp parallel for
123#endif /* _OPENMP */
Georgios Pinitasdef2a852019-02-21 14:47:56 +0000124 for(unsigned int n = 0; n < N; ++n)
125 {
126 float xr = 0;
127 for(unsigned int k = 0; k < K; ++k)
128 {
129 const float alpha = (2 * M_PI * k * n) / N;
130 xr += src_ptr[2 * k] * cos(alpha) - src_ptr[2 * k + 1] * sin(alpha);
131 }
132
133 unsigned int j = tail_start;
134 for(unsigned int k = 0; k < Nleft; ++k)
135 {
136 const float alpha = (2 * M_PI * (k + K) * n) / N;
137 xr += src_ptr[2 * j] * cos(alpha) + src_ptr[2 * j + 1] * sin(alpha);
138 --j;
139 }
140
141 dst_ptr[n] = xr;
142 }
143}
144
145/** Performs an one dimensional inverse DFT on a given complex sequence.
146 *
147 * @param[in] src_ptr Pointer to the complex input sequence.
148 * @param[out] dst_ptr Pointer to the complex output sequence.
149 * @param[in] N Size of the sequences
150 */
151template <typename T>
152void idft_1d_step(const T *src_ptr, T *dst_ptr, size_t N)
153{
Michalis Spyroud1d77222020-04-08 14:10:15 +0100154#if defined(_OPENMP)
155 #pragma omp parallel for
156#endif /* _OPENMP */
Georgios Pinitasdef2a852019-02-21 14:47:56 +0000157 for(unsigned int n = 0; n < N; ++n)
158 {
159 float xr = 0;
160 float xi = 0;
161 for(unsigned int k = 0; k < N; ++k)
162 {
163 const float alpha = (2 * M_PI * k * n) / N;
164 const float cos_alpha = cos(alpha);
165 const float sin_alpha = sin(alpha);
166 const float val_r = src_ptr[2 * k];
167 const float val_i = src_ptr[2 * k + 1];
168
169 xr += val_r * cos_alpha - val_i * sin_alpha;
170 xi += val_i * cos_alpha + val_r * sin_alpha;
171 }
172
173 dst_ptr[2 * n] = xr;
174 dst_ptr[2 * n + 1] = xi;
175 }
176}
177
178template <typename T>
179SimpleTensor<T> rdft_1d_core(const SimpleTensor<T> &src, FFTDirection direction, bool is_odd)
180{
181 // Performs only rdft
182 ARM_COMPUTE_ERROR_ON(direction == FFTDirection::Forward && src.num_channels() != 1);
183 ARM_COMPUTE_ERROR_ON(direction == FFTDirection::Inverse && src.num_channels() != 2);
184
185 const unsigned int inverse_tail = is_odd ? 1 : 0;
186 const unsigned int N = src.shape()[0];
187 const unsigned int K = direction == FFTDirection::Forward ? N / 2 + 1 : (N - 1) * 2 + inverse_tail;
188 const unsigned int num_channels = direction == FFTDirection::Forward ? 2 : 1;
189
190 TensorShape dst_shape = src.shape();
191 dst_shape.set(0, K);
192
193 SimpleTensor<T> dst(dst_shape, src.data_type(), num_channels);
194
195 const unsigned int upper_dims = src.shape().total_size_upper(1);
Michalis Spyroud1d77222020-04-08 14:10:15 +0100196#if defined(_OPENMP)
197 #pragma omp parallel for
198#endif /* _OPENMP */
Georgios Pinitasdef2a852019-02-21 14:47:56 +0000199 for(unsigned int du = 0; du < upper_dims; ++du)
200 {
201 const T *src_row_ptr = src.data() + du * N * src.num_channels();
202 T *dst_row_ptr = dst.data() + du * K * dst.num_channels();
203 direction == FFTDirection::Forward ? rdft_1d_step(src_row_ptr, N, dst_row_ptr, K) : irdft_1d_step(src_row_ptr, N, dst_row_ptr, K);
204 }
205
206 return dst;
207}
208
209template <typename T>
210SimpleTensor<T> dft_1d_core(const SimpleTensor<T> &src, FFTDirection direction)
211{
212 ARM_COMPUTE_ERROR_ON(src.num_channels() != 2);
213
214 const unsigned int N = src.shape()[0];
215
216 SimpleTensor<T> dst(src.shape(), src.data_type(), src.num_channels());
217
218 const unsigned int upper_dims = src.shape().total_size_upper(1);
Michalis Spyroud1d77222020-04-08 14:10:15 +0100219#if defined(_OPENMP)
220 #pragma omp parallel for
221#endif /* _OPENMP */
Georgios Pinitasdef2a852019-02-21 14:47:56 +0000222 for(unsigned int du = 0; du < upper_dims; ++du)
223 {
224 const T *src_row_ptr = src.data() + du * N * src.num_channels();
225 T *dst_row_ptr = dst.data() + du * N * dst.num_channels();
226 direction == FFTDirection::Forward ? dft_1d_step(src_row_ptr, dst_row_ptr, N) : idft_1d_step(src_row_ptr, dst_row_ptr, N);
227 }
228
229 return dst;
230}
231
232/** Scale a tensor by a given scaling factor.
233 *
234 * @param[in,out] tensor Tensor to scale.
235 * @param[in] scaling_factor Scaling to scale the tensor data with.
236 */
237template <typename T>
238void scale(SimpleTensor<T> &tensor, T scaling_factor)
239{
240 const int total_elements = tensor.num_elements() * tensor.num_channels();
241 T *data_ptr = tensor.data();
Michalis Spyroud1d77222020-04-08 14:10:15 +0100242#if defined(_OPENMP)
243 #pragma omp parallel for
244#endif /* _OPENMP */
Georgios Pinitasdef2a852019-02-21 14:47:56 +0000245 for(int i = 0; i < total_elements; ++i)
246 {
247 data_ptr[i] /= scaling_factor;
248 }
249}
250
251/** Performs a complex element-wise multiplication with reduction across the channels axis.
252 *
253 * @param[in] input Input tensor.
254 * @param[in] weights Weights tensor.
255 *
256 * @return Output tensor.
257 */
258template <typename T>
259SimpleTensor<T> complex_mul_and_reduce(const SimpleTensor<T> &input, const SimpleTensor<T> &weights)
260{
Michalis Spyroufae513c2019-10-16 17:41:33 +0100261 const uint32_t W = input.shape().x();
262 const uint32_t H = input.shape().y();
263 const uint32_t Ci = input.shape().z();
264 const uint32_t Co = weights.shape()[3];
265 const uint32_t N = input.shape().total_size() / (W * H * Ci);
Georgios Pinitasdef2a852019-02-21 14:47:56 +0000266
267 TensorShape output_shape = input.shape();
268 output_shape.set(2, Co);
269 SimpleTensor<T> dst(output_shape, input.data_type(), input.num_channels());
270
Sang-Hoon Park18fbb922021-01-14 14:50:25 +0000271 // dst memory to zero
272 const auto total_element_count = dst.num_channels() * dst.num_elements();
273 std::fill_n(dst.data(), total_element_count, 0);
Manuel Bottinif25e2952020-04-23 12:40:08 +0100274
Michalis Spyroufae513c2019-10-16 17:41:33 +0100275 for(uint32_t b = 0; b < N; ++b)
Georgios Pinitasdef2a852019-02-21 14:47:56 +0000276 {
Michalis Spyroufae513c2019-10-16 17:41:33 +0100277 for(uint32_t co = 0; co < Co; ++co)
Georgios Pinitasdef2a852019-02-21 14:47:56 +0000278 {
Michalis Spyroufae513c2019-10-16 17:41:33 +0100279 for(uint32_t ci = 0; ci < Ci; ++ci)
Georgios Pinitasdef2a852019-02-21 14:47:56 +0000280 {
Michalis Spyroufae513c2019-10-16 17:41:33 +0100281 for(uint32_t h = 0; h < H; ++h)
Georgios Pinitasdef2a852019-02-21 14:47:56 +0000282 {
Michalis Spyroufae513c2019-10-16 17:41:33 +0100283 for(uint32_t w = 0; w < W; ++w)
Georgios Pinitasdef2a852019-02-21 14:47:56 +0000284 {
Michalis Spyroufae513c2019-10-16 17:41:33 +0100285 const uint32_t i_index = w + h * W + ci * H * W + b * H * W * Ci;
286 const uint32_t w_index = w + h * W + ci * H * W + co * H * W * Ci;
287 const uint32_t o_index = w + h * W + co * H * W + b * H * W * Co;
Georgios Pinitasdef2a852019-02-21 14:47:56 +0000288 const Coordinates i_coords = index2coords(input.shape(), i_index);
289 const Coordinates w_coords = index2coords(weights.shape(), w_index);
290 const Coordinates o_coords = index2coords(dst.shape(), o_index);
291
292 auto i_ptr = static_cast<const T *>(input(i_coords));
293 auto w_ptr = static_cast<const T *>(weights(w_coords));
294 auto o_ptr = static_cast<T *>(dst(o_coords));
295
296 const T Rin = i_ptr[0];
297 const T Iin = i_ptr[1];
298 const T Rw = w_ptr[0];
299 const T Iw = w_ptr[1];
300
301 o_ptr[0] += Rin * Rw - Iin * Iw;
302 o_ptr[1] += Rin * Iw + Rw * Iin;
303 }
304 }
305 }
306 }
307 }
308 return dst;
309}
310} // namespace
311
312template <typename T>
313SimpleTensor<T> rdft_1d(const SimpleTensor<T> &src)
314{
315 return rdft_1d_core(src, FFTDirection::Forward, false);
316}
317
318template <typename T>
319SimpleTensor<T> ridft_1d(const SimpleTensor<T> &src, bool is_odd)
320{
321 auto dst = rdft_1d_core(src, FFTDirection::Inverse, is_odd);
322
Giorgio Arenaea7de7b2020-12-10 16:49:39 +0000323 const T scaling_factor = T(dst.shape()[0]);
Georgios Pinitasdef2a852019-02-21 14:47:56 +0000324 scale(dst, scaling_factor);
325
326 return dst;
327}
328
329template <typename T>
330SimpleTensor<T> dft_1d(const SimpleTensor<T> &src, FFTDirection direction)
331{
332 auto dst = dft_1d_core(src, direction);
333 if(direction == FFTDirection::Inverse)
334 {
Giorgio Arenaea7de7b2020-12-10 16:49:39 +0000335 const T scaling_factor = T(dst.shape()[0]);
Georgios Pinitasdef2a852019-02-21 14:47:56 +0000336 scale(dst, scaling_factor);
337 }
338 return dst;
339}
340
341template <typename T>
342SimpleTensor<T> rdft_2d(const SimpleTensor<T> &src)
343{
344 ARM_COMPUTE_ERROR_ON(src.num_channels() != 1);
345 constexpr FFTDirection direction = FFTDirection::Forward;
346
347 auto first_pass = rdft_1d_core(src, direction, false);
348 auto transposed = permute(first_pass, PermutationVector(1U, 0U));
349 auto second_pass = dft_1d_core(transposed, direction);
350 return permute(second_pass, PermutationVector(1U, 0U));
351}
352
353template <typename T>
354SimpleTensor<T> ridft_2d(const SimpleTensor<T> &src, bool is_odd)
355{
356 ARM_COMPUTE_ERROR_ON(src.num_channels() != 2);
357 constexpr FFTDirection direction = FFTDirection::Inverse;
358
359 auto transposed = permute(src, PermutationVector(1U, 0U));
360 auto first_pass = dft_1d_core(transposed, direction);
361 auto transposed_2 = permute(first_pass, PermutationVector(1U, 0U));
362 auto dst = rdft_1d_core(transposed_2, direction, is_odd);
363
Giorgio Arenaea7de7b2020-12-10 16:49:39 +0000364 const T scaling_factor = T(dst.shape()[0] * dst.shape()[1]);
Georgios Pinitasdef2a852019-02-21 14:47:56 +0000365 scale(dst, scaling_factor);
366 return dst;
367}
368
369template <typename T>
370SimpleTensor<T> dft_2d(const SimpleTensor<T> &src, FFTDirection direction)
371{
372 ARM_COMPUTE_ERROR_ON(src.num_channels() != 2);
373
374 if(direction == FFTDirection::Forward)
375 {
376 auto first_pass = dft_1d_core(src, direction);
377 auto transposed = permute(first_pass, PermutationVector(1U, 0U));
378 auto second_pass = dft_1d_core(transposed, direction);
379 return permute(second_pass, PermutationVector(1U, 0U));
380 }
381 else
382 {
383 auto transposed = permute(src, PermutationVector(1U, 0U));
384 auto first_pass = dft_1d_core(transposed, direction);
385 auto transposed_2 = permute(first_pass, PermutationVector(1U, 0U));
386 auto dst = dft_1d_core(transposed_2, direction);
387
Giorgio Arenaea7de7b2020-12-10 16:49:39 +0000388 const T scaling_factor = T(dst.shape()[0] * dst.shape()[1]);
Georgios Pinitasdef2a852019-02-21 14:47:56 +0000389 scale(dst, scaling_factor);
390
391 return dst;
392 }
393}
394
395template <typename T>
396SimpleTensor<T> conv2d_dft(const SimpleTensor<T> &src, const SimpleTensor<T> &w, const PadStrideInfo &conv_info)
397{
398 // Pad input to full padding
399 const PaddingList padding_in = { { 0, w.shape()[0] - 1 }, { 0, w.shape()[1] - 1 } };
400 auto padded_src = pad_layer(src, padding_in);
401
402 // Flip weights
Adnan AlSinanbdcb4c12023-09-18 14:49:45 +0100403 std::vector<uint32_t> axis_v = { 0, 1 };
404 SimpleTensor<int32_t> axis{ TensorShape(2U), DataType::S32 };
Georgios Pinitasdef2a852019-02-21 14:47:56 +0000405 std::copy(axis_v.begin(), axis_v.begin() + axis.shape().x(), axis.data());
Adnan AlSinanbdcb4c12023-09-18 14:49:45 +0100406 auto flipped_w = reverse(w, axis, /* use_inverted_axis */ false);
Georgios Pinitasdef2a852019-02-21 14:47:56 +0000407
408 // Pad weights to have the same size as input
409 const PaddingList paddings_w = { { 0, src.shape()[0] - 1 }, { 0, src.shape()[1] - 1 } };
410 auto padded_w = pad_layer(flipped_w, paddings_w);
411
412 // Transform input and weights to frequency domain
413 auto Fsrc = rdft_2d(padded_src);
414 auto Fw = rdft_2d(padded_w);
415
416 // Perform dot product
417 auto Fdst = complex_mul_and_reduce(Fsrc, Fw);
418
419 // Transform output back to frequency domain
420 auto conv_res = ridft_2d(Fdst);
421
422 // Slice output
423 const int start_left = w.shape().x() - conv_info.pad_left() - 1;
424 const int start_top = w.shape().y() - conv_info.pad_top() - 1;
425 const int end_right = conv_res.shape().x() - (w.shape().x() - conv_info.pad_right() - 1);
426 const int end_botton = conv_res.shape().y() - (w.shape().y() - conv_info.pad_bottom() - 1);
427 return slice(conv_res, Coordinates(start_left, start_top), Coordinates(end_right, end_botton));
428}
429
Giorgio Arenaea7de7b2020-12-10 16:49:39 +0000430// FP32
Georgios Pinitasdef2a852019-02-21 14:47:56 +0000431template SimpleTensor<float> rdft_1d(const SimpleTensor<float> &src);
432template SimpleTensor<float> ridft_1d(const SimpleTensor<float> &src, bool is_odd);
433template SimpleTensor<float> dft_1d(const SimpleTensor<float> &src, FFTDirection direction);
434
435template SimpleTensor<float> rdft_2d(const SimpleTensor<float> &src);
436template SimpleTensor<float> ridft_2d(const SimpleTensor<float> &src, bool is_odd);
437template SimpleTensor<float> dft_2d(const SimpleTensor<float> &src, FFTDirection direction);
438
439template SimpleTensor<float> conv2d_dft(const SimpleTensor<float> &src, const SimpleTensor<float> &w, const PadStrideInfo &conv_info);
Giorgio Arenaea7de7b2020-12-10 16:49:39 +0000440
441// FP16
442template SimpleTensor<half> rdft_1d(const SimpleTensor<half> &src);
443template SimpleTensor<half> ridft_1d(const SimpleTensor<half> &src, bool is_odd);
444template SimpleTensor<half> dft_1d(const SimpleTensor<half> &src, FFTDirection direction);
445
446template SimpleTensor<half> rdft_2d(const SimpleTensor<half> &src);
447template SimpleTensor<half> ridft_2d(const SimpleTensor<half> &src, bool is_odd);
448template SimpleTensor<half> dft_2d(const SimpleTensor<half> &src, FFTDirection direction);
449
450template SimpleTensor<half> conv2d_dft(const SimpleTensor<half> &src, const SimpleTensor<half> &w, const PadStrideInfo &conv_info);
Georgios Pinitasdef2a852019-02-21 14:47:56 +0000451} // namespace reference
452} // namespace validation
453} // namespace test
454} // namespace arm_compute