blob: 6ad1b9e150fbd0befc235eb20f2925b8003f867d [file] [log] [blame]
Georgios Pinitasdef2a852019-02-21 14:47:56 +00001/*
2 * Copyright (c) 2019 ARM Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "DFT.h"
25
26#include "PadLayer.h"
27#include "Permute.h"
28#include "Reverse.h"
29#include "SliceOperations.h"
30
31#include <cmath>
32
33namespace arm_compute
34{
35namespace test
36{
37namespace validation
38{
39namespace reference
40{
41namespace
42{
43/** Performs an one dimensional DFT on a given real sequence.
44 *
45 * @param[in] src_ptr Pointer to the real input sequence.
46 * @param[in] N Size of input sequence.
47 * @param[out] dst_ptr Pointer to the complex output sequence.
48 * @param[out] K Size of the output sequence
49 */
50template <typename T>
51void rdft_1d_step(const T *src_ptr, size_t N, T *dst_ptr, size_t K)
52{
53 for(unsigned int k = 0; k < K; ++k)
54 {
55 float Xr = 0;
56 float Xi = 0;
57 for(unsigned int n = 0; n < N; ++n)
58 {
59 const float alpha = (2 * M_PI * k * n) / N;
60 const float val_r = src_ptr[n];
61 // Assuming DFT from the R domain thus skipping imaginary calculations
62 Xr += val_r * cos(alpha);
63 Xi -= val_r * sin(alpha);
64 }
65
66 dst_ptr[k * 2] = Xr;
67 dst_ptr[k * 2 + 1] = Xi;
68 }
69}
70
71/** Performs an one dimensional DFT on a given complex sequence.
72 *
73 * @param[in] src_ptr Pointer to the complex input sequence.
74 * @param[out] dst_ptr Pointer to the complex output sequence.
75 * @param[in] N Size of the sequences
76 */
77template <typename T>
78void dft_1d_step(const T *src_ptr, T *dst_ptr, size_t N)
79{
80 for(unsigned int k = 0; k < N; ++k)
81 {
82 float Xr = 0;
83 float Xi = 0;
84 for(unsigned int n = 0; n < N; ++n)
85 {
86 const float alpha = (2 * M_PI * k * n) / N;
87 const float val_r = src_ptr[2 * n];
88 const float val_i = src_ptr[2 * n + 1];
89 const float cos_alpha = cos(alpha);
90 const float sin_alpha = sin(alpha);
91
92 Xr += val_r * cos_alpha + val_i * sin_alpha;
93 Xi += val_i * cos_alpha - val_r * sin_alpha;
94 }
95
96 dst_ptr[k * 2] = Xr;
97 dst_ptr[k * 2 + 1] = Xi;
98 }
99}
100
101/** Performs an one dimensional inverse DFT on a given real sequence.
102 *
103 * @param[in] src_ptr Pointer to the real input sequence.
104 * @param[in] K Size of input sequence.
105 * @param[out] dst_ptr Pointer to the complex output sequence.
106 * @param[out] N Size of the output sequence
107 */
108template <typename T>
109void irdft_1d_step(const T *src_ptr, size_t K, T *dst_ptr, size_t N)
110{
111 const bool is_odd = N % 2;
112 const unsigned int Nleft = N - K;
113 const int tail_start = is_odd ? K - 1 : K - 2;
114
115 for(unsigned int n = 0; n < N; ++n)
116 {
117 float xr = 0;
118 for(unsigned int k = 0; k < K; ++k)
119 {
120 const float alpha = (2 * M_PI * k * n) / N;
121 xr += src_ptr[2 * k] * cos(alpha) - src_ptr[2 * k + 1] * sin(alpha);
122 }
123
124 unsigned int j = tail_start;
125 for(unsigned int k = 0; k < Nleft; ++k)
126 {
127 const float alpha = (2 * M_PI * (k + K) * n) / N;
128 xr += src_ptr[2 * j] * cos(alpha) + src_ptr[2 * j + 1] * sin(alpha);
129 --j;
130 }
131
132 dst_ptr[n] = xr;
133 }
134}
135
136/** Performs an one dimensional inverse DFT on a given complex sequence.
137 *
138 * @param[in] src_ptr Pointer to the complex input sequence.
139 * @param[out] dst_ptr Pointer to the complex output sequence.
140 * @param[in] N Size of the sequences
141 */
142template <typename T>
143void idft_1d_step(const T *src_ptr, T *dst_ptr, size_t N)
144{
145 for(unsigned int n = 0; n < N; ++n)
146 {
147 float xr = 0;
148 float xi = 0;
149 for(unsigned int k = 0; k < N; ++k)
150 {
151 const float alpha = (2 * M_PI * k * n) / N;
152 const float cos_alpha = cos(alpha);
153 const float sin_alpha = sin(alpha);
154 const float val_r = src_ptr[2 * k];
155 const float val_i = src_ptr[2 * k + 1];
156
157 xr += val_r * cos_alpha - val_i * sin_alpha;
158 xi += val_i * cos_alpha + val_r * sin_alpha;
159 }
160
161 dst_ptr[2 * n] = xr;
162 dst_ptr[2 * n + 1] = xi;
163 }
164}
165
166template <typename T>
167SimpleTensor<T> rdft_1d_core(const SimpleTensor<T> &src, FFTDirection direction, bool is_odd)
168{
169 // Performs only rdft
170 ARM_COMPUTE_ERROR_ON(direction == FFTDirection::Forward && src.num_channels() != 1);
171 ARM_COMPUTE_ERROR_ON(direction == FFTDirection::Inverse && src.num_channels() != 2);
172
173 const unsigned int inverse_tail = is_odd ? 1 : 0;
174 const unsigned int N = src.shape()[0];
175 const unsigned int K = direction == FFTDirection::Forward ? N / 2 + 1 : (N - 1) * 2 + inverse_tail;
176 const unsigned int num_channels = direction == FFTDirection::Forward ? 2 : 1;
177
178 TensorShape dst_shape = src.shape();
179 dst_shape.set(0, K);
180
181 SimpleTensor<T> dst(dst_shape, src.data_type(), num_channels);
182
183 const unsigned int upper_dims = src.shape().total_size_upper(1);
184 for(unsigned int du = 0; du < upper_dims; ++du)
185 {
186 const T *src_row_ptr = src.data() + du * N * src.num_channels();
187 T *dst_row_ptr = dst.data() + du * K * dst.num_channels();
188 direction == FFTDirection::Forward ? rdft_1d_step(src_row_ptr, N, dst_row_ptr, K) : irdft_1d_step(src_row_ptr, N, dst_row_ptr, K);
189 }
190
191 return dst;
192}
193
194template <typename T>
195SimpleTensor<T> dft_1d_core(const SimpleTensor<T> &src, FFTDirection direction)
196{
197 ARM_COMPUTE_ERROR_ON(src.num_channels() != 2);
198
199 const unsigned int N = src.shape()[0];
200
201 SimpleTensor<T> dst(src.shape(), src.data_type(), src.num_channels());
202
203 const unsigned int upper_dims = src.shape().total_size_upper(1);
204 for(unsigned int du = 0; du < upper_dims; ++du)
205 {
206 const T *src_row_ptr = src.data() + du * N * src.num_channels();
207 T *dst_row_ptr = dst.data() + du * N * dst.num_channels();
208 direction == FFTDirection::Forward ? dft_1d_step(src_row_ptr, dst_row_ptr, N) : idft_1d_step(src_row_ptr, dst_row_ptr, N);
209 }
210
211 return dst;
212}
213
214/** Scale a tensor by a given scaling factor.
215 *
216 * @param[in,out] tensor Tensor to scale.
217 * @param[in] scaling_factor Scaling to scale the tensor data with.
218 */
219template <typename T>
220void scale(SimpleTensor<T> &tensor, T scaling_factor)
221{
222 const int total_elements = tensor.num_elements() * tensor.num_channels();
223 T *data_ptr = tensor.data();
224 for(int i = 0; i < total_elements; ++i)
225 {
226 data_ptr[i] /= scaling_factor;
227 }
228}
229
230/** Performs a complex element-wise multiplication with reduction across the channels axis.
231 *
232 * @param[in] input Input tensor.
233 * @param[in] weights Weights tensor.
234 *
235 * @return Output tensor.
236 */
237template <typename T>
238SimpleTensor<T> complex_mul_and_reduce(const SimpleTensor<T> &input, const SimpleTensor<T> &weights)
239{
240 const int W = input.shape().x();
241 const int H = input.shape().y();
242 const int Ci = input.shape().z();
243 const int Co = weights.shape()[3];
244 const int N = input.shape().total_size() / (W * H * Ci);
245
246 TensorShape output_shape = input.shape();
247 output_shape.set(2, Co);
248 SimpleTensor<T> dst(output_shape, input.data_type(), input.num_channels());
249
250 // MemSet dst memory to zero
251 std::memset(dst.data(), 0, dst.size());
252
253 for(int b = 0; b < N; ++b)
254 {
255 for(int co = 0; co < Co; ++co)
256 {
257 for(int ci = 0; ci < Ci; ++ci)
258 {
259 for(int h = 0; h < H; ++h)
260 {
261 for(int w = 0; w < W; ++w)
262 {
263 size_t i_index = w + h * W + ci * H * W + b * H * W * Ci;
264 size_t w_index = w + h * W + ci * H * W + co * H * W * Ci;
265 size_t o_index = w + h * W + co * H * W + b * H * W * Co;
266 const Coordinates i_coords = index2coords(input.shape(), i_index);
267 const Coordinates w_coords = index2coords(weights.shape(), w_index);
268 const Coordinates o_coords = index2coords(dst.shape(), o_index);
269
270 auto i_ptr = static_cast<const T *>(input(i_coords));
271 auto w_ptr = static_cast<const T *>(weights(w_coords));
272 auto o_ptr = static_cast<T *>(dst(o_coords));
273
274 const T Rin = i_ptr[0];
275 const T Iin = i_ptr[1];
276 const T Rw = w_ptr[0];
277 const T Iw = w_ptr[1];
278
279 o_ptr[0] += Rin * Rw - Iin * Iw;
280 o_ptr[1] += Rin * Iw + Rw * Iin;
281 }
282 }
283 }
284 }
285 }
286 return dst;
287}
288} // namespace
289
290template <typename T>
291SimpleTensor<T> rdft_1d(const SimpleTensor<T> &src)
292{
293 return rdft_1d_core(src, FFTDirection::Forward, false);
294}
295
296template <typename T>
297SimpleTensor<T> ridft_1d(const SimpleTensor<T> &src, bool is_odd)
298{
299 auto dst = rdft_1d_core(src, FFTDirection::Inverse, is_odd);
300
301 const T scaling_factor = dst.shape()[0];
302 scale(dst, scaling_factor);
303
304 return dst;
305}
306
307template <typename T>
308SimpleTensor<T> dft_1d(const SimpleTensor<T> &src, FFTDirection direction)
309{
310 auto dst = dft_1d_core(src, direction);
311 if(direction == FFTDirection::Inverse)
312 {
313 const T scaling_factor = dst.shape()[0];
314 scale(dst, scaling_factor);
315 }
316 return dst;
317}
318
319template <typename T>
320SimpleTensor<T> rdft_2d(const SimpleTensor<T> &src)
321{
322 ARM_COMPUTE_ERROR_ON(src.num_channels() != 1);
323 constexpr FFTDirection direction = FFTDirection::Forward;
324
325 auto first_pass = rdft_1d_core(src, direction, false);
326 auto transposed = permute(first_pass, PermutationVector(1U, 0U));
327 auto second_pass = dft_1d_core(transposed, direction);
328 return permute(second_pass, PermutationVector(1U, 0U));
329}
330
331template <typename T>
332SimpleTensor<T> ridft_2d(const SimpleTensor<T> &src, bool is_odd)
333{
334 ARM_COMPUTE_ERROR_ON(src.num_channels() != 2);
335 constexpr FFTDirection direction = FFTDirection::Inverse;
336
337 auto transposed = permute(src, PermutationVector(1U, 0U));
338 auto first_pass = dft_1d_core(transposed, direction);
339 auto transposed_2 = permute(first_pass, PermutationVector(1U, 0U));
340 auto dst = rdft_1d_core(transposed_2, direction, is_odd);
341
342 const T scaling_factor = dst.shape()[0] * dst.shape()[1];
343 scale(dst, scaling_factor);
344 return dst;
345}
346
347template <typename T>
348SimpleTensor<T> dft_2d(const SimpleTensor<T> &src, FFTDirection direction)
349{
350 ARM_COMPUTE_ERROR_ON(src.num_channels() != 2);
351
352 if(direction == FFTDirection::Forward)
353 {
354 auto first_pass = dft_1d_core(src, direction);
355 auto transposed = permute(first_pass, PermutationVector(1U, 0U));
356 auto second_pass = dft_1d_core(transposed, direction);
357 return permute(second_pass, PermutationVector(1U, 0U));
358 }
359 else
360 {
361 auto transposed = permute(src, PermutationVector(1U, 0U));
362 auto first_pass = dft_1d_core(transposed, direction);
363 auto transposed_2 = permute(first_pass, PermutationVector(1U, 0U));
364 auto dst = dft_1d_core(transposed_2, direction);
365
366 const T scaling_factor = dst.shape()[0] * dst.shape()[1];
367 scale(dst, scaling_factor);
368
369 return dst;
370 }
371}
372
373template <typename T>
374SimpleTensor<T> conv2d_dft(const SimpleTensor<T> &src, const SimpleTensor<T> &w, const PadStrideInfo &conv_info)
375{
376 // Pad input to full padding
377 const PaddingList padding_in = { { 0, w.shape()[0] - 1 }, { 0, w.shape()[1] - 1 } };
378 auto padded_src = pad_layer(src, padding_in);
379
380 // Flip weights
381 std::vector<uint32_t> axis_v = { 0, 1 };
382 SimpleTensor<uint32_t> axis{ TensorShape(2U), DataType::U32 };
383 std::copy(axis_v.begin(), axis_v.begin() + axis.shape().x(), axis.data());
384 auto flipped_w = reverse(w, axis);
385
386 // Pad weights to have the same size as input
387 const PaddingList paddings_w = { { 0, src.shape()[0] - 1 }, { 0, src.shape()[1] - 1 } };
388 auto padded_w = pad_layer(flipped_w, paddings_w);
389
390 // Transform input and weights to frequency domain
391 auto Fsrc = rdft_2d(padded_src);
392 auto Fw = rdft_2d(padded_w);
393
394 // Perform dot product
395 auto Fdst = complex_mul_and_reduce(Fsrc, Fw);
396
397 // Transform output back to frequency domain
398 auto conv_res = ridft_2d(Fdst);
399
400 // Slice output
401 const int start_left = w.shape().x() - conv_info.pad_left() - 1;
402 const int start_top = w.shape().y() - conv_info.pad_top() - 1;
403 const int end_right = conv_res.shape().x() - (w.shape().x() - conv_info.pad_right() - 1);
404 const int end_botton = conv_res.shape().y() - (w.shape().y() - conv_info.pad_bottom() - 1);
405 return slice(conv_res, Coordinates(start_left, start_top), Coordinates(end_right, end_botton));
406}
407
408template SimpleTensor<float> rdft_1d(const SimpleTensor<float> &src);
409template SimpleTensor<float> ridft_1d(const SimpleTensor<float> &src, bool is_odd);
410template SimpleTensor<float> dft_1d(const SimpleTensor<float> &src, FFTDirection direction);
411
412template SimpleTensor<float> rdft_2d(const SimpleTensor<float> &src);
413template SimpleTensor<float> ridft_2d(const SimpleTensor<float> &src, bool is_odd);
414template SimpleTensor<float> dft_2d(const SimpleTensor<float> &src, FFTDirection direction);
415
416template SimpleTensor<float> conv2d_dft(const SimpleTensor<float> &src, const SimpleTensor<float> &w, const PadStrideInfo &conv_info);
417} // namespace reference
418} // namespace validation
419} // namespace test
420} // namespace arm_compute