blob: 0430d59d33b7291ca5bff15d2ea1d2732be82b3a [file] [log] [blame]
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001/*
2 * Copyright (c) 2017 ARM Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#ifndef __ARM_COMPUTE_TEST_TENSOR_OPERATIONS_H__
25#define __ARM_COMPUTE_TEST_TENSOR_OPERATIONS_H__
26
27#include "FixedPoint.h"
28#include "Tensor.h"
29#include "Types.h"
30#include "Utils.h"
31
32#include "FixedPoint.h"
33#include "Types.h"
34#include "arm_compute/core/FixedPoint.h"
35#include "arm_compute/core/Types.h"
36#include "tests/validation/FixedPoint.h"
Giorgio Arena50f9fd72017-06-19 17:05:30 +010037#include "tests/validation/ValidationUserConfiguration.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010038
39#include <algorithm>
40#include <array>
41#include <cmath>
Giorgio Arena50f9fd72017-06-19 17:05:30 +010042#include <random>
Anthony Barbier6ff3b192017-09-04 18:44:23 +010043
44namespace arm_compute
45{
46namespace test
47{
48namespace validation
49{
50namespace tensor_operations
51{
52namespace
53{
/** Trait mirroring std::is_floating_point, extended with float16_t when
 * ARM_COMPUTE_ENABLE_FP16 is set. cv-qualifiers on T are ignored.
 */
template <class T>
struct is_floating_point
    : std::integral_constant < bool,
      std::is_same<typename std::remove_cv<T>::type, float>::value
#if ARM_COMPUTE_ENABLE_FP16
      || std::is_same<typename std::remove_cv<T>::type, float16_t>::value
#endif
      || std::is_same<typename std::remove_cv<T>::type, double>::value
      || std::is_same<typename std::remove_cv<T>::type, long double>::value >
{
};
64
/** Return true if coordinate @p i lies inside the half-open interval [min, max). */
bool is_valid_pixel(int i, int min, int max)
{
    return !(i < min || i >= max);
}
69
70// 3D convolution for floating point type
Pablo Tello383deec2017-06-23 10:40:05 +010071template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type * = nullptr>
Anthony Barbier6ff3b192017-09-04 18:44:23 +010072void convolution3d(const T *in, const T *weights, const T *bias, T *out, int xi, int yi, int width_in, int height_in, int depth_in, int width_weights, int height_weights, int8_t fixed_point_position)
73{
74 const int half_width_weights = width_weights / 2;
75 const int half_height_weights = height_weights / 2;
76
77 // Reset accumulator
78 T acc = static_cast<T>(0);
79
80 // Compute a 2D convolution for each IFM and accumulate the result
81 for(int ifm = 0; ifm < depth_in; ++ifm)
82 {
83 // Compute the offset for the input slice
84 const int offset_slice_in = xi + yi * width_in + ifm * width_in * height_in;
85
86 // Compute 2D convolution
87 for(int yk = -half_height_weights; yk <= half_height_weights; ++yk)
88 {
89 for(int xk = -half_width_weights; xk <= half_width_weights; ++xk)
90 {
91 // Check if the pixel is out-of-bound
92 if(is_valid_pixel(xi + xk, 0, width_in) && is_valid_pixel(yi + yk, 0, height_in))
93 {
94 const int idx = xk + half_width_weights;
95 const int idy = yk + half_height_weights;
96
97 const T i_value = in[offset_slice_in + xk + yk * width_in];
98 const T w_value = weights[idx + idy * width_weights + ifm * width_weights * height_weights];
99
100 acc += i_value * w_value;
101 }
102 }
103 }
104 }
105
106 // Accumulate the bias and store the result
107 *out = acc + (*bias);
108}
109
// 3D convolution for fixed point type
/** Compute one output element of a 3D convolution in fixed-point arithmetic.
 *
 * A 2D convolution is computed per input feature map and accumulated over all
 * IFMs. The accumulation uses the promoted (wider) fixed-point type to limit
 * intermediate overflow; the result is narrowed back to T before storing.
 */
template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type * = nullptr>
void convolution3d(const T *in, const T *weights, const T *bias, T *out, int xi, int yi, int width_in, int height_in, int depth_in, int width_weights, int height_weights,
                   int8_t fixed_point_position)
{
    // Kernel radii; kernel dimensions are assumed to be odd
    const int half_width_weights  = width_weights / 2;
    const int half_height_weights = height_weights / 2;

    using namespace fixed_point_arithmetic;
    using promoted_type = typename fixed_point_arithmetic::traits::promote<T>::type;

    // Reset accumulator (wider fixed-point type than T)
    fixed_point<promoted_type> acc(0, fixed_point_position);

    // Compute a 2D convolution for each IFM and accumulate the result
    for(int ifm = 0; ifm < depth_in; ++ifm)
    {
        // Compute the offset for the input slice
        const int offset_slice_in = xi + yi * width_in + ifm * width_in * height_in;

        // Compute 2D convolution
        for(int yk = -half_height_weights; yk <= half_height_weights; ++yk)
        {
            for(int xk = -half_width_weights; xk <= half_width_weights; ++xk)
            {
                // Check if the pixel is out-of-bound; out-of-bound taps are skipped
                if(is_valid_pixel(xi + xk, 0, width_in) && is_valid_pixel(yi + yk, 0, height_in))
                {
                    const int idx = xk + half_width_weights;
                    const int idy = yk + half_height_weights;

                    // Last ctor argument presumably marks the int as a raw Q-format bit
                    // pattern (consistent with res.raw() below) — TODO confirm in FixedPoint.h
                    const fixed_point<promoted_type> i_value(in[offset_slice_in + xk + yk * width_in], fixed_point_position, true);
                    const fixed_point<promoted_type> w_value(weights[idx + idy * width_weights + ifm * width_weights * height_weights], fixed_point_position, true);
                    const fixed_point<promoted_type> iw = i_value * w_value;
                    acc                                 = iw + acc;
                }
            }
        }
    }

    // Get the bias
    const fixed_point<promoted_type> b(*bias, fixed_point_position, true);

    // Accumulate the bias and convert back to the narrow type
    acc = acc + b;
    fixed_point<T> res(acc);
    *out = res.raw();
}
158
/** Generic vector * matrix multiply: out[x] = dot(in, column x of weights) + bias[x]
 *
 * @param in                   Input vector of @p rows_weights elements
 * @param weights              Row-major matrix (rows_weights x cols_weights)
 * @param bias                 Bias vector of @p cols_weights elements
 * @param out                  Output vector of @p cols_weights elements
 * @param cols_weights         Number of matrix columns (output length)
 * @param rows_weights         Number of matrix rows (input length)
 * @param fixed_point_position Unused here; kept for signature parity with the
 *                             fixed-point specialization below
 */
template <typename T>
void vector_matrix_multiply(const T *in, const T *weights, const T *bias, T *out, int cols_weights, int rows_weights, uint8_t fixed_point_position)
{
    for(int x = 0; x < cols_weights; ++x)
    {
        // Initialise the accumulator with a value of type T rather than the
        // float literal 0.0f (consistent with convolution3d above; avoids an
        // implicit float->T conversion when T is integral)
        T acc = static_cast<T>(0);
        for(int y = 0; y < rows_weights; ++y)
        {
            acc += in[y] * weights[x + y * cols_weights];
        }
        out[x] = acc + bias[x];
    }
}
172
/** Fixed-point (int8_t) specialization of vector_matrix_multiply.
 *
 * Each dot product is accumulated in the promoted (wider) fixed-point type to
 * limit intermediate overflow, then narrowed back to int8_t before the bias
 * is added.
 */
template <>
void vector_matrix_multiply(const int8_t *in, const int8_t *weights, const int8_t *bias, int8_t *out, int cols_weights, int rows_weights, uint8_t fixed_point_position)
{
    using namespace fixed_point_arithmetic;
    using promoted_type = typename fixed_point_arithmetic::traits::promote<int8_t>::type;

    for(int x = 0; x < cols_weights; ++x)
    {
        // Reset accumulator (wider fixed-point type)
        fixed_point<promoted_type> acc(0, fixed_point_position);

        for(int y = 0; y < rows_weights; ++y)
        {
            // Last ctor argument presumably wraps the raw Q-format bits — TODO confirm
            const fixed_point<promoted_type> i_value(in[y], fixed_point_position, true);
            const fixed_point<promoted_type> w_value(weights[x + y * cols_weights], fixed_point_position, true);
            const fixed_point<promoted_type> iw = i_value * w_value;
            acc                                 = iw + acc;
        }

        // Get the bias
        const fixed_point<int8_t> b(bias[x], fixed_point_position, true);

        // Convert back and accumulate the bias
        fixed_point<int8_t> res(acc);
        res = res + b;

        // Store the result
        out[x] = res.raw();
    }
}
203
SiCong Libacaf9a2017-06-19 13:41:45 +0100204// Return a tensor element at a specified coordinate with different border modes
Giorgio Arena50f9fd72017-06-19 17:05:30 +0100205template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type = 0>
206T tensor_elem_at(const Tensor<T> &in, Coordinates &coord, BorderMode border_mode, T constant_border_value)
207{
208 const int x = coord.x();
209 const int y = coord.y();
210 const int width = static_cast<int>(in.shape().x());
211 const int height = static_cast<int>(in.shape().y());
212
SiCong Libacaf9a2017-06-19 13:41:45 +0100213 // If coordinates beyond range of tensor's width or height
Giorgio Arena50f9fd72017-06-19 17:05:30 +0100214 if(x < 0 || y < 0 || x >= width || y >= height)
215 {
SiCong Libacaf9a2017-06-19 13:41:45 +0100216 if(border_mode == BorderMode::REPLICATE)
Giorgio Arena50f9fd72017-06-19 17:05:30 +0100217 {
218 coord.set(0, std::max(0, std::min(x, width - 1)));
219 coord.set(1, std::max(0, std::min(y, height - 1)));
220 return in[coord2index(in.shape(), coord)];
221 }
222 else
223 {
SiCong Libacaf9a2017-06-19 13:41:45 +0100224 return constant_border_value;
Giorgio Arena50f9fd72017-06-19 17:05:30 +0100225 }
226 }
227 else
228 {
229 return in[coord2index(in.shape(), coord)];
230 }
231}
232
/** Apply a 2D spatial filter to the single element of @p in at @p coord and
 * write the result to the same coordinates of @p out.
 *
 * - Filter dimensions have to be odd numbers
 * - Row major order of filter assumed
 * - TO_ZERO rounding policy assumed
 * - SATURATE convert policy assumed
 *
 * @param coord                 Coordinates of the element to filter
 * @param in                    Input tensor
 * @param out                   Output tensor
 * @param filter_shape          Filter width (dim 0) and height (dim 1)
 * @param filter_itr            Pointer to the first row-major filter coefficient
 * @param scale                 Scale applied to the accumulated sum before rounding
 * @param border_mode           Resolution of out-of-bound input accesses
 * @param constant_border_value Value returned for out-of-bound accesses when
 *                              border_mode is not REPLICATE
 */
template <typename T1, typename T2, typename T3>
void apply_2d_spatial_filter(Coordinates coord, const Tensor<T1> &in, Tensor<T3> &out, const TensorShape &filter_shape, const T2 *filter_itr, float scale, BorderMode border_mode,
                             T1 constant_border_value = 0)
{
    double    val = 0;
    const int x   = coord.x();
    const int y   = coord.y();
    // Walk the filter window centred on (x, y); filter_itr advances row by row
    for(int j = y - static_cast<int>(filter_shape[1] / 2); j <= y + static_cast<int>(filter_shape[1] / 2); ++j)
    {
        for(int i = x - static_cast<int>(filter_shape[0] / 2); i <= x + static_cast<int>(filter_shape[0] / 2); ++i)
        {
            coord.set(0, i);
            coord.set(1, j);
            // tensor_elem_at resolves out-of-bound accesses according to border_mode
            val += static_cast<double>(*filter_itr) * tensor_elem_at(in, coord, border_mode, constant_border_value);
            ++filter_itr;
        }
    }
    // Restore the central coordinates before computing the output index
    coord.set(0, x);
    coord.set(1, y);
    // Scale, truncate towards zero, then saturate to the output type
    const double rounded_val = cpp11::trunc(val * static_cast<double>(scale));
    out[coord2index(in.shape(), coord)] = saturate_cast<T3>(rounded_val);
}
263} // namespace
264
Giorgio Arena50f9fd72017-06-19 17:05:30 +0100265// Sobel 3x3
266template <typename T1, typename T2>
267void sobel_3x3(Tensor<T1> &in, Tensor<T2> &out_x, Tensor<T2> &out_y, BorderMode border_mode, uint8_t constant_border_value)
268{
269 const std::array<int8_t, 9> sobel_x{ { -1, 0, 1, -2, 0, 2, -1, 0, 1 } };
270 const std::array<int8_t, 9> sobel_y{ { -1, -2, -1, 0, 0, 0, 1, 2, 1 } };
271
272 for(int element_idx = 0; element_idx < in.num_elements(); ++element_idx)
273 {
274 const Coordinates id = index2coord(in.shape(), element_idx);
275
276 apply_2d_spatial_filter(id, in, out_x, TensorShape(3U, 3U), sobel_x.data(), 1.f, border_mode, constant_border_value);
277 apply_2d_spatial_filter(id, in, out_y, TensorShape(3U, 3U), sobel_y.data(), 1.f, border_mode, constant_border_value);
278 }
279}
280
281// Sobel 5x5
282template <typename T1, typename T2>
283void sobel_5x5(Tensor<T1> &in, Tensor<T2> &out_x, Tensor<T2> &out_y, BorderMode border_mode, uint8_t constant_border_value)
284{
285 const std::array<int8_t, 25> sobel_x{ {
286 -1, -2, 0, 2, 1,
287 -4, -8, 0, 8, 4,
288 -6, -12, 0, 12, 6,
289 -4, -8, 0, 8, 4,
290 -1, -2, 0, 2, 1
291 } };
292
293 const std::array<int8_t, 25> sobel_y{ {
294 -1, -4, -6, -4, -1,
295 -2, -8, -12, -8, -2,
296 0, 0, 0, 0, 0,
297 2, 8, 12, 8, 2,
298 1, 4, 6, 4, 1
299 } };
300
301 for(int element_idx = 0; element_idx < in.num_elements(); ++element_idx)
302 {
303 const Coordinates id = index2coord(in.shape(), element_idx);
304
305 apply_2d_spatial_filter(id, in, out_x, TensorShape(5U, 5U), sobel_x.data(), 1.f, border_mode, constant_border_value);
306 apply_2d_spatial_filter(id, in, out_y, TensorShape(5U, 5U), sobel_y.data(), 1.f, border_mode, constant_border_value);
307 }
308}
309
Giorgio Arenaf7959862017-06-13 15:19:51 +0100310// Mean Standard Deviation
311template <typename T1>
312void mean_and_standard_deviation(const Tensor<T1> &in, float &mean, float &std_dev)
313{
314 int num_elements = in.num_elements();
315
316 // Calculate mean
317 mean = 0.f;
318 for(int i = 0; i < num_elements; ++i)
319 {
320 mean += in[i];
321 }
322 mean /= num_elements;
323
324 // Calculate standard deviation
325 std_dev = 0.f;
326 for(int i = 0; i < num_elements; ++i)
327 {
328 std_dev += (mean - in[i]) * (mean - in[i]);
329 }
330 std_dev = sqrt(std_dev / num_elements);
331}
332
// Integral Image
/** Compute a summed-area table for each 2D plane of @p in.
 *
 * After the call, out(x, y) of a plane holds the sum of all input pixels in
 * the rectangle [0..x] x [0..y] of that plane. All dimensions above y are
 * treated as independent planes.
 */
void integral_image(const Tensor<uint8_t> &in, Tensor<uint32_t> &out)
{
    // Length of dimensions
    const size_t width  = in.shape().x();
    const size_t height = in.shape().y();
    // Collapse every dimension above y into a single plane count
    const size_t depth  = in.shape().z() * in.shape()[3] * in.shape()[4] * in.shape()[5];

    const size_t image_size = width * height;

    for(size_t z = 0; z < depth; ++z)
    {
        // Linear offset of the first pixel of this plane
        size_t current_image = z * image_size;

        //First element of each image
        out[current_image] = in[current_image];

        // First row of each image (add only pixel on the left)
        for(size_t x = 1; x < width; ++x)
        {
            out[current_image + x] = static_cast<uint32_t>(in[current_image + x]) + out[current_image + x - 1];
        }

        // Subsequent rows
        for(size_t y = 1; y < height; ++y)
        {
            size_t current_row = current_image + (width * y);

            // First element of each row (add only pixel up)
            out[current_row] = static_cast<uint32_t>(in[current_row]) + out[current_row - width];

            // Following row elements
            for(size_t x = 1; x < width; ++x)
            {
                size_t current_pixel = current_row + x;

                // Inclusion-exclusion recurrence:
                // out = in + up(out) + left(out) - up_left(out)
                out[current_pixel] = static_cast<uint32_t>(in[current_pixel]) + out[current_pixel - 1]
                                     + out[current_pixel - width] - out[current_pixel - width - 1];
            }
        }
    }
}
376
377// Absolute difference
378template <typename T1, typename T2, typename T3>
379void absolute_difference(const Tensor<T1> &in1, const Tensor<T2> &in2, Tensor<T3> &out)
380{
381 using intermediate_type = typename common_promoted_signed_type<T1, T2, T3>::intermediate_type;
382
383 for(int i = 0; i < in1.num_elements(); ++i)
384 {
385 intermediate_type val = std::abs(static_cast<intermediate_type>(in1[i]) - static_cast<intermediate_type>(in2[i]));
386 out[i] = saturate_cast<T3>(val);
387 }
388}
389
390// Accumulate
391template <typename T1, typename T2>
392void accumulate(const Tensor<T1> &in, Tensor<T2> &out)
393{
394 using intermediate_type = typename common_promoted_signed_type<T1, T2>::intermediate_type;
395
396 for(int i = 0; i < in.num_elements(); ++i)
397 {
398 intermediate_type val = static_cast<intermediate_type>(out[i]) + static_cast<intermediate_type>(in[i]);
399 out[i] = saturate_cast<T2>(val);
400 }
401}
402
403// Accumulate squared
404template <typename T1, typename T2>
405void accumulate_squared(const Tensor<T1> &in, Tensor<T2> &out, uint32_t shift)
406{
407 if(shift > 15)
408 {
409 ARM_COMPUTE_ERROR("Shift in accumulate_squared must be within the range [0, 15]");
410 }
411 using intermediate_type = typename common_promoted_signed_type<T1, T2>::intermediate_type;
412 intermediate_type denom = 1 << shift;
413
414 for(int i = 0; i < in.num_elements(); ++i)
415 {
416 intermediate_type val = static_cast<intermediate_type>(out[i]) + (static_cast<intermediate_type>(in[i]) * static_cast<intermediate_type>(in[i]) / denom);
417 out[i] = saturate_cast<T2>(val);
418 }
419}
420
421// Accumulate weighted
422template <typename T>
423void accumulate_weighted(const Tensor<T> &in, Tensor<T> &out, float alpha)
424{
425 if(alpha < 0.f || alpha > 1.f)
426 {
427 ARM_COMPUTE_ERROR("Weight (alpha) specified in accumulate_weighted must be within the range [0, 1]");
428 }
429 using intermediate_type = typename common_promoted_signed_type<T>::intermediate_type;
430
431 for(int i = 0; i < in.num_elements(); ++i)
432 {
433 double val = (1. - static_cast<double>(alpha)) * static_cast<intermediate_type>(out[i]) + static_cast<double>(alpha) * static_cast<intermediate_type>(in[i]);
434 out[i] = static_cast<T>(val);
435 }
436}
437
438// Arithmetic addition
439template <typename T1, typename T2, typename T3>
440void arithmetic_addition(const Tensor<T1> &in1, const Tensor<T2> &in2, Tensor<T3> &out, ConvertPolicy convert_policy)
441{
442 using intermediate_type = typename common_promoted_signed_type<T1, T2, T3>::intermediate_type;
443
444 for(int i = 0; i < in1.num_elements(); ++i)
445 {
446 intermediate_type val = static_cast<intermediate_type>(in1[i]) + static_cast<intermediate_type>(in2[i]);
447 out[i] = (convert_policy == ConvertPolicy::SATURATE) ? saturate_cast<T3>(val) : static_cast<T3>(val);
448 }
449}
450
451// Arithmetic Subtraction
452template <typename T1, typename T2, typename T3>
453void arithmetic_subtraction(const Tensor<T1> &in1, const Tensor<T2> &in2, Tensor<T3> &out, ConvertPolicy convert_policy)
454{
455 using intermediate_type = typename common_promoted_signed_type<T1, T2, T3>::intermediate_type;
456
457 for(int i = 0; i < in1.num_elements(); ++i)
458 {
459 intermediate_type val = static_cast<intermediate_type>(in1[i]) - static_cast<intermediate_type>(in2[i]);
460 out[i] = (convert_policy == ConvertPolicy::SATURATE) ? saturate_cast<T3>(val) : static_cast<T3>(val);
461 }
462}
463
464// Bitwise and
465template <typename T, typename = typename std::enable_if<std::is_integral<T>::value>::type>
466void bitwise_and(const Tensor<T> &in1, const Tensor<T> &in2, Tensor<T> &out)
467{
468 for(int i = 0; i < in1.num_elements(); ++i)
469 {
470 out[i] = in1[i] & in2[i];
471 }
472}
473
474// Bitwise or
475template <typename T, typename = typename std::enable_if<std::is_integral<T>::value>::type>
476void bitwise_or(const Tensor<T> &in1, const Tensor<T> &in2, Tensor<T> &out)
477{
478 for(int i = 0; i < in1.num_elements(); ++i)
479 {
480 out[i] = in1[i] | in2[i];
481 }
482}
483
484// Bitwise xor
485template <typename T, typename = typename std::enable_if<std::is_integral<T>::value>::type>
486void bitwise_xor(const Tensor<T> &in1, const Tensor<T> &in2, Tensor<T> &out)
487{
488 for(int i = 0; i < in1.num_elements(); ++i)
489 {
490 out[i] = in1[i] ^ in2[i];
491 }
492}
493
494// Bitwise not
495template <typename T, typename = typename std::enable_if<std::is_integral<T>::value>::type>
496void bitwise_not(const Tensor<T> &in, Tensor<T> &out)
497{
498 for(int i = 0; i < in.num_elements(); ++i)
499 {
500 out[i] = ~in[i];
501 }
502}
503
SiCong Libacaf9a2017-06-19 13:41:45 +0100504// Box3x3 filter
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100505template <typename T, typename = typename std::enable_if<std::is_integral<T>::value>::type>
SiCong Libacaf9a2017-06-19 13:41:45 +0100506void box3x3(const Tensor<T> &in, Tensor<T> &out, BorderMode border_mode, T constant_border_value)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100507{
508 const std::array<T, 9> filter{ { 1, 1, 1, 1, 1, 1, 1, 1, 1 } };
SiCong Libacaf9a2017-06-19 13:41:45 +0100509 float scale = 1.f / static_cast<float>(filter.size());
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100510 for(int element_idx = 0; element_idx < in.num_elements(); ++element_idx)
511 {
512 const Coordinates id = index2coord(in.shape(), element_idx);
SiCong Libacaf9a2017-06-19 13:41:45 +0100513 apply_2d_spatial_filter(id, in, out, TensorShape(3U, 3U), filter.data(), scale, border_mode, constant_border_value);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100514 }
515}
516
// Depth conversion
/** Primary template: only the explicit specializations below implement a
 * conversion; any other <T1, T2> pair is rejected at run time.
 *
 * @param in     Input tensor
 * @param out    Output tensor
 * @param policy Overflow handling (only used by narrowing specializations)
 * @param shift  Shift applied by the integer specializations
 */
template <typename T1, typename T2>
void depth_convert(const Tensor<T1> &in, Tensor<T2> &out, ConvertPolicy policy, uint32_t shift)
{
    ARM_COMPUTE_ERROR("The conversion is not supported");
}
523
524template <>
525void depth_convert<int8_t, float>(const Tensor<int8_t> &in, Tensor<float> &out, ConvertPolicy policy, uint32_t shift)
526{
527 const int8_t fixed_point_position = static_cast<int8_t>(in.fixed_point_position());
528 for(int i = 0; i < in.num_elements(); ++i)
529 {
530 out[i] = static_cast<float>(in[i]) * (1.0f / (1 << fixed_point_position));
531 }
532}
533
/** float -> QS8: quantize by scaling with 2^fixed_point_position.
 *
 * NOTE(review): the +0.5f offset implements round-half-up for positive values
 * but rounds negative values towards +infinity (e.g. -1.2 * 2^p + 0.5 truncates
 * upward); confirm this asymmetric rounding is intended.
 */
template <>
void depth_convert<float, int8_t>(const Tensor<float> &in, Tensor<int8_t> &out, ConvertPolicy policy, uint32_t shift)
{
    const int8_t fixed_point_position = static_cast<int8_t>(in.fixed_point_position());
    for(int i = 0; i < in.num_elements(); ++i)
    {
        float val = in[i] * (1 << fixed_point_position) + 0.5f;
        // SATURATE clamps to the int8 range; WRAP truncates via static_cast
        out[i] = ((policy == ConvertPolicy::SATURATE) ? saturate_cast<int8_t>(val) : static_cast<int8_t>(val));
    }
}
544
545template <>
546void depth_convert<uint8_t, uint16_t>(const Tensor<uint8_t> &in, Tensor<uint16_t> &out, ConvertPolicy policy, uint32_t shift)
547{
548 for(int i = 0; i < in.num_elements(); ++i)
549 {
550 out[i] = static_cast<uint16_t>(in[i]) << shift;
551 }
552}
553
554template <>
555void depth_convert<uint8_t, int16_t>(const Tensor<uint8_t> &in, Tensor<int16_t> &out, ConvertPolicy policy, uint32_t shift)
556{
557 for(int i = 0; i < in.num_elements(); ++i)
558 {
559 out[i] = static_cast<int16_t>(in[i]) << shift;
560 }
561}
562
563template <>
564void depth_convert<uint8_t, int32_t>(const Tensor<uint8_t> &in, Tensor<int32_t> &out, ConvertPolicy policy, uint32_t shift)
565{
566 for(int i = 0; i < in.num_elements(); ++i)
567 {
568 out[i] = static_cast<int32_t>(in[i]) << shift;
569 }
570}
571
572template <>
573void depth_convert<uint16_t, uint8_t>(const Tensor<uint16_t> &in, Tensor<uint8_t> &out, ConvertPolicy policy, uint32_t shift)
574{
575 for(int i = 0; i < in.num_elements(); ++i)
576 {
577 uint16_t val = in[i] >> shift;
578 out[i] = ((policy == ConvertPolicy::SATURATE) ? saturate_cast<uint8_t>(val) : static_cast<uint8_t>(val));
579 }
580}
581
582template <>
583void depth_convert<uint16_t, uint32_t>(const Tensor<uint16_t> &in, Tensor<uint32_t> &out, ConvertPolicy policy, uint32_t shift)
584{
585 for(int i = 0; i < in.num_elements(); ++i)
586 {
587 out[i] = static_cast<uint32_t>(in[i]) << shift;
588 }
589}
590
591template <>
592void depth_convert<int16_t, uint8_t>(const Tensor<int16_t> &in, Tensor<uint8_t> &out, ConvertPolicy policy, uint32_t shift)
593{
594 for(int i = 0; i < in.num_elements(); ++i)
595 {
596 int16_t val = in[i] >> shift;
597 out[i] = ((policy == ConvertPolicy::SATURATE) ? saturate_cast<uint8_t>(val) : static_cast<uint8_t>(val));
598 }
599}
600template <>
601void depth_convert<int16_t, int32_t>(const Tensor<int16_t> &in, Tensor<int32_t> &out, ConvertPolicy policy, uint32_t shift)
602{
603 for(int i = 0; i < in.num_elements(); ++i)
604 {
605 out[i] = static_cast<int32_t>(in[i]) << shift;
606 }
607}
608
SiCong Li5a536642017-06-19 14:47:05 +0100609// Gaussian3x3 filter
610template <typename T, typename = typename std::enable_if<std::is_integral<T>::value>::type>
611void gaussian3x3(const Tensor<T> &in, Tensor<T> &out, BorderMode border_mode, T constant_border_value)
612{
613 const std::array<T, 9> filter{ { 1, 2, 1, 2, 4, 2, 1, 2, 1 } };
614 const float scale = 1.f / 16.f;
615 for(int element_idx = 0; element_idx < in.num_elements(); ++element_idx)
616 {
617 const Coordinates id = index2coord(in.shape(), element_idx);
618 apply_2d_spatial_filter(id, in, out, TensorShape(3U, 3U), filter.data(), scale, border_mode, constant_border_value);
619 }
620}
621
SiCong Li3eb263e2017-06-19 15:31:43 +0100622// Gaussian5x5 filter
623template <typename T, typename = typename std::enable_if<std::is_integral<T>::value>::type>
624void gaussian5x5(const Tensor<T> &in, Tensor<T> &out, BorderMode border_mode, T constant_border_value)
625{
626 const std::array<T, 25> filter{ {
627 1, 4, 6, 4, 1,
628 4, 16, 24, 16, 4,
629 6, 24, 36, 24, 6,
630 4, 16, 24, 16, 4,
631 1, 4, 6, 4, 1
632 } };
633 const float scale = 1.f / 256.f;
634 for(int element_idx = 0; element_idx < in.num_elements(); ++element_idx)
635 {
636 const Coordinates id = index2coord(in.shape(), element_idx);
637 apply_2d_spatial_filter(id, in, out, TensorShape(5U, 5U), filter.data(), scale, border_mode, constant_border_value);
638 }
639}
640
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100641// Matrix multiplication for floating point type
Pablo Tello383deec2017-06-23 10:40:05 +0100642template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type * = nullptr>
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100643void gemm(const Tensor<T> &in1, const Tensor<T> &in2, const Tensor<T> &in3, Tensor<T> &out, float alpha, float beta)
644{
645 const int M = out.shape().y();
646 const int N = out.shape().x();
647 const int K = in1.shape().x();
648
649 for(int r = 0; r < M; ++r)
650 {
651 for(int c = 0; c < N; ++c)
652 {
653 T acc = 0.0f;
654
655 for(int k = 0; k < K; ++k)
656 {
657 const T a0 = in1[r * K + k];
658 const T b0 = in2[k * N + c];
659
660 acc += a0 * b0;
661 }
662
663 // Finalize the result: A * B * alpha + C * beta
664 const T c0 = in3[c + r * N];
665 out[c + r * N] = alpha * acc + beta * c0;
666 }
667 }
668}
669
// Matrix multiplication for fixed point type
/** out = alpha * (in1 x in2) + beta * in3 in fixed-point arithmetic.
 *
 * Dot products are accumulated in the promoted (wider) fixed-point type to
 * limit intermediate overflow, then narrowed back to T before the alpha/beta
 * scaling is applied.
 */
template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type * = nullptr>
void gemm(const Tensor<T> &in1, const Tensor<T> &in2, const Tensor<T> &in3, Tensor<T> &out, float alpha, float beta)
{
    using namespace fixed_point_arithmetic;

    using promoted_type = typename fixed_point_arithmetic::traits::promote<T>::type;

    // M/N: output rows/columns, K: inner (dot-product) dimension
    const int    M                    = out.shape().y();
    const int    N                    = out.shape().x();
    const int    K                    = in1.shape().x();
    const int8_t fixed_point_position = static_cast<int8_t>(in1.fixed_point_position());

    // Quantize the scaling factors at the inputs' fixed point position
    const fixed_point<T> alpha_q(alpha, fixed_point_position);
    const fixed_point<T> beta_q(beta, fixed_point_position);

    for(int r = 0; r < M; ++r)
    {
        for(int c = 0; c < N; ++c)
        {
            // Accumulator in the promoted precision
            fixed_point<promoted_type> acc_q(0, fixed_point_position);

            for(int k = 0; k < K; ++k)
            {
                // Last ctor argument presumably wraps raw Q-format bits — TODO confirm
                const fixed_point<promoted_type> a0_q(in1[r * K + k], fixed_point_position, true);
                const fixed_point<promoted_type> b0_q(in2[k * N + c], fixed_point_position, true);
                const fixed_point<promoted_type> axb_q = a0_q * b0_q;

                acc_q = axb_q + acc_q;
            }

            // Finalize the result: A * B * alpha + C * beta
            const fixed_point<T> c0_q(in3[c + r * N], fixed_point_position, true);

            fixed_point<T> res_q(acc_q);
            res_q = alpha_q * res_q;
            res_q = (c0_q * beta_q) + res_q;

            // Store the result
            out[c + r * N] = res_q.raw();
        }
    }
}
713
// Pixel-wise multiplication
/** out[i] = in1[i] * in2[i] * scale.
 *
 * The product is formed in the promoted signed intermediate type and scaled in
 * double precision. Floating-point outputs are stored as-is; integer outputs
 * are rounded according to @p rounding_policy and narrowed according to
 * @p convert_policy.
 */
template <typename T1, typename T2, typename T3>
void pixel_wise_multiplication(const Tensor<T1> &in1, const Tensor<T2> &in2, Tensor<T3> &out, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy)
{
    if(scale < 0)
    {
        ARM_COMPUTE_ERROR("Scale of pixel-wise multiplication must be non-negative");
    }
    using intermediate_type = typename common_promoted_signed_type<T1, T2, T3>::intermediate_type;
    for(int i = 0; i < in1.num_elements(); ++i)
    {
        double val = static_cast<intermediate_type>(in1[i]) * static_cast<intermediate_type>(in2[i]) * static_cast<double>(scale);
        if(is_floating_point<T3>::value)
        {
            // Floating-point output: no explicit rounding, implicit conversion on store
            out[i] = val;
        }
        else
        {
            // Integer output: round per policy, then narrow per convert policy
            double rounded_val = 0;
            switch(rounding_policy)
            {
                case(RoundingPolicy::TO_ZERO):
                    rounded_val = cpp11::trunc(val);
                    break;
                case(RoundingPolicy::TO_NEAREST_UP):
                    rounded_val = cpp11::round_half_up(val);
                    break;
                case(RoundingPolicy::TO_NEAREST_EVEN):
                    rounded_val = cpp11::round_half_even(val);
                    break;
                default:
                    ARM_COMPUTE_ERROR("Unsupported rounding policy");
            }
            out[i] = (convert_policy == ConvertPolicy::SATURATE) ? saturate_cast<T3>(rounded_val) : static_cast<T3>(rounded_val);
        }
    }
}
751
// Fixed-point Pixel-wise Multiplication
/** Fixed-point element-wise multiply: out[i] = in1[i] * in2[i] (* scale).
 *
 * All three tensors must share the same DataType and fixed point position.
 * ConvertPolicy::SATURATE selects operator* while any other policy selects the
 * explicitly wrapping mul<OverflowPolicy::WRAP>. The extra multiply by
 * @p scale is skipped when scale == 1.
 *
 * NOTE(review): @p rounding_policy is accepted but never used in this body.
 */
template <typename T, typename = typename std::enable_if<std::is_integral<T>::value>::type>
void fixed_point_pixel_wise_multiplication(const Tensor<T> &in1, const Tensor<T> &in2, Tensor<T> &out, int scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy)
{
    using namespace fixed_point_arithmetic;

    const int fixed_point_position = in1.fixed_point_position();

    ARM_COMPUTE_ERROR_ON_MSG(in1.data_type() != in2.data_type() || in1.data_type() != out.data_type(),
                             "Tensors must all have the same DataType");
    ARM_COMPUTE_ERROR_ON_MSG(fixed_point_position != in2.fixed_point_position() || fixed_point_position != out.fixed_point_position(),
                             "Fixed-point position must be the same for both inputs and outputs");

    // Validate fixed_point_position (QS8: 1..7, QS16: 1..15)
    ARM_COMPUTE_ERROR_ON((in1.data_type() == DataType::QS8) && (fixed_point_position == 0 || fixed_point_position > 7));
    ARM_COMPUTE_ERROR_ON((in1.data_type() == DataType::QS16) && (fixed_point_position == 0 || fixed_point_position > 15));

    fixed_point<T> fp_scale(scale, fixed_point_position);
    const bool     is_sat     = convert_policy == ConvertPolicy::SATURATE;
    const bool     do_scaling = scale != 1;

    for(int i = 0; i < in1.num_elements(); ++i)
    {
        fixed_point<T> val1(in1[i], fixed_point_position, true);
        fixed_point<T> val2(in2[i], fixed_point_position, true);
        // SATURATE -> operator*, otherwise the wrapping multiply variant
        fixed_point<T> res = (is_sat) ? val1 * val2 : mul<OverflowPolicy::WRAP>(val1, val2);
        if(do_scaling)
        {
            res = (is_sat) ? res * fp_scale : mul<OverflowPolicy::WRAP>(res, fp_scale);
        }
        out[i] = res.raw();
    }
}
785
786// Threshold
787template <typename T>
788void threshold(const Tensor<T> &in, Tensor<T> &out, uint8_t threshold, uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper)
789{
790 switch(type)
791 {
792 case ThresholdType::BINARY:
793 for(int i = 0; i < in.num_elements(); ++i)
794 {
795 out[i] = ((in[i] > threshold) ? true_value : false_value);
796 }
797 break;
798 case ThresholdType::RANGE:
799 for(int i = 0; i < in.num_elements(); ++i)
800 {
801 if(in[i] > upper)
802 {
803 out[i] = false_value;
804 }
805 else if(in[i] < threshold)
806 {
807 out[i] = false_value;
808 }
809 else
810 {
811 out[i] = true_value;
812 }
813 }
814 break;
815 default:
816 ARM_COMPUTE_ERROR("Thresholding type not recognised");
817 break;
818 }
819}
820
821// Activation Layer for floating point type
Pablo Tello383deec2017-06-23 10:40:05 +0100822template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type * = nullptr>
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100823void activation_layer(const Tensor<T> &in, Tensor<T> &out, ActivationLayerInfo act_info)
824{
825 const T a = static_cast<T>(act_info.a());
826 const T b = static_cast<T>(act_info.b());
827
828 for(int i = 0; i < in.num_elements(); ++i)
829 {
830 T x = in[i];
831 switch(act_info.activation())
832 {
833 case ActivationLayerInfo::ActivationFunction::ABS:
834 out[i] = std::abs(x);
835 break;
836 case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU:
837 out[i] = std::min<T>(a, std::max<T>(0, x));
838 break;
839 case ActivationLayerInfo::ActivationFunction::LINEAR:
840 out[i] = a * x + b;
841 break;
842 case ActivationLayerInfo::ActivationFunction::LOGISTIC:
843 out[i] = static_cast<T>(1) / (static_cast<T>(1) + std::exp(-x));
844 break;
845 case ActivationLayerInfo::ActivationFunction::RELU:
846 out[i] = std::max<T>(0, x);
847 break;
848 case ActivationLayerInfo::ActivationFunction::SOFT_RELU:
849 out[i] = std::log(static_cast<T>(1) + std::exp(x));
850 break;
851 case ActivationLayerInfo::ActivationFunction::SQRT:
852 out[i] = std::sqrt(x);
853 break;
854 case ActivationLayerInfo::ActivationFunction::SQUARE:
855 out[i] = x * x;
856 break;
857 case ActivationLayerInfo::ActivationFunction::TANH:
858 out[i] = a * std::tanh(b * x);
859 break;
860 default:
861 ARM_COMPUTE_ERROR("Activation function not recognised");
862 break;
863 }
864 }
865}
866
867// Activation Layer for fixed point type
868template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type * = nullptr>
869void activation_layer(const Tensor<T> &in, Tensor<T> &out, ActivationLayerInfo act_info)
870{
871 using namespace fixed_point_arithmetic;
872 int fixed_point_position = in.fixed_point_position();
873 ActivationLayerInfo::ActivationFunction act_func = act_info.activation();
874 const fixed_point<T> a(act_info.a(), fixed_point_position);
875 const fixed_point<T> b(act_info.b(), fixed_point_position);
876 const fixed_point<T> const_0(0, fixed_point_position);
877 const fixed_point<T> const_1(1, fixed_point_position);
878
879 for(int i = 0; i < in.num_elements(); ++i)
880 {
881 fixed_point<T> x(in[i], fixed_point_position, true);
882 switch(act_func)
883 {
884 case ActivationLayerInfo::ActivationFunction::ABS:
885 out[i] = abs(x).raw();
886 break;
887 case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU:
888 out[i] = min(a, max(const_0, x)).raw();
889 break;
890 case ActivationLayerInfo::ActivationFunction::LINEAR:
891 out[i] = add(b, mul(a, x)).raw();
892 break;
893 case ActivationLayerInfo::ActivationFunction::LOGISTIC:
894 out[i] = (const_1 / (const_1 + exp(-x))).raw();
895 break;
896 case ActivationLayerInfo::ActivationFunction::RELU:
897 out[i] = max(const_0, x).raw();
898 break;
899 case ActivationLayerInfo::ActivationFunction::SOFT_RELU:
900 out[i] = log(const_1 + exp(x)).raw();
901 break;
902 case ActivationLayerInfo::ActivationFunction::SQRT:
903 out[i] = (const_1 / inv_sqrt(x)).raw();
904 break;
905 case ActivationLayerInfo::ActivationFunction::SQUARE:
906 out[i] = mul(x, x).raw();
907 break;
908 case ActivationLayerInfo::ActivationFunction::TANH:
909 out[i] = tanh(x).raw();
910 break;
911 default:
912 ARM_COMPUTE_ERROR("Activation function not recognised");
913 break;
914 }
915 }
916}
917
918// Batch Normalization Layer for fixed point type
919template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type * = nullptr>
920void batch_normalization_layer(const Tensor<T> &in, Tensor<T> &out, const Tensor<T> &mean, const Tensor<T> &var, const Tensor<T> &beta, const Tensor<T> &gamma, float epsilon, int fixed_point_position)
921{
922 const int cols = static_cast<int>(in.shape()[0]);
923 const int rows = static_cast<int>(in.shape()[1]);
924 const int depth = static_cast<int>(in.shape()[2]);
925 int upper_dims = in.shape().total_size() / (cols * rows * depth);
926
927 for(int r = 0; r < upper_dims; ++r)
928 {
929 for(int i = 0; i < depth; ++i)
930 {
931 for(int k = 0; k < rows; ++k)
932 {
933 for(int l = 0; l < cols; ++l)
934 {
935 const int pos = l + k * cols + i * rows * cols + r * cols * rows * depth;
936 fixed_point_arithmetic::fixed_point<T> in_qs8(in[pos], fixed_point_position, true);
937 fixed_point_arithmetic::fixed_point<T> var_qs8(var[i], fixed_point_position, true);
938 fixed_point_arithmetic::fixed_point<T> mean_qs8(mean[i], fixed_point_position, true);
939 fixed_point_arithmetic::fixed_point<T> beta_qs8(beta[i], fixed_point_position, true);
940 fixed_point_arithmetic::fixed_point<T> gamma_qs8(gamma[i], fixed_point_position, true);
941 fixed_point_arithmetic::fixed_point<T> epsilon_qs8(epsilon, fixed_point_position);
942
943 auto denominator = fixed_point_arithmetic::inv_sqrt(var_qs8 + epsilon_qs8);
944 auto numerator = in_qs8 - mean_qs8;
945 auto x_bar = numerator * denominator;
946 x_bar = beta_qs8 + x_bar * gamma_qs8;
947 out[pos] = x_bar.raw();
948 }
949 }
950 }
951 }
952}
953
954// Batch Normalization Layer for floating point type
Pablo Tello383deec2017-06-23 10:40:05 +0100955template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type * = nullptr>
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100956void batch_normalization_layer(const Tensor<T> &in, Tensor<T> &out, const Tensor<T> &mean, const Tensor<T> &var, const Tensor<T> &beta, const Tensor<T> &gamma, float epsilon, int fixed_point_position)
957{
958 const int cols = static_cast<int>(in.shape()[0]);
959 const int rows = static_cast<int>(in.shape()[1]);
960 const int depth = static_cast<int>(in.shape()[2]);
961 int upper_dims = in.shape().total_size() / (cols * rows * depth);
962
963 for(int r = 0; r < upper_dims; ++r)
964 {
965 for(int i = 0; i < depth; ++i)
966 {
967 for(int k = 0; k < rows; ++k)
968 {
969 for(int l = 0; l < cols; ++l)
970 {
971 const int pos = l + k * cols + i * rows * cols + r * cols * rows * depth;
972 const float denominator = sqrt(var[i] + epsilon);
973 const float numerator = in[pos] - mean[i];
974 const float x_bar = numerator / denominator;
975 out[pos] = beta[i] + x_bar * gamma[i];
976 }
977 }
978 }
979 }
980}
981
// Convolution layer
//
// Direct convolution reference: for every batch, every valid kernel centre
// position and every output feature map (OFM), the multiply-accumulate over
// the 3D input region (plus bias) is delegated to the convolution3d() helper.
template <typename T>
void convolution_layer(const Tensor<T> &in, const Tensor<T> &weights, const Tensor<T> &bias, Tensor<T> &out, const PadStrideInfo &conv_info)
{
    const int width_in       = in.shape().x();
    const int height_in      = in.shape().y();
    const int depth_in       = in.shape().z();
    const int width_out      = out.shape().x();
    const int height_out     = out.shape().y();
    const int depth_out      = out.shape().z();
    const int width_weights  = weights.shape().x();
    const int height_weights = weights.shape().y();
    const int depth_weights  = weights.shape().z();
    // Padding is clamped to the kernel radius; start_*/end_* then give the
    // range of input positions the kernel centre sweeps over.
    const int pad_xi    = std::min(static_cast<int>(conv_info.pad().first), width_weights / 2);
    const int pad_yi    = std::min(static_cast<int>(conv_info.pad().second), height_weights / 2);
    const int start_xi  = width_weights / 2 - pad_xi;
    const int start_yi  = height_weights / 2 - pad_yi;
    const int end_xi    = width_in - start_xi;
    const int end_yi    = height_in - start_yi;
    const int stride_xi = conv_info.stride().first;
    const int stride_yi = conv_info.stride().second;
    // Dimensions above the third are treated as batches
    const int num_batches = in.shape().total_size() / (width_in * height_in * depth_in);

    for(int r = 0; r < num_batches; ++r)
    {
        for(int yi = start_yi; yi < end_yi; yi += stride_yi)
        {
            for(int xi = start_xi; xi < end_xi; xi += stride_xi)
            {
                for(int ofm = 0; ofm < depth_out; ++ofm)
                {
                    // Compute input and output offsets: (xo, yo) is the output
                    // coordinate corresponding to the kernel centre (xi, yi)
                    const int offset_in  = r * width_in * height_in * depth_in;
                    const int xo         = (xi - start_xi) / stride_xi;
                    const int yo         = (yi - start_yi) / stride_yi;
                    const int offset_out = xo + yo * width_out + ofm * width_out * height_out + r * width_out * height_out * depth_out;

                    // Compute 3D convolution: one weight volume and one bias per OFM
                    convolution3d(in.data() + offset_in,
                                  weights.data() + ofm * width_weights * height_weights * depth_weights,
                                  bias.data() + ofm,
                                  out.data() + offset_out,
                                  xi, yi,
                                  width_in, height_in, depth_in,
                                  width_weights, height_weights,
                                  static_cast<int8_t>(in.fixed_point_position()));
                }
            }
        }
    }
}
1033
1034// Fully connected layer
1035template <typename T>
1036void fully_connected_layer(const Tensor<T> &in, const Tensor<T> &weights, const Tensor<T> &bias, Tensor<T> &out)
1037{
1038 ARM_COMPUTE_ERROR_ON(weights.shape().x() != out.shape().x());
1039 ARM_COMPUTE_ERROR_ON(weights.shape().y() != in.shape().x() * in.shape().y() * in.shape().z());
1040 const int cols_weights = weights.shape().x();
1041 const int rows_weights = weights.shape().y();
1042 const int num_batches = in.shape().total_size() / rows_weights;
1043
1044 for(int k = 0; k < num_batches; ++k)
1045 {
1046 vector_matrix_multiply<T>(in.data() + k * rows_weights,
1047 weights.data(),
1048 bias.data(),
1049 out.data() + k * cols_weights,
1050 cols_weights,
1051 rows_weights,
1052 in.fixed_point_position());
1053 }
1054}
1055
1056// Normalization Layer for floating point type
Pablo Tello383deec2017-06-23 10:40:05 +01001057template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type * = nullptr>
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001058void normalization_layer(const Tensor<T> &in, Tensor<T> &out, NormalizationLayerInfo norm_info)
1059{
1060 const uint32_t norm_size = norm_info.norm_size();
1061 NormType type = norm_info.type();
1062 float beta = norm_info.beta();
1063 uint32_t kappa = norm_info.kappa();
1064
1065 const int cols = static_cast<int>(in.shape()[0]);
1066 const int rows = static_cast<int>(in.shape()[1]);
1067 const int depth = static_cast<int>(in.shape()[2]);
1068 int upper_dims = in.shape().total_size() / (cols * rows);
1069
1070 float coeff = norm_info.scale_coeff();
1071 int radius_cols = norm_size / 2;
1072 // IN_MAP_1D and CROSS_MAP normalize over a single axis only
1073 int radius_rows = (NormType::IN_MAP_2D == type) ? norm_size / 2 : 0;
1074
1075 if(type == NormType::CROSS_MAP)
1076 {
1077 // Remove also depth from upper dimensions since it is the axes we want
1078 // to use for normalization
1079 upper_dims /= depth;
1080 for(int r = 0; r < upper_dims; ++r)
1081 {
1082 for(int i = 0; i < rows; ++i)
1083 {
1084 for(int k = 0; k < cols; ++k)
1085 {
1086 for(int l = 0; l < depth; ++l)
1087 {
1088 float accumulated_scale = 0.f;
1089 for(int j = -radius_cols; j <= radius_cols; ++j)
1090 {
1091 const int z = l + j;
1092 if(z >= 0 && z < depth)
1093 {
1094 const T value = in[k + i * cols + z * rows * cols + r * cols * rows * depth];
1095 accumulated_scale += value * value;
1096 }
1097 }
1098 out[k + i * cols + l * rows * cols + r * cols * rows * depth] = kappa + accumulated_scale * coeff;
1099 }
1100 }
1101 }
1102 }
1103 }
1104 else
1105 {
1106 for(int r = 0; r < upper_dims; ++r)
1107 {
1108 for(int i = 0; i < rows; ++i)
1109 {
1110 for(int k = 0; k < cols; ++k)
1111 {
1112 float accumulated_scale = 0.f;
1113 for(int j = -radius_rows; j <= radius_rows; ++j)
1114 {
1115 const int y = i + j;
1116 for(int l = -radius_cols; l <= radius_cols; ++l)
1117 {
1118 const int x = k + l;
1119 if((x >= 0 && y >= 0) && (x < cols && y < rows))
1120 {
1121 const T value = in[x + y * cols + r * cols * rows];
1122 accumulated_scale += value * value;
1123 }
1124 }
1125 }
1126 out[k + i * cols + r * cols * rows] = kappa + accumulated_scale * coeff;
1127 }
1128 }
1129 }
1130 }
1131
1132 if(beta == 1.f)
1133 {
1134 for(int i = 0; i < out.num_elements(); ++i)
1135 {
1136 out[i] = in[i] / out[i];
1137 }
1138 }
1139 else if(beta == 0.5f)
1140 {
1141 for(int i = 0; i < out.num_elements(); ++i)
1142 {
1143 out[i] = in[i] / std::sqrt(out[i]);
1144 }
1145 }
1146 else
1147 {
1148 for(int i = 0; i < out.num_elements(); ++i)
1149 {
1150 out[i] = in[i] * std::exp(std::log(out[i]) * -beta);
1151 }
1152 }
1153}
1154// Normalization Layer for fixed-point types
1155template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type * = nullptr>
1156void normalization_layer(const Tensor<T> &in, Tensor<T> &out, NormalizationLayerInfo norm_info)
1157{
1158 using namespace fixed_point_arithmetic;
1159
1160 const int fixed_point_position = in.fixed_point_position();
1161
1162 const uint32_t norm_size = norm_info.norm_size();
1163 NormType type = norm_info.type();
1164 fixed_point<T> beta(norm_info.beta(), fixed_point_position);
1165 fixed_point<T> kappa(norm_info.kappa(), fixed_point_position);
1166
1167 const int cols = static_cast<int>(in.shape()[0]);
1168 const int rows = static_cast<int>(in.shape()[1]);
1169 const int depth = static_cast<int>(in.shape()[2]);
1170 int upper_dims = in.shape().total_size() / (cols * rows);
1171
1172 fixed_point<T> coeff(norm_info.scale_coeff(), fixed_point_position);
1173 int radius_cols = norm_size / 2;
1174 // IN_MAP_1D and CROSS_MAP normalize over a single axis only
1175 int radius_rows = (NormType::IN_MAP_2D == type) ? norm_size / 2 : 0;
1176
1177 if(type == NormType::CROSS_MAP)
1178 {
1179 // Remove also depth from upper dimensions since it is the axes we want
1180 // to use for normalization
1181 upper_dims /= depth;
1182 for(int r = 0; r < upper_dims; ++r)
1183 {
1184 for(int i = 0; i < rows; ++i)
1185 {
1186 for(int k = 0; k < cols; ++k)
1187 {
1188 for(int l = 0; l < depth; ++l)
1189 {
1190 fixed_point<T> accumulated_scale(0.f, fixed_point_position);
1191 for(int j = -radius_cols; j <= radius_cols; ++j)
1192 {
1193 const int z = l + j;
1194 if(z >= 0 && z < depth)
1195 {
1196 const T value = in[k + i * cols + z * rows * cols + r * cols * rows * depth];
1197 const fixed_point<T> fp_value(value, fixed_point_position, true);
1198 accumulated_scale = add(accumulated_scale, mul(fp_value, fp_value));
1199 }
1200 }
1201 accumulated_scale = add(kappa, mul(accumulated_scale, coeff));
1202 out[k + i * cols + l * rows * cols + r * cols * rows * depth] = accumulated_scale.raw();
1203 }
1204 }
1205 }
1206 }
1207 }
1208 else
1209 {
1210 for(int r = 0; r < upper_dims; ++r)
1211 {
1212 for(int i = 0; i < rows; ++i)
1213 {
1214 for(int k = 0; k < cols; ++k)
1215 {
1216 fixed_point<T> accumulated_scale(0.f, fixed_point_position);
1217 for(int j = -radius_rows; j <= radius_rows; ++j)
1218 {
1219 const int y = i + j;
1220 for(int l = -radius_cols; l <= radius_cols; ++l)
1221 {
1222 const int x = k + l;
1223 if((x >= 0 && y >= 0) && (x < cols && y < rows))
1224 {
1225 const T value = in[x + y * cols + r * cols * rows];
1226 const fixed_point<T> fp_value(value, fixed_point_position, true);
1227 accumulated_scale = add(accumulated_scale, mul(fp_value, fp_value));
1228 }
1229 }
1230 }
1231 accumulated_scale = add(kappa, mul(accumulated_scale, coeff));
1232 out[k + i * cols + r * cols * rows] = accumulated_scale.raw();
1233 }
1234 }
1235 }
1236 }
1237
1238 if(norm_info.beta() == 1.f)
1239 {
1240 for(int i = 0; i < out.num_elements(); ++i)
1241 {
1242 fixed_point<T> res = div(fixed_point<T>(in[i], fixed_point_position, true), fixed_point<T>(out[i], fixed_point_position, true));
1243 out[i] = res.raw();
1244 }
1245 }
1246 else
1247 {
1248 const fixed_point<T> beta(norm_info.beta(), fixed_point_position);
1249 for(int i = 0; i < out.num_elements(); ++i)
1250 {
1251 fixed_point<T> res = pow(fixed_point<T>(out[i], fixed_point_position, true), beta);
1252 res = div(fixed_point<T>(in[i], fixed_point_position, true), res);
1253 out[i] = res.raw();
1254 }
1255 }
1256}
1257
// Pooling layer
//
// MAX or AVG pooling over pool_size x pool_size windows with the given
// stride/padding. Output extents are recomputed here (CEIL or FLOOR rounding)
// rather than read from out's shape. For integral T the average path uses
// saturating QS8 arithmetic with a precomputed 1/pool lookup table.
template <typename T>
void pooling_layer(const Tensor<T> &in, Tensor<T> &out, PoolingLayerInfo pool_info, int fixed_point_position)
{
    const int   pool_size     = pool_info.pool_size();
    PoolingType type          = pool_info.pool_type();
    int         pool_stride_x = 0;
    int         pool_stride_y = 0;
    int         pad_x         = 0;
    int         pad_y         = 0;
    std::tie(pool_stride_x, pool_stride_y) = pool_info.pad_stride_info().stride();
    std::tie(pad_x, pad_y) = pool_info.pad_stride_info().pad();

    const int w_in = static_cast<int>(in.shape()[0]);
    const int h_in = static_cast<int>(in.shape()[1]);

    const int w_out = static_cast<int>(out.shape()[0]);
    const int h_out = static_cast<int>(out.shape()[1]);

    // Dimensions above the second are treated as independent planes
    int upper_dims = in.shape().total_size() / (w_in * h_in);

    // Pooled extents follow the configured rounding policy
    int pooled_w = 0;
    int pooled_h = 0;
    if(pool_info.pad_stride_info().round() == DimensionRoundingType::CEIL)
    {
        pooled_w = static_cast<int>(ceil(static_cast<float>(w_in + 2 * pad_x - pool_size) / pool_stride_x)) + 1;
        pooled_h = static_cast<int>(ceil(static_cast<float>(h_in + 2 * pad_y - pool_size) / pool_stride_y)) + 1;
    }
    else
    {
        pooled_w = static_cast<int>(floor(static_cast<float>(w_in + 2 * pad_x - pool_size) / pool_stride_x)) + 1;
        pooled_h = static_cast<int>(floor(static_cast<float>(h_in + 2 * pad_y - pool_size) / pool_stride_y)) + 1;
    }

    // Drop a trailing output that would start entirely past the padded input
    if((pooled_w - 1) * pool_stride_x >= w_in + pad_x)
    {
        --pooled_w;
    }
    if((pooled_h - 1) * pool_stride_y >= h_in + pad_y)
    {
        --pooled_h;
    }

    if(type == PoolingType::MAX)
    {
        for(int r = 0; r < upper_dims; ++r)
        {
            for(int h = 0; h < pooled_h; ++h)
            {
                for(int w = 0; w < pooled_w; ++w)
                {
                    // Window clipped to the input: padding never contributes to MAX
                    int wstart = w * pool_stride_x - pad_x;
                    int hstart = h * pool_stride_y - pad_y;
                    int wend   = std::min(wstart + pool_size, w_in);
                    int hend   = std::min(hstart + pool_size, h_in);
                    wstart     = std::max(wstart, 0);
                    hstart     = std::max(hstart, 0);

                    T max_val = std::numeric_limits<T>::lowest();
                    for(int y = hstart; y < hend; ++y)
                    {
                        for(int x = wstart; x < wend; ++x)
                        {
                            T val = in[r * h_in * w_in + y * w_in + x];
                            if(val > max_val)
                            {
                                max_val = val;
                            }
                        }
                    }

                    out[r * h_out * w_out + h * pooled_w + w] = max_val;
                }
            }
        }
    }
    else // Average pooling
    {
        for(int r = 0; r < upper_dims; ++r)
        {
            for(int h = 0; h < pooled_h; ++h)
            {
                for(int w = 0; w < pooled_w; ++w)
                {
                    T   avg_val = 0;
                    // The divisor counts the padded window; the summation range
                    // is then clipped to the actual input
                    int wstart = w * pool_stride_x - pad_x;
                    int hstart = h * pool_stride_y - pad_y;
                    int wend   = std::min(wstart + pool_size, w_in + pad_x);
                    int hend   = std::min(hstart + pool_size, h_in + pad_y);
                    int pool   = (hend - hstart) * (wend - wstart);
                    wstart     = std::max(wstart, 0);
                    hstart     = std::max(hstart, 0);
                    wend       = std::min(wend, w_in);
                    hend       = std::min(hend, h_in);
                    if(is_floating_point<T>::value)
                    {
                        for(int y = hstart; y < hend; ++y)
                        {
                            for(int x = wstart; x < wend; ++x)
                            {
                                avg_val += in[r * h_in * w_in + y * w_in + x];
                            }
                        }
                        out[r * h_out * w_out + h * pooled_w + w] = avg_val / pool;
                    }
                    else
                    {
                        // QS8 path: scale_values_q8[pool] is an approximation of
                        // 1/pool in Q0.7, shifted to the tensor's Q format below
                        static std::array<qint8_t, 10> scale_values_q8 =
                        { { 0x0, 0x0, 0x40, 0x2A, 0x20, 0x19, 0x15, 0x12, 0x10, 0xE } };

                        for(int y = hstart; y < hend; ++y)
                        {
                            for(int x = wstart; x < wend; ++x)
                            {
                                // Saturating accumulation to avoid wrap-around
                                avg_val = sqadd_qs8(avg_val, in[r * h_in * w_in + y * w_in + x]);
                            }
                        }
                        out[r * h_out * w_out + h * pooled_w + w] = sqmul_qs8(avg_val, (scale_values_q8[pool] >> (7 - fixed_point_position)), fixed_point_position);
                    }
                }
            }
        }
    }
}
1382
1383// Softmax Layer
Pablo Tello383deec2017-06-23 10:40:05 +01001384template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type * = nullptr>
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001385void softmax_layer(const Tensor<T> &in, Tensor<T> &out)
1386{
1387 const int cols = static_cast<int>(in.shape()[0]);
1388 const int upper_dims = in.shape().total_size() / cols;
1389 for(int r = 0; r < upper_dims; ++r)
1390 {
1391 // Find max
1392 T max = std::numeric_limits<T>::lowest();
1393 for(int c = 0; c < cols; ++c)
1394 {
1395 const T x = in[r * cols + c];
1396 if(x > max)
1397 {
1398 max = x;
1399 }
1400 }
1401
1402 // Regularize
1403 T sum = 0;
1404 for(int c = 0; c < cols; ++c)
1405 {
1406 const T res = exp(in[r * cols + c] - max);
1407 out[r * cols + c] = res;
1408 sum += res;
1409 }
1410
1411 // Normalize
1412 const T norm_val = 1 / sum;
1413 for(int c = 0; c < cols; ++c)
1414 {
1415 out[r * cols + c] *= norm_val;
1416 }
1417 }
1418}
// Softmax Layer for fixed point type: out = exp(in - max) / sum(exp),
// computed row-wise over the first dimension in saturating Q arithmetic.
template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type * = nullptr>
void softmax_layer(const Tensor<T> &in, Tensor<T> &out)
{
    using namespace fixed_point_arithmetic;
    // Wider type used for the sum to reduce saturation while accumulating
    using promoted_T = typename test::traits::promote<T>::type;

    const int fixed_point_position = in.fixed_point_position();
    const int cols                 = static_cast<int>(in.shape()[0]);
    const int upper_dims           = in.shape().total_size() / cols;

    for(int r = 0; r < upper_dims; ++r)
    {
        // Find max (subtracted below so the exponentials stay in range)
        fixed_point<T> max(std::numeric_limits<T>::lowest(), fixed_point_position, true);
        for(int c = 0; c < cols; ++c)
        {
            const fixed_point<T> x(in[r * cols + c], fixed_point_position, true);
            if(x > max)
            {
                max = x;
            }
        }

        // Regularize: store exp(in - max) and accumulate the sum in the wider type
        fixed_point<promoted_T> sum(0, fixed_point_position);
        for(int c = 0; c < cols; ++c)
        {
            const fixed_point<T> x(in[r * cols + c], fixed_point_position, true);
            fixed_point<T>       res = exp(x - max);
            out[r * cols + c]        = res.raw();
            sum = add(sum, static_cast<fixed_point<promoted_T>>(res));
        }

        // Normalize: saturate the sum back to T, then divide each element by it
        fixed_point<T> sat_sum(sum);
        for(int c = 0; c < cols; ++c)
        {
            const fixed_point<T> x(out[r * cols + c], fixed_point_position, true);
            out[r * cols + c] = div(x, sat_sum).raw();
        }
    }
}
1461
1462// Fixed point operations
1463template <typename T>
1464void fixed_point_operation(const Tensor<T> &in, Tensor<T> &out, FixedPointOp op)
1465{
1466 int p = in.fixed_point_position();
1467 switch(op)
1468 {
1469 case FixedPointOp::EXP:
1470 for(int i = 0; i < in.num_elements(); ++i)
1471 {
1472 out[i] = fixed_point_arithmetic::exp(fixed_point_arithmetic::fixed_point<T>(in[i], p, true)).raw();
1473 }
1474 break;
1475 case FixedPointOp::LOG:
1476 for(int i = 0; i < in.num_elements(); ++i)
1477 {
1478 out[i] = fixed_point_arithmetic::log(fixed_point_arithmetic::fixed_point<T>(in[i], p, true)).raw();
1479 }
1480 break;
1481 case FixedPointOp::INV_SQRT:
1482 for(int i = 0; i < in.num_elements(); ++i)
1483 {
1484 out[i] = fixed_point_arithmetic::inv_sqrt(fixed_point_arithmetic::fixed_point<T>(in[i], p, true)).raw();
1485 }
1486 break;
1487 case FixedPointOp::RECIPROCAL:
1488 for(int i = 0; i < in.num_elements(); ++i)
1489 {
1490 out[i] = fixed_point_arithmetic::div(fixed_point_arithmetic::fixed_point<T>(1, p), fixed_point_arithmetic::fixed_point<T>(in[i], p, true)).raw();
1491 }
1492 break;
1493 default:
1494 ARM_COMPUTE_ERROR("Fixed point operation not supported");
1495 break;
1496 }
1497}
1498
1499// Tensor print
1500template <typename T>
1501void print(const Tensor<T> &in, std::ostream &out)
1502{
1503 out << "\n";
1504 for(int i = 0; i < in.num_elements(); ++i)
1505 {
1506 out << in[i] << " ";
1507 }
1508 out << "\n";
1509}
1510} // namespace tensor_operations
1511} // namespace validation
1512} // namespace test
1513} // namespace arm_compute
1514
1515#endif /* __ARM_COMPUTE_TEST_TENSOR_OPERATIONS_H__ */