blob: 843c52fec48d4f093ea5c5448b682ba13e6cfe35 [file] [log] [blame]
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001/*
2 * Copyright (c) 2017 ARM Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#ifndef __ARM_COMPUTE_TEST_TENSOR_OPERATIONS_H__
25#define __ARM_COMPUTE_TEST_TENSOR_OPERATIONS_H__
26
27#include "FixedPoint.h"
28#include "Tensor.h"
29#include "Types.h"
30#include "Utils.h"
31
32#include "FixedPoint.h"
33#include "Types.h"
34#include "arm_compute/core/FixedPoint.h"
35#include "arm_compute/core/Types.h"
36#include "tests/validation/FixedPoint.h"
Giorgio Arena50f9fd72017-06-19 17:05:30 +010037#include "tests/validation/ValidationUserConfiguration.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010038
39#include <algorithm>
40#include <array>
41#include <cmath>
Giorgio Arena50f9fd72017-06-19 17:05:30 +010042#include <random>
Anthony Barbier6ff3b192017-09-04 18:44:23 +010043
44namespace arm_compute
45{
46namespace test
47{
48namespace validation
49{
50namespace tensor_operations
51{
52namespace
53{
/** Type trait: true for float, double and long double (ignoring cv-qualifiers),
 *  and additionally for float16_t when ARM_COMPUTE_ENABLE_FP16 is defined.
 *
 *  Used instead of std::is_floating_point so half-precision values are treated
 *  as floating point when FP16 support is compiled in.
 */
template <class T>
struct is_floating_point
    : std::integral_constant < bool,
      std::is_same<float, typename std::remove_cv<T>::type>::value ||
#if ARM_COMPUTE_ENABLE_FP16
      std::is_same<float16_t, typename std::remove_cv<T>::type>::value ||
#endif
      std::is_same<double, typename std::remove_cv<T>::type>::value || std::is_same<long double, typename std::remove_cv<T>::type>::value >
{
};
64
/** Return true when coordinate @p i lies inside the half-open range [min, max). */
bool is_valid_pixel(int i, int min, int max)
{
    const bool not_below = (i >= min);
    const bool not_above = (i < max);
    return not_below && not_above;
}
69
70// 3D convolution for floating point type
Pablo Tello383deec2017-06-23 10:40:05 +010071template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type * = nullptr>
Anthony Barbier6ff3b192017-09-04 18:44:23 +010072void convolution3d(const T *in, const T *weights, const T *bias, T *out, int xi, int yi, int width_in, int height_in, int depth_in, int width_weights, int height_weights, int8_t fixed_point_position)
73{
74 const int half_width_weights = width_weights / 2;
75 const int half_height_weights = height_weights / 2;
76
77 // Reset accumulator
78 T acc = static_cast<T>(0);
79
80 // Compute a 2D convolution for each IFM and accumulate the result
81 for(int ifm = 0; ifm < depth_in; ++ifm)
82 {
83 // Compute the offset for the input slice
84 const int offset_slice_in = xi + yi * width_in + ifm * width_in * height_in;
85
86 // Compute 2D convolution
87 for(int yk = -half_height_weights; yk <= half_height_weights; ++yk)
88 {
89 for(int xk = -half_width_weights; xk <= half_width_weights; ++xk)
90 {
91 // Check if the pixel is out-of-bound
92 if(is_valid_pixel(xi + xk, 0, width_in) && is_valid_pixel(yi + yk, 0, height_in))
93 {
94 const int idx = xk + half_width_weights;
95 const int idy = yk + half_height_weights;
96
97 const T i_value = in[offset_slice_in + xk + yk * width_in];
98 const T w_value = weights[idx + idy * width_weights + ifm * width_weights * height_weights];
99
100 acc += i_value * w_value;
101 }
102 }
103 }
104 }
105
106 // Accumulate the bias and store the result
107 *out = acc + (*bias);
108}
109
// 3D convolution for fixed point type
/** Compute one output element of a 3D convolution at position (@p xi, @p yi)
 * for fixed point tensors.
 *
 * Same element layout as the floating point overload: @p in is
 * width_in x height_in x depth_in, @p weights is
 * width_weights x height_weights x depth_in (row-major) and @p bias is a
 * single value. All arithmetic is carried out in the promoted fixed point
 * type for extra headroom, then converted back to T at the end.
 */
template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type * = nullptr>
void convolution3d(const T *in, const T *weights, const T *bias, T *out, int xi, int yi, int width_in, int height_in, int depth_in, int width_weights, int height_weights,
                   int8_t fixed_point_position)
{
    const int half_width_weights  = width_weights / 2;
    const int half_height_weights = height_weights / 2;

    using namespace fixed_point_arithmetic;
    using promoted_type = typename fixed_point_arithmetic::traits::promote<T>::type;

    // Reset accumulator
    fixed_point<promoted_type> acc(0, fixed_point_position);

    // Compute a 2D convolution for each IFM and accumulate the result
    for(int ifm = 0; ifm < depth_in; ++ifm)
    {
        // Compute the offset for the input slice
        const int offset_slice_in = xi + yi * width_in + ifm * width_in * height_in;

        // Compute 2D convolution
        for(int yk = -half_height_weights; yk <= half_height_weights; ++yk)
        {
            for(int xk = -half_width_weights; xk <= half_width_weights; ++xk)
            {
                // Check if the pixel is out-of-bound
                if(is_valid_pixel(xi + xk, 0, width_in) && is_valid_pixel(yi + yk, 0, height_in))
                {
                    const int idx = xk + half_width_weights;
                    const int idy = yk + half_height_weights;

                    // Trailing 'true' -> construct from the raw underlying integer representation
                    const fixed_point<promoted_type> i_value(in[offset_slice_in + xk + yk * width_in], fixed_point_position, true);
                    const fixed_point<promoted_type> w_value(weights[idx + idy * width_weights + ifm * width_weights * height_weights], fixed_point_position, true);
                    const fixed_point<promoted_type> iw = i_value * w_value;
                    acc                                 = iw + acc;
                }
            }
        }
    }

    // Get the bias
    const fixed_point<promoted_type> b(*bias, fixed_point_position, true);

    // Accumulate the bias and convert back
    acc = acc + b;
    fixed_point<T> res(acc);
    *out = res.raw();
}
158
/** Reference vector * matrix multiply: out (1 x cols_weights) =
 *  in (1 x rows_weights) * weights (rows_weights x cols_weights) + bias.
 *  @p fixed_point_position is unused in the generic (non fixed point) version.
 */
template <typename T>
void vector_matrix_multiply(const T *in, const T *weights, const T *bias, T *out, int cols_weights, int rows_weights, uint8_t fixed_point_position)
{
    for(int col = 0; col < cols_weights; ++col)
    {
        // Walk one column of the (row-major) weight matrix
        const T *w_ptr = weights + col;
        T        dot   = 0.0f;

        for(int row = 0; row < rows_weights; ++row, w_ptr += cols_weights)
        {
            dot += in[row] * (*w_ptr);
        }

        out[col] = dot + bias[col];
    }
}
172
/** Fixed point (QS8) specialization of vector_matrix_multiply.
 *
 * Products are accumulated in the promoted fixed point type for headroom,
 * converted back to 8 bit, and the bias is added in 8-bit fixed point.
 */
template <>
void vector_matrix_multiply(const int8_t *in, const int8_t *weights, const int8_t *bias, int8_t *out, int cols_weights, int rows_weights, uint8_t fixed_point_position)
{
    using namespace fixed_point_arithmetic;
    using promoted_type = typename fixed_point_arithmetic::traits::promote<int8_t>::type;

    for(int x = 0; x < cols_weights; ++x)
    {
        // Reset accumulator
        fixed_point<promoted_type> acc(0, fixed_point_position);

        for(int y = 0; y < rows_weights; ++y)
        {
            // Trailing 'true' -> construct from the raw underlying integer representation
            const fixed_point<promoted_type> i_value(in[y], fixed_point_position, true);
            const fixed_point<promoted_type> w_value(weights[x + y * cols_weights], fixed_point_position, true);
            const fixed_point<promoted_type> iw = i_value * w_value;
            acc                                 = iw + acc;
        }

        // Get the bias
        const fixed_point<int8_t> b(bias[x], fixed_point_position, true);

        // Convert back and accumulate the bias
        fixed_point<int8_t> res(acc);
        res = res + b;

        // Store the result
        out[x] = res.raw();
    }
}
203
SiCong Libacaf9a2017-06-19 13:41:45 +0100204// Return a tensor element at a specified coordinate with different border modes
Giorgio Arena50f9fd72017-06-19 17:05:30 +0100205template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type = 0>
206T tensor_elem_at(const Tensor<T> &in, Coordinates &coord, BorderMode border_mode, T constant_border_value)
207{
208 const int x = coord.x();
209 const int y = coord.y();
210 const int width = static_cast<int>(in.shape().x());
211 const int height = static_cast<int>(in.shape().y());
212
SiCong Libacaf9a2017-06-19 13:41:45 +0100213 // If coordinates beyond range of tensor's width or height
Giorgio Arena50f9fd72017-06-19 17:05:30 +0100214 if(x < 0 || y < 0 || x >= width || y >= height)
215 {
SiCong Libacaf9a2017-06-19 13:41:45 +0100216 if(border_mode == BorderMode::REPLICATE)
Giorgio Arena50f9fd72017-06-19 17:05:30 +0100217 {
218 coord.set(0, std::max(0, std::min(x, width - 1)));
219 coord.set(1, std::max(0, std::min(y, height - 1)));
220 return in[coord2index(in.shape(), coord)];
221 }
222 else
223 {
SiCong Libacaf9a2017-06-19 13:41:45 +0100224 return constant_border_value;
Giorgio Arena50f9fd72017-06-19 17:05:30 +0100225 }
226 }
227 else
228 {
229 return in[coord2index(in.shape(), coord)];
230 }
231}
232
/** Apply 2D spatial filter on a single element of @p in at coordinates @p coord
 *
 * - filter sizes have to be odd number
 * - Row major order of filter assumed
 * - TO_ZERO rounding policy assumed
 * - SATURATE convert policy assumed
 *
 */
template <typename T1, typename T2, typename T3>
void apply_2d_spatial_filter(Coordinates coord, const Tensor<T1> &in, Tensor<T3> &out, const TensorShape &filter_shape, const T2 *filter_itr, float scale, BorderMode border_mode,
                             T1 constant_border_value = 0)
{
    double    val = 0;
    const int x   = coord.x();
    const int y   = coord.y();
    // Walk the filter window centred on (x, y). filter_itr advances in row-major
    // lock-step with the window, so it must initially point at the first coefficient.
    for(int j = y - static_cast<int>(filter_shape[1] / 2); j <= y + static_cast<int>(filter_shape[1] / 2); ++j)
    {
        for(int i = x - static_cast<int>(filter_shape[0] / 2); i <= x + static_cast<int>(filter_shape[0] / 2); ++i)
        {
            coord.set(0, i);
            coord.set(1, j);
            // Accumulate in double to limit precision loss / intermediate overflow
            val += static_cast<double>(*filter_itr) * tensor_elem_at(in, coord, border_mode, constant_border_value);
            ++filter_itr;
        }
    }
    // Restore the centre coordinate before computing the output index
    coord.set(0, x);
    coord.set(1, y);
    // Scale, round towards zero (TO_ZERO), then saturate into the output type
    const double rounded_val = cpp11::trunc(val * static_cast<double>(scale));
    out[coord2index(in.shape(), coord)] = saturate_cast<T3>(rounded_val);
}
263} // namespace
264
Giorgio Arena50f9fd72017-06-19 17:05:30 +0100265// Sobel 3x3
266template <typename T1, typename T2>
267void sobel_3x3(Tensor<T1> &in, Tensor<T2> &out_x, Tensor<T2> &out_y, BorderMode border_mode, uint8_t constant_border_value)
268{
269 const std::array<int8_t, 9> sobel_x{ { -1, 0, 1, -2, 0, 2, -1, 0, 1 } };
270 const std::array<int8_t, 9> sobel_y{ { -1, -2, -1, 0, 0, 0, 1, 2, 1 } };
271
272 for(int element_idx = 0; element_idx < in.num_elements(); ++element_idx)
273 {
274 const Coordinates id = index2coord(in.shape(), element_idx);
275
276 apply_2d_spatial_filter(id, in, out_x, TensorShape(3U, 3U), sobel_x.data(), 1.f, border_mode, constant_border_value);
277 apply_2d_spatial_filter(id, in, out_y, TensorShape(3U, 3U), sobel_y.data(), 1.f, border_mode, constant_border_value);
278 }
279}
280
281// Sobel 5x5
282template <typename T1, typename T2>
283void sobel_5x5(Tensor<T1> &in, Tensor<T2> &out_x, Tensor<T2> &out_y, BorderMode border_mode, uint8_t constant_border_value)
284{
285 const std::array<int8_t, 25> sobel_x{ {
286 -1, -2, 0, 2, 1,
287 -4, -8, 0, 8, 4,
288 -6, -12, 0, 12, 6,
289 -4, -8, 0, 8, 4,
290 -1, -2, 0, 2, 1
291 } };
292
293 const std::array<int8_t, 25> sobel_y{ {
294 -1, -4, -6, -4, -1,
295 -2, -8, -12, -8, -2,
296 0, 0, 0, 0, 0,
297 2, 8, 12, 8, 2,
298 1, 4, 6, 4, 1
299 } };
300
301 for(int element_idx = 0; element_idx < in.num_elements(); ++element_idx)
302 {
303 const Coordinates id = index2coord(in.shape(), element_idx);
304
305 apply_2d_spatial_filter(id, in, out_x, TensorShape(5U, 5U), sobel_x.data(), 1.f, border_mode, constant_border_value);
306 apply_2d_spatial_filter(id, in, out_y, TensorShape(5U, 5U), sobel_y.data(), 1.f, border_mode, constant_border_value);
307 }
308}
309
Giorgio Arenaf7959862017-06-13 15:19:51 +0100310// Mean Standard Deviation
311template <typename T1>
312void mean_and_standard_deviation(const Tensor<T1> &in, float &mean, float &std_dev)
313{
314 int num_elements = in.num_elements();
315
316 // Calculate mean
317 mean = 0.f;
318 for(int i = 0; i < num_elements; ++i)
319 {
320 mean += in[i];
321 }
322 mean /= num_elements;
323
324 // Calculate standard deviation
325 std_dev = 0.f;
326 for(int i = 0; i < num_elements; ++i)
327 {
328 std_dev += (mean - in[i]) * (mean - in[i]);
329 }
330 std_dev = sqrt(std_dev / num_elements);
331}
332
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100333// Integral Image
334void integral_image(const Tensor<uint8_t> &in, Tensor<uint32_t> &out)
335{
336 // Length of dimensions
337 const size_t width = in.shape().x();
338 const size_t height = in.shape().y();
339 const size_t depth = in.shape().z() * in.shape()[3] * in.shape()[4] * in.shape()[5];
340
341 const size_t image_size = width * height;
342
343 for(size_t z = 0; z < depth; ++z)
344 {
345 size_t current_image = z * image_size;
346
347 //First element of each image
348 out[current_image] = in[current_image];
349
350 // First row of each image (add only pixel on the left)
351 for(size_t x = 1; x < width; ++x)
352 {
353 out[current_image + x] = static_cast<uint32_t>(in[current_image + x]) + out[current_image + x - 1];
354 }
355
356 // Subsequent rows
357 for(size_t y = 1; y < height; ++y)
358 {
359 size_t current_row = current_image + (width * y);
360
361 // First element of each row (add only pixel up)
362 out[current_row] = static_cast<uint32_t>(in[current_row]) + out[current_row - width];
363
364 // Following row elements
365 for(size_t x = 1; x < width; ++x)
366 {
367 size_t current_pixel = current_row + x;
368
369 // out = in + up(out) + left(out) - up_left(out)
370 out[current_pixel] = static_cast<uint32_t>(in[current_pixel]) + out[current_pixel - 1]
371 + out[current_pixel - width] - out[current_pixel - width - 1];
372 }
373 }
374 }
375}
376
377// Absolute difference
378template <typename T1, typename T2, typename T3>
379void absolute_difference(const Tensor<T1> &in1, const Tensor<T2> &in2, Tensor<T3> &out)
380{
381 using intermediate_type = typename common_promoted_signed_type<T1, T2, T3>::intermediate_type;
382
383 for(int i = 0; i < in1.num_elements(); ++i)
384 {
385 intermediate_type val = std::abs(static_cast<intermediate_type>(in1[i]) - static_cast<intermediate_type>(in2[i]));
386 out[i] = saturate_cast<T3>(val);
387 }
388}
389
390// Accumulate
391template <typename T1, typename T2>
392void accumulate(const Tensor<T1> &in, Tensor<T2> &out)
393{
394 using intermediate_type = typename common_promoted_signed_type<T1, T2>::intermediate_type;
395
396 for(int i = 0; i < in.num_elements(); ++i)
397 {
398 intermediate_type val = static_cast<intermediate_type>(out[i]) + static_cast<intermediate_type>(in[i]);
399 out[i] = saturate_cast<T2>(val);
400 }
401}
402
403// Accumulate squared
404template <typename T1, typename T2>
405void accumulate_squared(const Tensor<T1> &in, Tensor<T2> &out, uint32_t shift)
406{
407 if(shift > 15)
408 {
409 ARM_COMPUTE_ERROR("Shift in accumulate_squared must be within the range [0, 15]");
410 }
411 using intermediate_type = typename common_promoted_signed_type<T1, T2>::intermediate_type;
412 intermediate_type denom = 1 << shift;
413
414 for(int i = 0; i < in.num_elements(); ++i)
415 {
416 intermediate_type val = static_cast<intermediate_type>(out[i]) + (static_cast<intermediate_type>(in[i]) * static_cast<intermediate_type>(in[i]) / denom);
417 out[i] = saturate_cast<T2>(val);
418 }
419}
420
421// Accumulate weighted
422template <typename T>
423void accumulate_weighted(const Tensor<T> &in, Tensor<T> &out, float alpha)
424{
425 if(alpha < 0.f || alpha > 1.f)
426 {
427 ARM_COMPUTE_ERROR("Weight (alpha) specified in accumulate_weighted must be within the range [0, 1]");
428 }
429 using intermediate_type = typename common_promoted_signed_type<T>::intermediate_type;
430
431 for(int i = 0; i < in.num_elements(); ++i)
432 {
433 double val = (1. - static_cast<double>(alpha)) * static_cast<intermediate_type>(out[i]) + static_cast<double>(alpha) * static_cast<intermediate_type>(in[i]);
434 out[i] = static_cast<T>(val);
435 }
436}
437
438// Arithmetic addition
439template <typename T1, typename T2, typename T3>
440void arithmetic_addition(const Tensor<T1> &in1, const Tensor<T2> &in2, Tensor<T3> &out, ConvertPolicy convert_policy)
441{
442 using intermediate_type = typename common_promoted_signed_type<T1, T2, T3>::intermediate_type;
443
444 for(int i = 0; i < in1.num_elements(); ++i)
445 {
446 intermediate_type val = static_cast<intermediate_type>(in1[i]) + static_cast<intermediate_type>(in2[i]);
447 out[i] = (convert_policy == ConvertPolicy::SATURATE) ? saturate_cast<T3>(val) : static_cast<T3>(val);
448 }
449}
450
451// Arithmetic Subtraction
452template <typename T1, typename T2, typename T3>
453void arithmetic_subtraction(const Tensor<T1> &in1, const Tensor<T2> &in2, Tensor<T3> &out, ConvertPolicy convert_policy)
454{
455 using intermediate_type = typename common_promoted_signed_type<T1, T2, T3>::intermediate_type;
456
457 for(int i = 0; i < in1.num_elements(); ++i)
458 {
459 intermediate_type val = static_cast<intermediate_type>(in1[i]) - static_cast<intermediate_type>(in2[i]);
460 out[i] = (convert_policy == ConvertPolicy::SATURATE) ? saturate_cast<T3>(val) : static_cast<T3>(val);
461 }
462}
463
464// Bitwise and
465template <typename T, typename = typename std::enable_if<std::is_integral<T>::value>::type>
466void bitwise_and(const Tensor<T> &in1, const Tensor<T> &in2, Tensor<T> &out)
467{
468 for(int i = 0; i < in1.num_elements(); ++i)
469 {
470 out[i] = in1[i] & in2[i];
471 }
472}
473
474// Bitwise or
475template <typename T, typename = typename std::enable_if<std::is_integral<T>::value>::type>
476void bitwise_or(const Tensor<T> &in1, const Tensor<T> &in2, Tensor<T> &out)
477{
478 for(int i = 0; i < in1.num_elements(); ++i)
479 {
480 out[i] = in1[i] | in2[i];
481 }
482}
483
484// Bitwise xor
485template <typename T, typename = typename std::enable_if<std::is_integral<T>::value>::type>
486void bitwise_xor(const Tensor<T> &in1, const Tensor<T> &in2, Tensor<T> &out)
487{
488 for(int i = 0; i < in1.num_elements(); ++i)
489 {
490 out[i] = in1[i] ^ in2[i];
491 }
492}
493
494// Bitwise not
495template <typename T, typename = typename std::enable_if<std::is_integral<T>::value>::type>
496void bitwise_not(const Tensor<T> &in, Tensor<T> &out)
497{
498 for(int i = 0; i < in.num_elements(); ++i)
499 {
500 out[i] = ~in[i];
501 }
502}
503
SiCong Libacaf9a2017-06-19 13:41:45 +0100504// Box3x3 filter
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100505template <typename T, typename = typename std::enable_if<std::is_integral<T>::value>::type>
SiCong Libacaf9a2017-06-19 13:41:45 +0100506void box3x3(const Tensor<T> &in, Tensor<T> &out, BorderMode border_mode, T constant_border_value)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100507{
508 const std::array<T, 9> filter{ { 1, 1, 1, 1, 1, 1, 1, 1, 1 } };
SiCong Libacaf9a2017-06-19 13:41:45 +0100509 float scale = 1.f / static_cast<float>(filter.size());
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100510 for(int element_idx = 0; element_idx < in.num_elements(); ++element_idx)
511 {
512 const Coordinates id = index2coord(in.shape(), element_idx);
SiCong Libacaf9a2017-06-19 13:41:45 +0100513 apply_2d_spatial_filter(id, in, out, TensorShape(3U, 3U), filter.data(), scale, border_mode, constant_border_value);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100514 }
515}
516
// Depth conversion
/** Primary template: any source/destination type pair without a dedicated
 *  specialization below is rejected at run time. */
template <typename T1, typename T2>
void depth_convert(const Tensor<T1> &in, Tensor<T2> &out, ConvertPolicy policy, uint32_t shift)
{
    ARM_COMPUTE_ERROR("The conversion is not supported");
}
523
524template <>
525void depth_convert<int8_t, float>(const Tensor<int8_t> &in, Tensor<float> &out, ConvertPolicy policy, uint32_t shift)
526{
527 const int8_t fixed_point_position = static_cast<int8_t>(in.fixed_point_position());
528 for(int i = 0; i < in.num_elements(); ++i)
529 {
530 out[i] = static_cast<float>(in[i]) * (1.0f / (1 << fixed_point_position));
531 }
532}
533
534template <>
535void depth_convert<float, int8_t>(const Tensor<float> &in, Tensor<int8_t> &out, ConvertPolicy policy, uint32_t shift)
536{
537 const int8_t fixed_point_position = static_cast<int8_t>(in.fixed_point_position());
538 for(int i = 0; i < in.num_elements(); ++i)
539 {
540 float val = in[i] * (1 << fixed_point_position) + 0.5f;
541 out[i] = ((policy == ConvertPolicy::SATURATE) ? saturate_cast<int8_t>(val) : static_cast<int8_t>(val));
542 }
543}
544
545template <>
546void depth_convert<uint8_t, uint16_t>(const Tensor<uint8_t> &in, Tensor<uint16_t> &out, ConvertPolicy policy, uint32_t shift)
547{
548 for(int i = 0; i < in.num_elements(); ++i)
549 {
550 out[i] = static_cast<uint16_t>(in[i]) << shift;
551 }
552}
553
554template <>
555void depth_convert<uint8_t, int16_t>(const Tensor<uint8_t> &in, Tensor<int16_t> &out, ConvertPolicy policy, uint32_t shift)
556{
557 for(int i = 0; i < in.num_elements(); ++i)
558 {
559 out[i] = static_cast<int16_t>(in[i]) << shift;
560 }
561}
562
563template <>
564void depth_convert<uint8_t, int32_t>(const Tensor<uint8_t> &in, Tensor<int32_t> &out, ConvertPolicy policy, uint32_t shift)
565{
566 for(int i = 0; i < in.num_elements(); ++i)
567 {
568 out[i] = static_cast<int32_t>(in[i]) << shift;
569 }
570}
571
572template <>
573void depth_convert<uint16_t, uint8_t>(const Tensor<uint16_t> &in, Tensor<uint8_t> &out, ConvertPolicy policy, uint32_t shift)
574{
575 for(int i = 0; i < in.num_elements(); ++i)
576 {
577 uint16_t val = in[i] >> shift;
578 out[i] = ((policy == ConvertPolicy::SATURATE) ? saturate_cast<uint8_t>(val) : static_cast<uint8_t>(val));
579 }
580}
581
582template <>
583void depth_convert<uint16_t, uint32_t>(const Tensor<uint16_t> &in, Tensor<uint32_t> &out, ConvertPolicy policy, uint32_t shift)
584{
585 for(int i = 0; i < in.num_elements(); ++i)
586 {
587 out[i] = static_cast<uint32_t>(in[i]) << shift;
588 }
589}
590
591template <>
592void depth_convert<int16_t, uint8_t>(const Tensor<int16_t> &in, Tensor<uint8_t> &out, ConvertPolicy policy, uint32_t shift)
593{
594 for(int i = 0; i < in.num_elements(); ++i)
595 {
596 int16_t val = in[i] >> shift;
597 out[i] = ((policy == ConvertPolicy::SATURATE) ? saturate_cast<uint8_t>(val) : static_cast<uint8_t>(val));
598 }
599}
600template <>
601void depth_convert<int16_t, int32_t>(const Tensor<int16_t> &in, Tensor<int32_t> &out, ConvertPolicy policy, uint32_t shift)
602{
603 for(int i = 0; i < in.num_elements(); ++i)
604 {
605 out[i] = static_cast<int32_t>(in[i]) << shift;
606 }
607}
608
SiCong Li5a536642017-06-19 14:47:05 +0100609// Gaussian3x3 filter
610template <typename T, typename = typename std::enable_if<std::is_integral<T>::value>::type>
611void gaussian3x3(const Tensor<T> &in, Tensor<T> &out, BorderMode border_mode, T constant_border_value)
612{
613 const std::array<T, 9> filter{ { 1, 2, 1, 2, 4, 2, 1, 2, 1 } };
614 const float scale = 1.f / 16.f;
615 for(int element_idx = 0; element_idx < in.num_elements(); ++element_idx)
616 {
617 const Coordinates id = index2coord(in.shape(), element_idx);
618 apply_2d_spatial_filter(id, in, out, TensorShape(3U, 3U), filter.data(), scale, border_mode, constant_border_value);
619 }
620}
621
SiCong Li3eb263e2017-06-19 15:31:43 +0100622// Gaussian5x5 filter
623template <typename T, typename = typename std::enable_if<std::is_integral<T>::value>::type>
624void gaussian5x5(const Tensor<T> &in, Tensor<T> &out, BorderMode border_mode, T constant_border_value)
625{
626 const std::array<T, 25> filter{ {
627 1, 4, 6, 4, 1,
628 4, 16, 24, 16, 4,
629 6, 24, 36, 24, 6,
630 4, 16, 24, 16, 4,
631 1, 4, 6, 4, 1
632 } };
633 const float scale = 1.f / 256.f;
634 for(int element_idx = 0; element_idx < in.num_elements(); ++element_idx)
635 {
636 const Coordinates id = index2coord(in.shape(), element_idx);
637 apply_2d_spatial_filter(id, in, out, TensorShape(5U, 5U), filter.data(), scale, border_mode, constant_border_value);
638 }
639}
640
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100641// Matrix multiplication for floating point type
Pablo Tello383deec2017-06-23 10:40:05 +0100642template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type * = nullptr>
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100643void gemm(const Tensor<T> &in1, const Tensor<T> &in2, const Tensor<T> &in3, Tensor<T> &out, float alpha, float beta)
644{
645 const int M = out.shape().y();
646 const int N = out.shape().x();
647 const int K = in1.shape().x();
648
649 for(int r = 0; r < M; ++r)
650 {
651 for(int c = 0; c < N; ++c)
652 {
653 T acc = 0.0f;
654
655 for(int k = 0; k < K; ++k)
656 {
657 const T a0 = in1[r * K + k];
658 const T b0 = in2[k * N + c];
659
660 acc += a0 * b0;
661 }
662
663 // Finalize the result: A * B * alpha + C * beta
664 const T c0 = in3[c + r * N];
665 out[c + r * N] = alpha * acc + beta * c0;
666 }
667 }
668}
669
// Matrix multiplication for fixed point type
/** Reference GEMM for fixed point tensors: out = alpha * in1 * in2 + beta * in3.
 *
 * Dot products are accumulated in the promoted fixed point type for headroom,
 * converted back to T, then scaled by alpha and combined with beta * in3 in
 * T-width fixed point.
 */
template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type * = nullptr>
void gemm(const Tensor<T> &in1, const Tensor<T> &in2, const Tensor<T> &in3, Tensor<T> &out, float alpha, float beta)
{
    using namespace fixed_point_arithmetic;

    using promoted_type = typename fixed_point_arithmetic::traits::promote<T>::type;

    // M/N from the output shape, K (accumulation depth) from the lhs width
    const int M = out.shape().y();
    const int N = out.shape().x();
    const int K = in1.shape().x();
    const int8_t fixed_point_position = static_cast<int8_t>(in1.fixed_point_position());

    // Quantize the scalar coefficients once, outside the loops
    const fixed_point<T> alpha_q(alpha, fixed_point_position);
    const fixed_point<T> beta_q(beta, fixed_point_position);

    for(int r = 0; r < M; ++r)
    {
        for(int c = 0; c < N; ++c)
        {
            fixed_point<promoted_type> acc_q(0, fixed_point_position);

            for(int k = 0; k < K; ++k)
            {
                // Trailing 'true' -> construct from the raw underlying integer representation
                const fixed_point<promoted_type> a0_q(in1[r * K + k], fixed_point_position, true);
                const fixed_point<promoted_type> b0_q(in2[k * N + c], fixed_point_position, true);
                const fixed_point<promoted_type> axb_q = a0_q * b0_q;

                acc_q = axb_q + acc_q;
            }

            // Finalize the result: A * B * alpha + C * beta
            const fixed_point<T> c0_q(in3[c + r * N], fixed_point_position, true);

            fixed_point<T> res_q(acc_q);
            res_q = alpha_q * res_q;
            res_q = (c0_q * beta_q) + res_q;

            // Store the result
            out[c + r * N] = res_q.raw();
        }
    }
}
713
// Non linear filter
/** Reference min/max/median filter over a mask_size x mask_size window.
 *
 * For each element inside the valid region, the values selected by @p mask
 * (entries equal to 255) within the window are gathered, sorted, and the
 * minimum / maximum / median is written to @p out according to @p function.
 * Elements outside the valid region (possible with BorderMode::UNDEFINED)
 * are left untouched.
 */
template <typename T>
void non_linear_filter(const Tensor<T> &in, Tensor<T> &out, NonLinearFilterFunction function, unsigned int mask_size,
                       MatrixPattern pattern, const uint8_t *mask, BorderMode border_mode, uint8_t constant_border_value)
{
    ARM_COMPUTE_ERROR_ON(MatrixPattern::OTHER == pattern && nullptr == mask);

    using intermediate_type = typename common_promoted_signed_type<T>::intermediate_type;

    const int sq_mask_size   = mask_size * mask_size;
    const int half_mask_size = mask_size / 2;
    std::vector<intermediate_type> vals(sq_mask_size);
    intermediate_type              current_value = 0;

    // With an undefined border the outermost half_mask_size rows/columns are excluded
    ValidRegion valid_region = shape_to_valid_region(in.shape());
    if(border_mode == BorderMode::UNDEFINED)
    {
        valid_region = shape_to_valid_region_undefined_border(in.shape(), BorderSize(half_mask_size));
    }

    // count = number of mask-selected values gathered; index = position in the mask.
    // Both are reset at the start of every element (see the for-statement).
    for(int element_idx = 0, count = 0, index = 0; element_idx < in.num_elements(); ++element_idx, count = 0, index = 0)
    {
        Coordinates id = index2coord(in.shape(), element_idx);
        if(is_in_valid_region(valid_region, id))
        {
            int idx = id.x();
            int idy = id.y();
            // Gather the window values selected by the mask (255 == active)
            for(int y = idy - half_mask_size; y <= idy + half_mask_size; ++y)
            {
                for(int x = idx - half_mask_size; x <= idx + half_mask_size; ++x, ++index)
                {
                    id.set(0, x);
                    id.set(1, y);
                    current_value = tensor_elem_at(in, id, border_mode, constant_border_value);

                    if(mask[index] == 255)
                    {
                        vals[count] = static_cast<intermediate_type>(current_value);
                        ++count;
                    }
                }
            }
            // Only the first 'count' entries are meaningful
            std::sort(vals.begin(), vals.begin() + count);
            switch(function)
            {
                case NonLinearFilterFunction::MIN:
                    out[element_idx] = saturate_cast<T>(vals[0]);
                    break;
                case NonLinearFilterFunction::MAX:
                    out[element_idx] = saturate_cast<T>(vals[count - 1]);
                    break;
                case NonLinearFilterFunction::MEDIAN:
                    out[element_idx] = saturate_cast<T>(vals[count / 2]);
                    break;
                default:
                    ARM_COMPUTE_ERROR("Unsupported NonLinearFilter function.");
            }
        }
    }
}
774
// Pixel-wise multiplication
/** Reference pixel-wise multiplication: out[i] = in1[i] * in2[i] * scale.
 *
 * The product is formed in a wide signed intermediate type and then in double
 * precision; for integer outputs the value is rounded according to
 * @p rounding_policy and narrowed according to @p convert_policy, while
 * floating point outputs are assigned directly.
 */
template <typename T1, typename T2, typename T3>
void pixel_wise_multiplication(const Tensor<T1> &in1, const Tensor<T2> &in2, Tensor<T3> &out, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy)
{
    if(scale < 0)
    {
        ARM_COMPUTE_ERROR("Scale of pixel-wise multiplication must be non-negative");
    }
    using intermediate_type = typename common_promoted_signed_type<T1, T2, T3>::intermediate_type;
    for(int i = 0; i < in1.num_elements(); ++i)
    {
        double val = static_cast<intermediate_type>(in1[i]) * static_cast<intermediate_type>(in2[i]) * static_cast<double>(scale);
        if(is_floating_point<T3>::value)
        {
            // Floating point output: no rounding needed
            out[i] = val;
        }
        else
        {
            // Integer output: round per policy, then narrow per convert policy
            double rounded_val = 0;
            switch(rounding_policy)
            {
                case(RoundingPolicy::TO_ZERO):
                    rounded_val = cpp11::trunc(val);
                    break;
                case(RoundingPolicy::TO_NEAREST_UP):
                    rounded_val = cpp11::round_half_up(val);
                    break;
                case(RoundingPolicy::TO_NEAREST_EVEN):
                    rounded_val = cpp11::round_half_even(val);
                    break;
                default:
                    ARM_COMPUTE_ERROR("Unsupported rounding policy");
            }
            out[i] = (convert_policy == ConvertPolicy::SATURATE) ? saturate_cast<T3>(rounded_val) : static_cast<T3>(rounded_val);
        }
    }
}
812
// Fixed-point Pixel-wise Multiplication
/** Reference fixed point pixel-wise multiplication: out[i] = in1[i] * in2[i] * scale.
 *
 * All tensors must share the same data type and fixed point position. With
 * ConvertPolicy::SATURATE the saturating operator* is used; otherwise the
 * wrapping mul<OverflowPolicy::WRAP> variant. The extra multiply by @p scale
 * is skipped entirely when scale == 1.
 */
template <typename T, typename = typename std::enable_if<std::is_integral<T>::value>::type>
void fixed_point_pixel_wise_multiplication(const Tensor<T> &in1, const Tensor<T> &in2, Tensor<T> &out, int scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy)
{
    using namespace fixed_point_arithmetic;

    const int fixed_point_position = in1.fixed_point_position();

    ARM_COMPUTE_ERROR_ON_MSG(in1.data_type() != in2.data_type() || in1.data_type() != out.data_type(),
                             "Tensors must all have the same DataType");
    ARM_COMPUTE_ERROR_ON_MSG(fixed_point_position != in2.fixed_point_position() || fixed_point_position != out.fixed_point_position(),
                             "Fixed-point position must be the same for both inputs and outputs");

    // Validate fixed_point_position
    ARM_COMPUTE_ERROR_ON((in1.data_type() == DataType::QS8) && (fixed_point_position == 0 || fixed_point_position > 7));
    ARM_COMPUTE_ERROR_ON((in1.data_type() == DataType::QS16) && (fixed_point_position == 0 || fixed_point_position > 15));

    fixed_point<T> fp_scale(scale, fixed_point_position);
    const bool     is_sat     = convert_policy == ConvertPolicy::SATURATE;
    const bool     do_scaling = scale != 1;

    for(int i = 0; i < in1.num_elements(); ++i)
    {
        // Trailing 'true' -> construct from the raw underlying integer representation
        fixed_point<T> val1(in1[i], fixed_point_position, true);
        fixed_point<T> val2(in2[i], fixed_point_position, true);
        fixed_point<T> res = (is_sat) ? val1 * val2 : mul<OverflowPolicy::WRAP>(val1, val2);
        if(do_scaling)
        {
            res = (is_sat) ? res * fp_scale : mul<OverflowPolicy::WRAP>(res, fp_scale);
        }
        out[i] = res.raw();
    }
}
846
847// Threshold
848template <typename T>
849void threshold(const Tensor<T> &in, Tensor<T> &out, uint8_t threshold, uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper)
850{
851 switch(type)
852 {
853 case ThresholdType::BINARY:
854 for(int i = 0; i < in.num_elements(); ++i)
855 {
856 out[i] = ((in[i] > threshold) ? true_value : false_value);
857 }
858 break;
859 case ThresholdType::RANGE:
860 for(int i = 0; i < in.num_elements(); ++i)
861 {
862 if(in[i] > upper)
863 {
864 out[i] = false_value;
865 }
866 else if(in[i] < threshold)
867 {
868 out[i] = false_value;
869 }
870 else
871 {
872 out[i] = true_value;
873 }
874 }
875 break;
876 default:
877 ARM_COMPUTE_ERROR("Thresholding type not recognised");
878 break;
879 }
880}
881
882// Activation Layer for floating point type
Pablo Tello383deec2017-06-23 10:40:05 +0100883template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type * = nullptr>
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100884void activation_layer(const Tensor<T> &in, Tensor<T> &out, ActivationLayerInfo act_info)
885{
886 const T a = static_cast<T>(act_info.a());
887 const T b = static_cast<T>(act_info.b());
888
889 for(int i = 0; i < in.num_elements(); ++i)
890 {
891 T x = in[i];
892 switch(act_info.activation())
893 {
894 case ActivationLayerInfo::ActivationFunction::ABS:
895 out[i] = std::abs(x);
896 break;
897 case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU:
898 out[i] = std::min<T>(a, std::max<T>(0, x));
899 break;
900 case ActivationLayerInfo::ActivationFunction::LINEAR:
901 out[i] = a * x + b;
902 break;
903 case ActivationLayerInfo::ActivationFunction::LOGISTIC:
904 out[i] = static_cast<T>(1) / (static_cast<T>(1) + std::exp(-x));
905 break;
906 case ActivationLayerInfo::ActivationFunction::RELU:
907 out[i] = std::max<T>(0, x);
908 break;
909 case ActivationLayerInfo::ActivationFunction::SOFT_RELU:
910 out[i] = std::log(static_cast<T>(1) + std::exp(x));
911 break;
912 case ActivationLayerInfo::ActivationFunction::SQRT:
913 out[i] = std::sqrt(x);
914 break;
915 case ActivationLayerInfo::ActivationFunction::SQUARE:
916 out[i] = x * x;
917 break;
918 case ActivationLayerInfo::ActivationFunction::TANH:
919 out[i] = a * std::tanh(b * x);
920 break;
921 default:
922 ARM_COMPUTE_ERROR("Activation function not recognised");
923 break;
924 }
925 }
926}
927
928// Activation Layer for fixed point type
929template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type * = nullptr>
930void activation_layer(const Tensor<T> &in, Tensor<T> &out, ActivationLayerInfo act_info)
931{
932 using namespace fixed_point_arithmetic;
933 int fixed_point_position = in.fixed_point_position();
934 ActivationLayerInfo::ActivationFunction act_func = act_info.activation();
935 const fixed_point<T> a(act_info.a(), fixed_point_position);
936 const fixed_point<T> b(act_info.b(), fixed_point_position);
937 const fixed_point<T> const_0(0, fixed_point_position);
938 const fixed_point<T> const_1(1, fixed_point_position);
939
940 for(int i = 0; i < in.num_elements(); ++i)
941 {
942 fixed_point<T> x(in[i], fixed_point_position, true);
943 switch(act_func)
944 {
945 case ActivationLayerInfo::ActivationFunction::ABS:
946 out[i] = abs(x).raw();
947 break;
948 case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU:
949 out[i] = min(a, max(const_0, x)).raw();
950 break;
951 case ActivationLayerInfo::ActivationFunction::LINEAR:
952 out[i] = add(b, mul(a, x)).raw();
953 break;
954 case ActivationLayerInfo::ActivationFunction::LOGISTIC:
955 out[i] = (const_1 / (const_1 + exp(-x))).raw();
956 break;
957 case ActivationLayerInfo::ActivationFunction::RELU:
958 out[i] = max(const_0, x).raw();
959 break;
960 case ActivationLayerInfo::ActivationFunction::SOFT_RELU:
961 out[i] = log(const_1 + exp(x)).raw();
962 break;
963 case ActivationLayerInfo::ActivationFunction::SQRT:
964 out[i] = (const_1 / inv_sqrt(x)).raw();
965 break;
966 case ActivationLayerInfo::ActivationFunction::SQUARE:
967 out[i] = mul(x, x).raw();
968 break;
969 case ActivationLayerInfo::ActivationFunction::TANH:
970 out[i] = tanh(x).raw();
971 break;
972 default:
973 ARM_COMPUTE_ERROR("Activation function not recognised");
974 break;
975 }
976 }
977}
978
979// Batch Normalization Layer for fixed point type
980template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type * = nullptr>
981void batch_normalization_layer(const Tensor<T> &in, Tensor<T> &out, const Tensor<T> &mean, const Tensor<T> &var, const Tensor<T> &beta, const Tensor<T> &gamma, float epsilon, int fixed_point_position)
982{
983 const int cols = static_cast<int>(in.shape()[0]);
984 const int rows = static_cast<int>(in.shape()[1]);
985 const int depth = static_cast<int>(in.shape()[2]);
986 int upper_dims = in.shape().total_size() / (cols * rows * depth);
987
988 for(int r = 0; r < upper_dims; ++r)
989 {
990 for(int i = 0; i < depth; ++i)
991 {
992 for(int k = 0; k < rows; ++k)
993 {
994 for(int l = 0; l < cols; ++l)
995 {
996 const int pos = l + k * cols + i * rows * cols + r * cols * rows * depth;
997 fixed_point_arithmetic::fixed_point<T> in_qs8(in[pos], fixed_point_position, true);
998 fixed_point_arithmetic::fixed_point<T> var_qs8(var[i], fixed_point_position, true);
999 fixed_point_arithmetic::fixed_point<T> mean_qs8(mean[i], fixed_point_position, true);
1000 fixed_point_arithmetic::fixed_point<T> beta_qs8(beta[i], fixed_point_position, true);
1001 fixed_point_arithmetic::fixed_point<T> gamma_qs8(gamma[i], fixed_point_position, true);
1002 fixed_point_arithmetic::fixed_point<T> epsilon_qs8(epsilon, fixed_point_position);
1003
1004 auto denominator = fixed_point_arithmetic::inv_sqrt(var_qs8 + epsilon_qs8);
1005 auto numerator = in_qs8 - mean_qs8;
1006 auto x_bar = numerator * denominator;
1007 x_bar = beta_qs8 + x_bar * gamma_qs8;
1008 out[pos] = x_bar.raw();
1009 }
1010 }
1011 }
1012 }
1013}
1014
1015// Batch Normalization Layer for floating point type
Pablo Tello383deec2017-06-23 10:40:05 +01001016template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type * = nullptr>
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001017void batch_normalization_layer(const Tensor<T> &in, Tensor<T> &out, const Tensor<T> &mean, const Tensor<T> &var, const Tensor<T> &beta, const Tensor<T> &gamma, float epsilon, int fixed_point_position)
1018{
1019 const int cols = static_cast<int>(in.shape()[0]);
1020 const int rows = static_cast<int>(in.shape()[1]);
1021 const int depth = static_cast<int>(in.shape()[2]);
1022 int upper_dims = in.shape().total_size() / (cols * rows * depth);
1023
1024 for(int r = 0; r < upper_dims; ++r)
1025 {
1026 for(int i = 0; i < depth; ++i)
1027 {
1028 for(int k = 0; k < rows; ++k)
1029 {
1030 for(int l = 0; l < cols; ++l)
1031 {
1032 const int pos = l + k * cols + i * rows * cols + r * cols * rows * depth;
1033 const float denominator = sqrt(var[i] + epsilon);
1034 const float numerator = in[pos] - mean[i];
1035 const float x_bar = numerator / denominator;
1036 out[pos] = beta[i] + x_bar * gamma[i];
1037 }
1038 }
1039 }
1040 }
1041}
1042
// Convolution layer
//
// Reference 2D convolution: for each batch, each stride-aligned anchor
// position and each output feature map (OFM), a 3D convolution of the
// input volume against that OFM's weight volume is delegated to the
// convolution3d() helper.
//
// in        - input tensor, W x H x D (x batches)
// weights   - one W x H x D weight volume per OFM
// bias      - one bias value per OFM
// out       - output tensor, W' x H' x OFMs (x batches)
// conv_info - stride and padding; the requested padding is clamped to
//             half the kernel size below
template <typename T>
void convolution_layer(const Tensor<T> &in, const Tensor<T> &weights, const Tensor<T> &bias, Tensor<T> &out, const PadStrideInfo &conv_info)
{
    const int width_in       = in.shape().x();
    const int height_in      = in.shape().y();
    const int depth_in       = in.shape().z();
    const int width_out      = out.shape().x();
    const int height_out     = out.shape().y();
    const int depth_out      = out.shape().z();
    const int width_weights  = weights.shape().x();
    const int height_weights = weights.shape().y();
    const int depth_weights  = weights.shape().z();
    // Clamp padding so the kernel window is never fully outside the input
    const int pad_xi         = std::min(static_cast<int>(conv_info.pad().first), width_weights / 2);
    const int pad_yi         = std::min(static_cast<int>(conv_info.pad().second), height_weights / 2);
    // First and one-past-last input coordinates visited by the kernel centre
    const int start_xi       = width_weights / 2 - pad_xi;
    const int start_yi       = height_weights / 2 - pad_yi;
    const int end_xi         = width_in - start_xi;
    const int end_yi         = height_in - start_yi;
    const int stride_xi      = conv_info.stride().first;
    const int stride_yi      = conv_info.stride().second;
    const int num_batches    = in.shape().total_size() / (width_in * height_in * depth_in);

    for(int r = 0; r < num_batches; ++r)
    {
        for(int yi = start_yi; yi < end_yi; yi += stride_yi)
        {
            for(int xi = start_xi; xi < end_xi; xi += stride_xi)
            {
                for(int ofm = 0; ofm < depth_out; ++ofm)
                {
                    // Compute input and output offsets
                    const int offset_in = r * width_in * height_in * depth_in;
                    // Map the input anchor (xi, yi) back to output coordinates
                    const int xo = (xi - start_xi) / stride_xi;
                    const int yo = (yi - start_yi) / stride_yi;
                    const int offset_out = xo + yo * width_out + ofm * width_out * height_out + r * width_out * height_out * depth_out;

                    // Compute 3D convolution (accumulates across depth_in and adds the OFM's bias)
                    convolution3d(in.data() + offset_in,
                                  weights.data() + ofm * width_weights * height_weights * depth_weights,
                                  bias.data() + ofm,
                                  out.data() + offset_out,
                                  xi, yi,
                                  width_in, height_in, depth_in,
                                  width_weights, height_weights,
                                  static_cast<int8_t>(in.fixed_point_position()));
                }
            }
        }
    }
}
1094
1095// Fully connected layer
1096template <typename T>
1097void fully_connected_layer(const Tensor<T> &in, const Tensor<T> &weights, const Tensor<T> &bias, Tensor<T> &out)
1098{
1099 ARM_COMPUTE_ERROR_ON(weights.shape().x() != out.shape().x());
1100 ARM_COMPUTE_ERROR_ON(weights.shape().y() != in.shape().x() * in.shape().y() * in.shape().z());
1101 const int cols_weights = weights.shape().x();
1102 const int rows_weights = weights.shape().y();
1103 const int num_batches = in.shape().total_size() / rows_weights;
1104
1105 for(int k = 0; k < num_batches; ++k)
1106 {
1107 vector_matrix_multiply<T>(in.data() + k * rows_weights,
1108 weights.data(),
1109 bias.data(),
1110 out.data() + k * cols_weights,
1111 cols_weights,
1112 rows_weights,
1113 in.fixed_point_position());
1114 }
1115}
1116
1117// Normalization Layer for floating point type
Pablo Tello383deec2017-06-23 10:40:05 +01001118template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type * = nullptr>
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001119void normalization_layer(const Tensor<T> &in, Tensor<T> &out, NormalizationLayerInfo norm_info)
1120{
1121 const uint32_t norm_size = norm_info.norm_size();
1122 NormType type = norm_info.type();
1123 float beta = norm_info.beta();
1124 uint32_t kappa = norm_info.kappa();
1125
1126 const int cols = static_cast<int>(in.shape()[0]);
1127 const int rows = static_cast<int>(in.shape()[1]);
1128 const int depth = static_cast<int>(in.shape()[2]);
1129 int upper_dims = in.shape().total_size() / (cols * rows);
1130
1131 float coeff = norm_info.scale_coeff();
1132 int radius_cols = norm_size / 2;
1133 // IN_MAP_1D and CROSS_MAP normalize over a single axis only
1134 int radius_rows = (NormType::IN_MAP_2D == type) ? norm_size / 2 : 0;
1135
1136 if(type == NormType::CROSS_MAP)
1137 {
1138 // Remove also depth from upper dimensions since it is the axes we want
1139 // to use for normalization
1140 upper_dims /= depth;
1141 for(int r = 0; r < upper_dims; ++r)
1142 {
1143 for(int i = 0; i < rows; ++i)
1144 {
1145 for(int k = 0; k < cols; ++k)
1146 {
1147 for(int l = 0; l < depth; ++l)
1148 {
1149 float accumulated_scale = 0.f;
1150 for(int j = -radius_cols; j <= radius_cols; ++j)
1151 {
1152 const int z = l + j;
1153 if(z >= 0 && z < depth)
1154 {
1155 const T value = in[k + i * cols + z * rows * cols + r * cols * rows * depth];
1156 accumulated_scale += value * value;
1157 }
1158 }
1159 out[k + i * cols + l * rows * cols + r * cols * rows * depth] = kappa + accumulated_scale * coeff;
1160 }
1161 }
1162 }
1163 }
1164 }
1165 else
1166 {
1167 for(int r = 0; r < upper_dims; ++r)
1168 {
1169 for(int i = 0; i < rows; ++i)
1170 {
1171 for(int k = 0; k < cols; ++k)
1172 {
1173 float accumulated_scale = 0.f;
1174 for(int j = -radius_rows; j <= radius_rows; ++j)
1175 {
1176 const int y = i + j;
1177 for(int l = -radius_cols; l <= radius_cols; ++l)
1178 {
1179 const int x = k + l;
1180 if((x >= 0 && y >= 0) && (x < cols && y < rows))
1181 {
1182 const T value = in[x + y * cols + r * cols * rows];
1183 accumulated_scale += value * value;
1184 }
1185 }
1186 }
1187 out[k + i * cols + r * cols * rows] = kappa + accumulated_scale * coeff;
1188 }
1189 }
1190 }
1191 }
1192
1193 if(beta == 1.f)
1194 {
1195 for(int i = 0; i < out.num_elements(); ++i)
1196 {
1197 out[i] = in[i] / out[i];
1198 }
1199 }
1200 else if(beta == 0.5f)
1201 {
1202 for(int i = 0; i < out.num_elements(); ++i)
1203 {
1204 out[i] = in[i] / std::sqrt(out[i]);
1205 }
1206 }
1207 else
1208 {
1209 for(int i = 0; i < out.num_elements(); ++i)
1210 {
1211 out[i] = in[i] * std::exp(std::log(out[i]) * -beta);
1212 }
1213 }
1214}
1215// Normalization Layer for fixed-point types
1216template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type * = nullptr>
1217void normalization_layer(const Tensor<T> &in, Tensor<T> &out, NormalizationLayerInfo norm_info)
1218{
1219 using namespace fixed_point_arithmetic;
1220
1221 const int fixed_point_position = in.fixed_point_position();
1222
1223 const uint32_t norm_size = norm_info.norm_size();
1224 NormType type = norm_info.type();
1225 fixed_point<T> beta(norm_info.beta(), fixed_point_position);
1226 fixed_point<T> kappa(norm_info.kappa(), fixed_point_position);
1227
1228 const int cols = static_cast<int>(in.shape()[0]);
1229 const int rows = static_cast<int>(in.shape()[1]);
1230 const int depth = static_cast<int>(in.shape()[2]);
1231 int upper_dims = in.shape().total_size() / (cols * rows);
1232
1233 fixed_point<T> coeff(norm_info.scale_coeff(), fixed_point_position);
1234 int radius_cols = norm_size / 2;
1235 // IN_MAP_1D and CROSS_MAP normalize over a single axis only
1236 int radius_rows = (NormType::IN_MAP_2D == type) ? norm_size / 2 : 0;
1237
1238 if(type == NormType::CROSS_MAP)
1239 {
1240 // Remove also depth from upper dimensions since it is the axes we want
1241 // to use for normalization
1242 upper_dims /= depth;
1243 for(int r = 0; r < upper_dims; ++r)
1244 {
1245 for(int i = 0; i < rows; ++i)
1246 {
1247 for(int k = 0; k < cols; ++k)
1248 {
1249 for(int l = 0; l < depth; ++l)
1250 {
1251 fixed_point<T> accumulated_scale(0.f, fixed_point_position);
1252 for(int j = -radius_cols; j <= radius_cols; ++j)
1253 {
1254 const int z = l + j;
1255 if(z >= 0 && z < depth)
1256 {
1257 const T value = in[k + i * cols + z * rows * cols + r * cols * rows * depth];
1258 const fixed_point<T> fp_value(value, fixed_point_position, true);
1259 accumulated_scale = add(accumulated_scale, mul(fp_value, fp_value));
1260 }
1261 }
1262 accumulated_scale = add(kappa, mul(accumulated_scale, coeff));
1263 out[k + i * cols + l * rows * cols + r * cols * rows * depth] = accumulated_scale.raw();
1264 }
1265 }
1266 }
1267 }
1268 }
1269 else
1270 {
1271 for(int r = 0; r < upper_dims; ++r)
1272 {
1273 for(int i = 0; i < rows; ++i)
1274 {
1275 for(int k = 0; k < cols; ++k)
1276 {
1277 fixed_point<T> accumulated_scale(0.f, fixed_point_position);
1278 for(int j = -radius_rows; j <= radius_rows; ++j)
1279 {
1280 const int y = i + j;
1281 for(int l = -radius_cols; l <= radius_cols; ++l)
1282 {
1283 const int x = k + l;
1284 if((x >= 0 && y >= 0) && (x < cols && y < rows))
1285 {
1286 const T value = in[x + y * cols + r * cols * rows];
1287 const fixed_point<T> fp_value(value, fixed_point_position, true);
1288 accumulated_scale = add(accumulated_scale, mul(fp_value, fp_value));
1289 }
1290 }
1291 }
1292 accumulated_scale = add(kappa, mul(accumulated_scale, coeff));
1293 out[k + i * cols + r * cols * rows] = accumulated_scale.raw();
1294 }
1295 }
1296 }
1297 }
1298
1299 if(norm_info.beta() == 1.f)
1300 {
1301 for(int i = 0; i < out.num_elements(); ++i)
1302 {
1303 fixed_point<T> res = div(fixed_point<T>(in[i], fixed_point_position, true), fixed_point<T>(out[i], fixed_point_position, true));
1304 out[i] = res.raw();
1305 }
1306 }
1307 else
1308 {
1309 const fixed_point<T> beta(norm_info.beta(), fixed_point_position);
1310 for(int i = 0; i < out.num_elements(); ++i)
1311 {
1312 fixed_point<T> res = pow(fixed_point<T>(out[i], fixed_point_position, true), beta);
1313 res = div(fixed_point<T>(in[i], fixed_point_position, true), res);
1314 out[i] = res.raw();
1315 }
1316 }
1317}
1318
// Pooling layer
//
// Reference max/average pooling over the two innermost dimensions.
// The pooled output extents are recomputed here from input size, pool
// size, stride and padding (CEIL or FLOOR rounding). For integral T the
// average path accumulates with saturating QS8 helpers and multiplies by
// a tabulated reciprocal of the window area.
template <typename T>
void pooling_layer(const Tensor<T> &in, Tensor<T> &out, PoolingLayerInfo pool_info, int fixed_point_position)
{
    const int pool_size = pool_info.pool_size();
    PoolingType type = pool_info.pool_type();
    int pool_stride_x = 0;
    int pool_stride_y = 0;
    int pad_x = 0;
    int pad_y = 0;
    std::tie(pool_stride_x, pool_stride_y) = pool_info.pad_stride_info().stride();
    std::tie(pad_x, pad_y) = pool_info.pad_stride_info().pad();

    const int w_in = static_cast<int>(in.shape()[0]);
    const int h_in = static_cast<int>(in.shape()[1]);

    const int w_out = static_cast<int>(out.shape()[0]);
    const int h_out = static_cast<int>(out.shape()[1]);

    int upper_dims = in.shape().total_size() / (w_in * h_in);

    // Pooled extents derived from the requested rounding mode
    int pooled_w = 0;
    int pooled_h = 0;
    if(pool_info.pad_stride_info().round() == DimensionRoundingType::CEIL)
    {
        pooled_w = static_cast<int>(ceil(static_cast<float>(w_in + 2 * pad_x - pool_size) / pool_stride_x)) + 1;
        pooled_h = static_cast<int>(ceil(static_cast<float>(h_in + 2 * pad_y - pool_size) / pool_stride_y)) + 1;
    }
    else
    {
        pooled_w = static_cast<int>(floor(static_cast<float>(w_in + 2 * pad_x - pool_size) / pool_stride_x)) + 1;
        pooled_h = static_cast<int>(floor(static_cast<float>(h_in + 2 * pad_y - pool_size) / pool_stride_y)) + 1;
    }

    // Drop a trailing window that would start past the padded input
    if((pooled_w - 1) * pool_stride_x >= w_in + pad_x)
    {
        --pooled_w;
    }
    if((pooled_h - 1) * pool_stride_y >= h_in + pad_y)
    {
        --pooled_h;
    }

    // NOTE(review): output indexing below uses h_out * w_out as the plane
    // stride but pooled_w as the row stride -- these agree only when
    // pooled_w == w_out; verify against the callers' output shapes.
    if(type == PoolingType::MAX)
    {
        for(int r = 0; r < upper_dims; ++r)
        {
            for(int h = 0; h < pooled_h; ++h)
            {
                for(int w = 0; w < pooled_w; ++w)
                {
                    // Window clipped to the valid input region
                    int wstart = w * pool_stride_x - pad_x;
                    int hstart = h * pool_stride_y - pad_y;
                    int wend = std::min(wstart + pool_size, w_in);
                    int hend = std::min(hstart + pool_size, h_in);
                    wstart = std::max(wstart, 0);
                    hstart = std::max(hstart, 0);

                    T max_val = std::numeric_limits<T>::lowest();
                    for(int y = hstart; y < hend; ++y)
                    {
                        for(int x = wstart; x < wend; ++x)
                        {
                            T val = in[r * h_in * w_in + y * w_in + x];
                            if(val > max_val)
                            {
                                max_val = val;
                            }
                        }
                    }

                    out[r * h_out * w_out + h * pooled_w + w] = max_val;
                }
            }
        }
    }
    else // Average pooling
    {
        for(int r = 0; r < upper_dims; ++r)
        {
            for(int h = 0; h < pooled_h; ++h)
            {
                for(int w = 0; w < pooled_w; ++w)
                {
                    T avg_val = 0;
                    // The divisor `pool` is computed from the window clipped to
                    // the padded extents, so padding pixels count towards the
                    // average (count-include-pad semantics); the accumulation
                    // itself only reads valid input pixels.
                    int wstart = w * pool_stride_x - pad_x;
                    int hstart = h * pool_stride_y - pad_y;
                    int wend = std::min(wstart + pool_size, w_in + pad_x);
                    int hend = std::min(hstart + pool_size, h_in + pad_y);
                    int pool = (hend - hstart) * (wend - wstart);
                    wstart = std::max(wstart, 0);
                    hstart = std::max(hstart, 0);
                    wend = std::min(wend, w_in);
                    hend = std::min(hend, h_in);
                    if(is_floating_point<T>::value)
                    {
                        for(int y = hstart; y < hend; ++y)
                        {
                            for(int x = wstart; x < wend; ++x)
                            {
                                avg_val += in[r * h_in * w_in + y * w_in + x];
                            }
                        }
                        out[r * h_out * w_out + h * pooled_w + w] = avg_val / pool;
                    }
                    else
                    {
                        // scale_values_q8[n] holds 1/n in Q7 format (e.g.
                        // 0x40/128 = 1/2, 0x2A/128 ~ 1/3); it is shifted down
                        // to the tensor's fixed point position before the
                        // saturating multiply.
                        static std::array<qint8_t, 10> scale_values_q8 =
                        { { 0x0, 0x0, 0x40, 0x2A, 0x20, 0x19, 0x15, 0x12, 0x10, 0xE } };

                        for(int y = hstart; y < hend; ++y)
                        {
                            for(int x = wstart; x < wend; ++x)
                            {
                                avg_val = sqadd_qs8(avg_val, in[r * h_in * w_in + y * w_in + x]);
                            }
                        }
                        out[r * h_out * w_out + h * pooled_w + w] = sqmul_qs8(avg_val, (scale_values_q8[pool] >> (7 - fixed_point_position)), fixed_point_position);
                    }
                }
            }
        }
    }
}
1443
1444// Softmax Layer
Pablo Tello383deec2017-06-23 10:40:05 +01001445template <typename T, typename std::enable_if<is_floating_point<T>::value, int>::type * = nullptr>
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001446void softmax_layer(const Tensor<T> &in, Tensor<T> &out)
1447{
1448 const int cols = static_cast<int>(in.shape()[0]);
1449 const int upper_dims = in.shape().total_size() / cols;
1450 for(int r = 0; r < upper_dims; ++r)
1451 {
1452 // Find max
1453 T max = std::numeric_limits<T>::lowest();
1454 for(int c = 0; c < cols; ++c)
1455 {
1456 const T x = in[r * cols + c];
1457 if(x > max)
1458 {
1459 max = x;
1460 }
1461 }
1462
1463 // Regularize
1464 T sum = 0;
1465 for(int c = 0; c < cols; ++c)
1466 {
1467 const T res = exp(in[r * cols + c] - max);
1468 out[r * cols + c] = res;
1469 sum += res;
1470 }
1471
1472 // Normalize
1473 const T norm_val = 1 / sum;
1474 for(int c = 0; c < cols; ++c)
1475 {
1476 out[r * cols + c] *= norm_val;
1477 }
1478 }
1479}
// Softmax Layer for fixed point type
//
// Row-wise softmax over the innermost dimension, computed in fixed-point
// arithmetic: subtract the row maximum, exponentiate, then divide by the
// sum of the exponentials.
template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type * = nullptr>
void softmax_layer(const Tensor<T> &in, Tensor<T> &out)
{
    using namespace fixed_point_arithmetic;
    // Wider intermediate type for the sum of exponentials, so the
    // accumulation overflows less easily than it would in T
    using promoted_T = typename test::traits::promote<T>::type;

    const int fixed_point_position = in.fixed_point_position();
    const int cols = static_cast<int>(in.shape()[0]);
    const int upper_dims = in.shape().total_size() / cols;

    for(int r = 0; r < upper_dims; ++r)
    {
        // Find max value of the row (subtracted below for numerical stability)
        fixed_point<T> max(std::numeric_limits<T>::lowest(), fixed_point_position, true);
        for(int c = 0; c < cols; ++c)
        {
            const fixed_point<T> x(in[r * cols + c], fixed_point_position, true);
            if(x > max)
            {
                max = x;
            }
        }

        // Regularize: store exp(x - max) and accumulate the sum in promoted_T
        fixed_point<promoted_T> sum(0, fixed_point_position);
        for(int c = 0; c < cols; ++c)
        {
            const fixed_point<T> x(in[r * cols + c], fixed_point_position, true);
            fixed_point<T> res = exp(x - max);
            out[r * cols + c] = res.raw();
            sum = add(sum, static_cast<fixed_point<promoted_T>>(res));
        }

        // Normalize: narrow the wide sum back to T, then divide each element
        fixed_point<T> sat_sum(sum);
        for(int c = 0; c < cols; ++c)
        {
            const fixed_point<T> x(out[r * cols + c], fixed_point_position, true);
            out[r * cols + c] = div(x, sat_sum).raw();
        }
    }
}
1522
1523// Fixed point operations
1524template <typename T>
1525void fixed_point_operation(const Tensor<T> &in, Tensor<T> &out, FixedPointOp op)
1526{
1527 int p = in.fixed_point_position();
1528 switch(op)
1529 {
1530 case FixedPointOp::EXP:
1531 for(int i = 0; i < in.num_elements(); ++i)
1532 {
1533 out[i] = fixed_point_arithmetic::exp(fixed_point_arithmetic::fixed_point<T>(in[i], p, true)).raw();
1534 }
1535 break;
1536 case FixedPointOp::LOG:
1537 for(int i = 0; i < in.num_elements(); ++i)
1538 {
1539 out[i] = fixed_point_arithmetic::log(fixed_point_arithmetic::fixed_point<T>(in[i], p, true)).raw();
1540 }
1541 break;
1542 case FixedPointOp::INV_SQRT:
1543 for(int i = 0; i < in.num_elements(); ++i)
1544 {
1545 out[i] = fixed_point_arithmetic::inv_sqrt(fixed_point_arithmetic::fixed_point<T>(in[i], p, true)).raw();
1546 }
1547 break;
1548 case FixedPointOp::RECIPROCAL:
1549 for(int i = 0; i < in.num_elements(); ++i)
1550 {
1551 out[i] = fixed_point_arithmetic::div(fixed_point_arithmetic::fixed_point<T>(1, p), fixed_point_arithmetic::fixed_point<T>(in[i], p, true)).raw();
1552 }
1553 break;
1554 default:
1555 ARM_COMPUTE_ERROR("Fixed point operation not supported");
1556 break;
1557 }
1558}
1559
1560// Tensor print
1561template <typename T>
1562void print(const Tensor<T> &in, std::ostream &out)
1563{
1564 out << "\n";
1565 for(int i = 0; i < in.num_elements(); ++i)
1566 {
1567 out << in[i] << " ";
1568 }
1569 out << "\n";
1570}
1571} // namespace tensor_operations
1572} // namespace validation
1573} // namespace test
1574} // namespace arm_compute
1575
1576#endif /* __ARM_COMPUTE_TEST_TENSOR_OPERATIONS_H__ */