blob: 9f2c6e23b579a154c3a7d692e7548766b3eff2ba [file] [log] [blame]
Michalis Spyrou04f089c2017-08-08 17:42:38 +01001/*
Giorgio Arena3ecf9fe2021-04-28 16:11:51 +01002 * Copyright (c) 2016-2021 Arm Limited.
Michalis Spyrou04f089c2017-08-08 17:42:38 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "helpers.h"
Michalis Spyrou0b18d972020-01-30 18:11:13 +000025#include "helpers_asymm.h"
Michalis Spyrou04f089c2017-08-08 17:42:38 +010026
Michalis Spyrou7317e392020-01-17 11:27:49 +000027#if defined(FLOAT_DATA_TYPE)
Giorgio Arena1fac0372021-04-30 15:09:46 +010028#define ISGREATER(x, y) (SELECT_VEC_DATA_TYPE(DATA_TYPE_PROMOTED, VEC_SIZE))(isgreater(x, y))
29#define ISLESS(x, y) (SELECT_VEC_DATA_TYPE(DATA_TYPE_PROMOTED, VEC_SIZE))(isless(x, y))
30#define ISGREATER_SCALAR(x, y) (SELECT_DATA_TYPE(DATA_TYPE_PROMOTED))(isgreater(x, y))
31#define ISLESS_SCALAR(x, y) (SELECT_DATA_TYPE(DATA_TYPE_PROMOTED))(isless(x, y))
Michalis Spyrou7317e392020-01-17 11:27:49 +000032#else // !FLOAT_DATA_TYPE
33#if defined(WIDTH)
34#define ISGREATER(x, y) (x > y) ? 1 : 0
35#define ISLESS(x, y) (x < y) ? 1 : 0
Giorgio Arena1fac0372021-04-30 15:09:46 +010036#define ISGREATER_SCALAR ISGREATER
37#define ISLESS_SCALAR ISLESS
Michalis Spyrou7317e392020-01-17 11:27:49 +000038#else // !defined(WIDTH)
Giorgio Arena3ecf9fe2021-04-28 16:11:51 +010039#define ISGREATER(x, y) select((VEC_DATA_TYPE(int, VEC_SIZE))0, (VEC_DATA_TYPE(int, VEC_SIZE)) - 1, x > y)
40#define ISLESS(x, y) select((VEC_DATA_TYPE(int, VEC_SIZE))0, (VEC_DATA_TYPE(int, VEC_SIZE)) - 1, x < y)
Michalis Spyrou7317e392020-01-17 11:27:49 +000041#endif // defined(WIDTH)
42#endif // defined(FLOAT_DATA_TYPE)
43
Giorgio Arena3ecf9fe2021-04-28 16:11:51 +010044#if defined(WIDTH)
Manuel Bottini34f88dd2019-10-18 10:37:46 +000045#if defined(OPERATION)
Giorgio Arena3ecf9fe2021-04-28 16:11:51 +010046
47#define sum(in0, in1, size) (in0 + SUM_REDUCE(in1, size))
48#define square_sum(in0, in1, size) (in0 + SUM_REDUCE((in1 * in1), size))
49#define product(in0, in1, size) (in0 * PROD_REDUCE(in1, size))
50
Michalis Spyrou7e9391b2018-10-05 14:49:28 +010051/** This kernel performs parallel reduction given an operation on x-axis.
Michalis Spyrou04f089c2017-08-08 17:42:38 +010052 *
53 * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
Michalis Spyrou04f089c2017-08-08 17:42:38 +010054 * @note The operation we want to perform must be passed at compile time using -DOPERATION e.g. -DOPERATION=square_sum
Michalis Spyrou7e9391b2018-10-05 14:49:28 +010055 * @note The mean flag must be passed at compile time using -DMEAN if we want to compute the mean value
Manuel Bottinib412fab2018-12-10 17:40:23 +000056 * @note The product flag must be passed at compile time using -DPROD if we want to compute the product, otherwise sum will be used
Michalis Spyrou7e9391b2018-10-05 14:49:28 +010057 * @note The width size must be passed at compile time using -DWIDTH e.g. -DWIDTH=128 if we want to compute the mean value
Michalis Spyrou04f089c2017-08-08 17:42:38 +010058 *
Giorgio Arena3ecf9fe2021-04-28 16:11:51 +010059 * @param[in] input_ptr Pointer to the source tensor. Supported data types: F16/F32
60 * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
61 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
62 * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
63 * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
64 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor
65 * @param[in] output_ptr Pointer to the destination tensor. Supported data types: same as @p input
66 * @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes)
67 * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
68 * @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes)
69 * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
70 * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor
Michalis Spyrou04f089c2017-08-08 17:42:38 +010071 */
Michalis Spyrou7e9391b2018-10-05 14:49:28 +010072__kernel void reduction_operation_x(
Giorgio Arena1fac0372021-04-30 15:09:46 +010073 TENSOR3D_DECLARATION(input),
74 TENSOR3D_DECLARATION(output))
Michalis Spyrou04f089c2017-08-08 17:42:38 +010075{
Giorgio Arena3ecf9fe2021-04-28 16:11:51 +010076 int y = get_global_id(1);
Giorgio Arena1fac0372021-04-30 15:09:46 +010077 int z = get_global_id(2);
Michalis Spyrou04f089c2017-08-08 17:42:38 +010078
Giorgio Arena1fac0372021-04-30 15:09:46 +010079 __global uchar *input_addr = input_ptr + input_offset_first_element_in_bytes + y * input_stride_y + z * input_stride_z;
80 __global uchar *output_addr = output_ptr + output_offset_first_element_in_bytes + y * output_stride_y + z * output_stride_z;
Michalis Spyrou04f089c2017-08-08 17:42:38 +010081
Manuel Bottinib412fab2018-12-10 17:40:23 +000082#if defined(PROD)
Giorgio Arena3ecf9fe2021-04-28 16:11:51 +010083 DATA_TYPE res = (DATA_TYPE)1;
84#else // defined(PROD)
85 DATA_TYPE res = (DATA_TYPE)0;
Manuel Bottini34f88dd2019-10-18 10:37:46 +000086#endif // defined(PROD)
Michalis Spyrouf6402dd2018-01-26 15:06:19 +000087
Giorgio Arena3ecf9fe2021-04-28 16:11:51 +010088 int x = 0;
89
90 for(; x <= (WIDTH - VEC_SIZE); x += VEC_SIZE)
91 {
92 VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
93 vals = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(input_addr + x * sizeof(DATA_TYPE)));
94 res = OPERATION(res, vals, VEC_SIZE);
Michalis Spyrou04f089c2017-08-08 17:42:38 +010095 }
Giorgio Arena3ecf9fe2021-04-28 16:11:51 +010096
97#if(WIDTH % VEC_SIZE)
98 _Pragma("unroll") for(; x < WIDTH; ++x)
99 {
100 DATA_TYPE val = *((__global DATA_TYPE *)(input_addr + x * sizeof(DATA_TYPE)));
101 res = OPERATION(res, val, 1);
102 }
103#endif // (WIDTH % VEC_SIZE)
104
105#if defined(MEAN)
106 res /= WIDTH;
107#endif // defined(MEAN)
108 *((__global DATA_TYPE *)output_addr) = res;
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100109}
Manuel Bottini34f88dd2019-10-18 10:37:46 +0000110#endif // defined(OPERATION)
Michalis Spyrou7930db42018-11-22 17:36:28 +0000111/** This kernel performs reduction on x-axis. (Non parallel)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100112 *
Michalis Spyrou7930db42018-11-22 17:36:28 +0000113 * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100114 * @note The width size must be passed at compile time using -DWIDTH e.g. -DWIDTH=128
Manuel Bottinib412fab2018-12-10 17:40:23 +0000115 * @note The product flag must be passed at compile time using -DPROD if we want to compute the product, otherwise sum will be used
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100116 *
Giorgio Arena3ecf9fe2021-04-28 16:11:51 +0100117 * @param[in] input_ptr Pointer to the source tensor. Supported data types: S32/F16/F32 and QASYMM8/QASYMM8_SIGNED for operation MEAN
118 * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
119 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
120 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor
121 * @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p input_ptr
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100122 * @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
123 * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
124 * @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
125 */
Michalis Spyrou7930db42018-11-22 17:36:28 +0000126__kernel void reduction_operation_non_parallel_x(
Giorgio Arena3ecf9fe2021-04-28 16:11:51 +0100127 VECTOR_DECLARATION(input),
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100128 VECTOR_DECLARATION(output))
129{
Giorgio Arena3ecf9fe2021-04-28 16:11:51 +0100130 Vector input = CONVERT_TO_VECTOR_STRUCT(input);
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100131 Vector output = CONVERT_TO_VECTOR_STRUCT(output);
132
Giorgio Arena3ecf9fe2021-04-28 16:11:51 +0100133 DATA_TYPE_PROMOTED res = CONVERT(*((__global DATA_TYPE *)vector_offset(&input, 0)), DATA_TYPE_PROMOTED);
Michalis Spyrou0b18d972020-01-30 18:11:13 +0000134
135 // Convert input into F32 in order to perform quantized multiplication
136#if defined(PROD) && defined(OFFSET) && defined(SCALE)
137 float res_f = DEQUANTIZE(res, OFFSET, SCALE, DATA_TYPE_PROMOTED, 1);
138#endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100139
Michalis Spyrou7930db42018-11-22 17:36:28 +0000140 for(unsigned int x = 1; x < WIDTH; ++x)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100141 {
Giorgio Arena3ecf9fe2021-04-28 16:11:51 +0100142 DATA_TYPE_PROMOTED in = CONVERT(*((__global DATA_TYPE *)vector_offset(&input, x)), DATA_TYPE_PROMOTED);
Manuel Bottini7b9998d2019-10-21 17:59:07 +0100143#if defined(MIN)
Giorgio Arena1fac0372021-04-30 15:09:46 +0100144 res = select(res, in, ISLESS_SCALAR(in, res));
Usama Arif048b0f32019-05-22 16:32:27 +0100145#elif defined(MAX)
Giorgio Arena1fac0372021-04-30 15:09:46 +0100146 res = select(res, in, ISGREATER_SCALAR(in, res));
Michalis Spyrou0b18d972020-01-30 18:11:13 +0000147#elif defined(PROD)
148#if defined(OFFSET) && defined(SCALE)
149 res_f *= DEQUANTIZE(in, OFFSET, SCALE, DATA_TYPE_PROMOTED, 1);
150#else // !(defined(OFFSET) && defined(SCALE))
151 res *= in;
152#endif // defined(OFFSET) && defined(SCALE)
153#else // defined(SUM))
Michalis Spyrou7930db42018-11-22 17:36:28 +0000154 res += in;
Michalis Spyrou0b18d972020-01-30 18:11:13 +0000155#endif // defined(MAX) || defined(MIN) || defined(PROD)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100156 }
157
Michalis Spyrou7930db42018-11-22 17:36:28 +0000158 // Store result
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100159#if defined(MEAN)
160 res /= WIDTH;
Michalis Spyrou7930db42018-11-22 17:36:28 +0000161#endif // defined(MEAN)
Michalis Spyrou0b18d972020-01-30 18:11:13 +0000162
163 // Subtract the offsets in case of quantized SUM
164#if defined(SUM) && defined(OFFSET) && defined(SCALE)
165 res -= (WIDTH - 1) * OFFSET;
166#endif // defined(OFFSET) && defined(OFFSET) && defined(SCALE)
167
168 // Re-quantize
169#if defined(PROD) && defined(OFFSET) && defined(SCALE)
170 res = QUANTIZE(res_f, OFFSET, SCALE, DATA_TYPE_PROMOTED, 1);
171#endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
172
Michalis Spyrou0b18d972020-01-30 18:11:13 +0000173 *((__global DATA_TYPE *)output.ptr) = CONVERT_SAT(res, DATA_TYPE);
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100174}
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100175#endif // defined(WIDTH)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100176
177#if defined(HEIGHT)
178/** This kernel performs reduction on y-axis.
179 *
Michalis Spyrou7930db42018-11-22 17:36:28 +0000180 * @note The input data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100181 * @note The height size must be passed at compile time using -DHEIGHT e.g. -DHEIGHT=128
182 *
Giorgio Arena3ecf9fe2021-04-28 16:11:51 +0100183 * @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8/QASYMM8_SIGNED/S32/F16/F32
184 * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
185 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
186 * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
187 * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
188 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor
189 * @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p input_ptr
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100190 * @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
191 * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
192 * @param[in] output_stride_y Stride of the output tensor in Y dimension (in bytes)
193 * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
194 * @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
195 */
196__kernel void reduction_operation_y(
Giorgio Arena3ecf9fe2021-04-28 16:11:51 +0100197 IMAGE_DECLARATION(input),
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100198 IMAGE_DECLARATION(output))
199{
Giorgio Arena3ecf9fe2021-04-28 16:11:51 +0100200 int x = max((int)(get_global_id(0) * VEC_SIZE - (VEC_SIZE - VEC_SIZE_LEFTOVER) % VEC_SIZE), 0);
201 int y = get_global_id(1);
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100202
Giorgio Arena3ecf9fe2021-04-28 16:11:51 +0100203 __global uchar *input_addr = input_ptr + input_offset_first_element_in_bytes + x * sizeof(DATA_TYPE) + y * input_stride_y;
204 __global uchar *output_addr = output_ptr + output_offset_first_element_in_bytes + x * sizeof(DATA_TYPE) + y * output_stride_y;
205
206 VEC_DATA_TYPE(DATA_TYPE_PROMOTED, VEC_SIZE)
207 res = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input_addr), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, VEC_SIZE));
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100208
Michalis Spyrou0b18d972020-01-30 18:11:13 +0000209 // Convert input into F32 in order to perform quantized multiplication
210#if defined(PROD) && defined(OFFSET) && defined(SCALE)
Giorgio Arena3ecf9fe2021-04-28 16:11:51 +0100211 VEC_DATA_TYPE(float, VEC_SIZE)
212 res_f = DEQUANTIZE(res, OFFSET, SCALE, DATA_TYPE_PROMOTED, VEC_SIZE);
Michalis Spyrou0b18d972020-01-30 18:11:13 +0000213#endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
214
Michalis Spyrou7930db42018-11-22 17:36:28 +0000215#if defined(SUM_SQUARE)
216 res *= res;
217#endif // defined(SUM_SQUARE)
218
Michalis Spyrou7930db42018-11-22 17:36:28 +0000219 for(unsigned int y = 1; y < HEIGHT; ++y)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100220 {
Giorgio Arena3ecf9fe2021-04-28 16:11:51 +0100221 VEC_DATA_TYPE(DATA_TYPE_PROMOTED, VEC_SIZE)
222 in = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(input_addr + y * input_stride_y)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, VEC_SIZE));
Manuel Bottini7b9998d2019-10-21 17:59:07 +0100223#if defined(MIN)
Usama Arif048b0f32019-05-22 16:32:27 +0100224 res = select(res, in, ISLESS(in, res));
225#elif defined(MAX)
Michele Di Giorgio03337042020-02-26 17:47:55 +0000226 res = select(res, in, ISGREATER(in, res));
Manuel Bottini7b9998d2019-10-21 17:59:07 +0100227#else // !(defined(MAX) || defined(MIN))
Michalis Spyrou8aaf93e2018-10-11 17:33:32 +0100228#if defined(SUM_SQUARE)
229 in *= in;
Michalis Spyrou7930db42018-11-22 17:36:28 +0000230#endif // defined(SUM_SQUARE)
Manuel Bottinib412fab2018-12-10 17:40:23 +0000231#if defined(PROD)
Michalis Spyrou0b18d972020-01-30 18:11:13 +0000232
233#if defined(OFFSET) && defined(SCALE)
Giorgio Arena3ecf9fe2021-04-28 16:11:51 +0100234 res_f *= DEQUANTIZE(in, OFFSET, SCALE, DATA_TYPE_PROMOTED, VEC_SIZE);
Michalis Spyrou0b18d972020-01-30 18:11:13 +0000235#else // !(defined(OFFSET) && defined(SCALE))
Manuel Bottinib412fab2018-12-10 17:40:23 +0000236 res *= in;
Michalis Spyrou0b18d972020-01-30 18:11:13 +0000237#endif // defined(OFFSET) && defined(SCALE)
238
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100239#else // !defined(PROD)
Michalis Spyrou8aaf93e2018-10-11 17:33:32 +0100240 res += in;
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100241#endif // defined(PROD)
Manuel Bottini7b9998d2019-10-21 17:59:07 +0100242#endif // defined(MAX) || defined(MIN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100243 }
244
245#if defined(MEAN)
246 res /= HEIGHT;
Michalis Spyrou7930db42018-11-22 17:36:28 +0000247#endif // defined(MEAN)
Michalis Spyrou0b18d972020-01-30 18:11:13 +0000248
249 // Subtract the offsets in case of quantized SUM
250#if defined(SUM) && defined(OFFSET) && defined(SCALE)
251 res -= (HEIGHT - 1) * OFFSET;
252#endif // defined(OFFSET) && defined(OFFSET) && defined(SCALE)
253
254 // Re-quantize
255#if defined(PROD) && defined(OFFSET) && defined(SCALE)
Giorgio Arena3ecf9fe2021-04-28 16:11:51 +0100256 res = QUANTIZE(res_f, OFFSET, SCALE, DATA_TYPE_PROMOTED, VEC_SIZE);
Michalis Spyrou0b18d972020-01-30 18:11:13 +0000257#endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
258
259 // Store result
Giorgio Arena3ecf9fe2021-04-28 16:11:51 +0100260 VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
261 res0 = CONVERT_SAT(res, VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE));
262 STORE_VECTOR_SELECT(res, DATA_TYPE, output_addr, VEC_SIZE, VEC_SIZE_LEFTOVER, VEC_SIZE_LEFTOVER != 0 && get_global_id(0) == 0);
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100263}
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100264#endif // defined(HEIGHT)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100265
266#if defined(DEPTH)
267/** This kernel performs reduction on z-axis.
268 *
269 * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
270 * @note The depth size must be passed at compile time using -DDEPTH e.g. -DDEPTH=128
271 *
Michele Di Giorgiof6f78762020-07-06 11:27:21 +0100272 * @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8/QASYMM8_SIGNED/S32/F16/F32
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100273 * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
274 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
275 * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
276 * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
277 * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
278 * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
279 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor
Michele Di Giorgiof6f78762020-07-06 11:27:21 +0100280 * @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p input_ptr
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100281 * @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
282 * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
283 * @param[in] output_stride_y Stride of the output tensor in Y dimension (in bytes)
284 * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
285 * @param[in] output_stride_z Stride of the output tensor in Z dimension (in bytes)
286 * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
287 * @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
288 */
289__kernel void reduction_operation_z(
290 TENSOR3D_DECLARATION(input),
291 TENSOR3D_DECLARATION(output))
292{
Giorgio Arena3ecf9fe2021-04-28 16:11:51 +0100293 int x = max((int)(get_global_id(0) * VEC_SIZE - (VEC_SIZE - VEC_SIZE_LEFTOVER) % VEC_SIZE), 0);
294 int y = get_global_id(1);
295 int z = get_global_id(2);
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100296
Giorgio Arena3ecf9fe2021-04-28 16:11:51 +0100297 __global uchar *input_addr = input_ptr + input_offset_first_element_in_bytes + x * sizeof(DATA_TYPE) + y * input_stride_y + z * input_stride_z;
298 __global uchar *output_addr = output_ptr + output_offset_first_element_in_bytes + x * sizeof(DATA_TYPE) + y * output_stride_y + z * output_stride_z;
299
300 VEC_DATA_TYPE(DATA_TYPE_PROMOTED, VEC_SIZE)
301 res = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input_addr), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, VEC_SIZE));
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100302
Michalis Spyrou0b18d972020-01-30 18:11:13 +0000303 // Convert input into F32 in order to perform quantized multiplication
304#if defined(PROD) && defined(OFFSET) && defined(SCALE)
Giorgio Arena3ecf9fe2021-04-28 16:11:51 +0100305 VEC_DATA_TYPE(float, VEC_SIZE)
306 res_f = DEQUANTIZE(res, OFFSET, SCALE, DATA_TYPE_PROMOTED, VEC_SIZE);
Michalis Spyrou0b18d972020-01-30 18:11:13 +0000307#endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
308
Michalis Spyrou7930db42018-11-22 17:36:28 +0000309#if defined(SUM_SQUARE)
310 res *= res;
311#endif // defined(SUM_SQUARE)
312
Michalis Spyrou7930db42018-11-22 17:36:28 +0000313 for(unsigned int z = 1; z < DEPTH; ++z)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100314 {
Giorgio Arena3ecf9fe2021-04-28 16:11:51 +0100315 VEC_DATA_TYPE(DATA_TYPE_PROMOTED, VEC_SIZE)
316 in = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(input_addr + z * input_stride_z)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, VEC_SIZE));
Georgios Pinitas8be91482019-03-26 17:23:28 +0000317
Manuel Bottini7b9998d2019-10-21 17:59:07 +0100318#if defined(MIN)
Usama Arif048b0f32019-05-22 16:32:27 +0100319 res = select(res, in, ISLESS(in, res));
320#elif defined(MAX)
Michele Di Giorgio03337042020-02-26 17:47:55 +0000321 res = select(res, in, ISGREATER(in, res));
Manuel Bottini7b9998d2019-10-21 17:59:07 +0100322#else // !(defined(MAX) || defined(MIN))
Michalis Spyrou8aaf93e2018-10-11 17:33:32 +0100323#if defined(SUM_SQUARE)
324 in *= in;
Michalis Spyrou7930db42018-11-22 17:36:28 +0000325#endif // defined(SUM_SQUARE)
Manuel Bottinib412fab2018-12-10 17:40:23 +0000326#if defined(PROD)
Michalis Spyrou0b18d972020-01-30 18:11:13 +0000327
328#if defined(OFFSET) && defined(SCALE)
Giorgio Arena3ecf9fe2021-04-28 16:11:51 +0100329 res_f *= DEQUANTIZE(in, OFFSET, SCALE, DATA_TYPE_PROMOTED, VEC_SIZE);
Michalis Spyrou0b18d972020-01-30 18:11:13 +0000330#else // !(defined(OFFSET) && defined(SCALE))
Manuel Bottinib412fab2018-12-10 17:40:23 +0000331 res *= in;
Michalis Spyrou0b18d972020-01-30 18:11:13 +0000332#endif // defined(OFFSET) && defined(SCALE)
333
Giorgio Arena3ecf9fe2021-04-28 16:11:51 +0100334#else // !defined(PROD)
Michalis Spyrou8aaf93e2018-10-11 17:33:32 +0100335 res += in;
Manuel Bottini7b9998d2019-10-21 17:59:07 +0100336#endif // defined(PROD)
337#endif // defined(MAX) || defined(MIN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100338 }
339
340#if defined(MEAN)
341 res /= DEPTH;
Michalis Spyrou7930db42018-11-22 17:36:28 +0000342#endif // defined(MEAN)
Michalis Spyrou0b18d972020-01-30 18:11:13 +0000343
344 // Subtract the offsets in case of quantized SUM
345#if defined(SUM) && defined(OFFSET) && defined(SCALE)
346 res -= (DEPTH - 1) * OFFSET;
347#endif // defined(OFFSET) && defined(OFFSET) && defined(SCALE)
348
349 // Re-quantize
350#if defined(PROD) && defined(OFFSET) && defined(SCALE)
Giorgio Arena3ecf9fe2021-04-28 16:11:51 +0100351 res = QUANTIZE(res_f, OFFSET, SCALE, DATA_TYPE_PROMOTED, VEC_SIZE);
Michalis Spyrou0b18d972020-01-30 18:11:13 +0000352#endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
353
354 // Store result
Giorgio Arena3ecf9fe2021-04-28 16:11:51 +0100355 VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
356 res0 = CONVERT_SAT(res, VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE));
357
358 STORE_VECTOR_SELECT(res, DATA_TYPE, output_addr, VEC_SIZE, VEC_SIZE_LEFTOVER, VEC_SIZE_LEFTOVER != 0 && get_global_id(0) == 0);
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100359}
360#endif /* defined(DEPTH) */
361
362#if defined(BATCH) && defined(DEPTH)
363/** This kernel performs reduction on w-axis.
364 *
365 * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
366 * @note The batch size must be passed at compile time using -DBATCH e.g. -DBATCH=128
Manuel Bottini34f88dd2019-10-18 10:37:46 +0000367 * @note The depth size must be passed at compile time using -DBATCH e.g. -DDEPTH=128
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100368 *
Michele Di Giorgiof6f78762020-07-06 11:27:21 +0100369 * @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8/QASYMM8_SIGNED/S32/F16/F32
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100370 * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
371 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
372 * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
373 * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
374 * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
375 * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
376 * @param[in] input_stride_w Stride of the source tensor in W dimension (in bytes)
377 * @param[in] input_step_w input_stride_w * number of elements along W processed per workitem(in bytes)
378 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor
Michele Di Giorgiof6f78762020-07-06 11:27:21 +0100379 * @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p input_ptr
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100380 * @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
381 * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
382 * @param[in] output_stride_y Stride of the output tensor in Y dimension (in bytes)
383 * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
384 * @param[in] output_stride_z Stride of the output tensor in Z dimension (in bytes)
385 * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
386 * @param[in] output_stride_w Stride of the output tensor in W dimension (in bytes)
387 * @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes)
388 * @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
389 */
390__kernel void reduction_operation_w(
391 TENSOR4D_DECLARATION(input),
392 TENSOR4D_DECLARATION(output))
393{
Giorgio Arena3ecf9fe2021-04-28 16:11:51 +0100394 int x = max((int)(get_global_id(0) * VEC_SIZE - (VEC_SIZE - VEC_SIZE_LEFTOVER) % VEC_SIZE), 0);
395 int y = get_global_id(1);
396 int z = get_global_id(2);
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100397
Giorgio Arena3ecf9fe2021-04-28 16:11:51 +0100398 __global uchar *input_addr = input_ptr + input_offset_first_element_in_bytes + x * sizeof(DATA_TYPE) + y * input_stride_y + (z % DEPTH) * input_stride_z + (z / DEPTH) * input_stride_w;
399 __global uchar *output_addr = output_ptr + output_offset_first_element_in_bytes + x * sizeof(DATA_TYPE) + y * output_stride_y + (z % DEPTH) * output_stride_z + (z / DEPTH) * output_stride_z;
400
401 VEC_DATA_TYPE(DATA_TYPE_PROMOTED, VEC_SIZE)
402 res = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input_addr), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, VEC_SIZE));
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100403
Michalis Spyrou0b18d972020-01-30 18:11:13 +0000404 // Convert input into F32 in order to perform quantized multiplication
405#if defined(PROD) && defined(OFFSET) && defined(SCALE)
Giorgio Arena3ecf9fe2021-04-28 16:11:51 +0100406 VEC_DATA_TYPE(float, VEC_SIZE)
407 res_f = DEQUANTIZE(res, OFFSET, SCALE, DATA_TYPE_PROMOTED, VEC_SIZE);
Michalis Spyrou0b18d972020-01-30 18:11:13 +0000408#endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
409
Michalis Spyrou7930db42018-11-22 17:36:28 +0000410#if defined(SUM_SQUARE)
411 res *= res;
412#endif // defined(SUM_SQUARE)
413
Michalis Spyrou7930db42018-11-22 17:36:28 +0000414 for(unsigned int w = 1; w < BATCH; ++w)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100415 {
Giorgio Arena3ecf9fe2021-04-28 16:11:51 +0100416 VEC_DATA_TYPE(DATA_TYPE_PROMOTED, VEC_SIZE)
417 in = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(input_addr + w * input_stride_w)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, VEC_SIZE));
Michalis Spyrou7930db42018-11-22 17:36:28 +0000418
Manuel Bottini7b9998d2019-10-21 17:59:07 +0100419#if defined(MIN)
Usama Arif048b0f32019-05-22 16:32:27 +0100420 res = select(res, in, ISLESS(in, res));
421#elif defined(MAX)
Michele Di Giorgio03337042020-02-26 17:47:55 +0000422 res = select(res, in, ISGREATER(in, res));
Manuel Bottini7b9998d2019-10-21 17:59:07 +0100423#else // !(defined(MAX) || defined(MIN))
Michalis Spyrou8aaf93e2018-10-11 17:33:32 +0100424#if defined(SUM_SQUARE)
425 in *= in;
Michalis Spyrou7930db42018-11-22 17:36:28 +0000426#endif // defined(SUM_SQUARE)
Manuel Bottinib412fab2018-12-10 17:40:23 +0000427#if defined(PROD)
Michalis Spyrou0b18d972020-01-30 18:11:13 +0000428
429#if defined(OFFSET) && defined(SCALE)
Giorgio Arena3ecf9fe2021-04-28 16:11:51 +0100430 res_f *= DEQUANTIZE(in, OFFSET, SCALE, DATA_TYPE_PROMOTED, VEC_SIZE);
Michalis Spyrou0b18d972020-01-30 18:11:13 +0000431#else // !(defined(OFFSET) && defined(SCALE))
Manuel Bottinib412fab2018-12-10 17:40:23 +0000432 res *= in;
Michalis Spyrou0b18d972020-01-30 18:11:13 +0000433#endif // defined(OFFSET) && defined(SCALE)
434
435#else // !defined(PROD)
Michalis Spyrou8aaf93e2018-10-11 17:33:32 +0100436 res += in;
Manuel Bottinib412fab2018-12-10 17:40:23 +0000437#endif //defined(PROD)
Manuel Bottini7b9998d2019-10-21 17:59:07 +0100438#endif // defined(MAX) || defined(MIN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100439 }
440
441#if defined(MEAN)
442 res /= BATCH;
Michalis Spyrou7930db42018-11-22 17:36:28 +0000443#endif // defined(MEAN)
Michalis Spyrou0b18d972020-01-30 18:11:13 +0000444
445 // Subtract the offsets in case of quantized SUM
446#if defined(SUM) && defined(OFFSET) && defined(SCALE)
447 res -= (BATCH - 1) * OFFSET;
448#endif // defined(OFFSET) && defined(OFFSET) && defined(SCALE)
449
450 // Re-quantize
451#if defined(PROD) && defined(OFFSET) && defined(SCALE)
Giorgio Arena3ecf9fe2021-04-28 16:11:51 +0100452 res = QUANTIZE(res_f, OFFSET, SCALE, DATA_TYPE_PROMOTED, VEC_SIZE);
Michalis Spyrou0b18d972020-01-30 18:11:13 +0000453#endif // defined(PROD) && defined(OFFSET) && defined(SCALE)
454
455 // Store result
Giorgio Arena3ecf9fe2021-04-28 16:11:51 +0100456 VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
457 res0 = CONVERT_SAT(res, VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE));
458 STORE_VECTOR_SELECT(res, DATA_TYPE, output_addr, VEC_SIZE, VEC_SIZE_LEFTOVER, VEC_SIZE_LEFTOVER != 0 && get_global_id(0) == 0);
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100459}
Manuel Bottinib412fab2018-12-10 17:40:23 +0000460#endif /* defined(BATCH) && defined(DEPTH) */