blob: 86cf37e4915966d467fbfcc74a0e4ffb04777990 [file] [log] [blame]
Michalis Spyrou04f089c2017-08-08 17:42:38 +01001/*
Manuel Bottinib412fab2018-12-10 17:40:23 +00002 * Copyright (c) 2016-2019 ARM Limited.
Michalis Spyrou04f089c2017-08-08 17:42:38 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "helpers.h"
25
Michalis Spyroub9626ab2019-05-13 17:41:01 +010026#if FLOAT_DATA_TYPE
27#define ISGREATER(x, y) isgreater(x, y)
28#define ISLESS(x, y) isless(x, y)
29#else // !FLOAT_DATA_TYPE
30#if defined(WIDTH)
31#define ISGREATER(x, y) (x > y) ? 1 : 0
32#define ISLESS(x, y) (x < y) ? 1 : 0
33#else // !defined(WIDTH)
34#define ISGREATER(x, y) select((int16)0, (int16)-1, x > y)
35#define ISLESS(x, y) select((int16)0, (int16)-1, x < y)
36#endif // defined(WIDTH)
37#endif // FLOAT_DATA_TYPE
38
Michalis Spyrou04f089c2017-08-08 17:42:38 +010039/** Calculate square sum of a vector
40 *
41 * @param[in] input Pointer to the first pixel.
42 *
43 * @return square sum of vector.
44 */
45inline DATA_TYPE square_sum(__global const DATA_TYPE *input)
46{
47 VEC_DATA_TYPE(DATA_TYPE, 16)
48 in = vload16(0, input);
49
50 in *= in;
51
52 in.s01234567 += in.s89ABCDEF;
53 in.s0123 += in.s4567;
54 in.s01 += in.s23;
55
56 return (in.s0 + in.s1);
57}
58
59/** Calculate sum of a vector
60 *
61 * @param[in] input Pointer to the first pixel.
62 *
63 * @return sum of vector.
64 */
65inline DATA_TYPE sum(__global const DATA_TYPE *input)
66{
67 VEC_DATA_TYPE(DATA_TYPE, 16)
68 in = vload16(0, input);
69
70 in.s01234567 += in.s89ABCDEF;
71 in.s0123 += in.s4567;
72 in.s01 += in.s23;
73
74 return (in.s0 + in.s1);
75}
Manuel Bottinib412fab2018-12-10 17:40:23 +000076
77/** Calculate product of a vector
78 *
79 * @param[in] input Pointer to the first pixel.
80 *
81 * @return product of vector.
82 */
83inline DATA_TYPE product(__global const DATA_TYPE *input)
84{
85 VEC_DATA_TYPE(DATA_TYPE, 16)
86 in = vload16(0, input);
87
88 in.s01234567 *= in.s89ABCDEF;
89 in.s0123 *= in.s4567;
90 in.s01 *= in.s23;
91
92 return (in.s0 * in.s1);
93}
Michalis Spyrou7930db42018-11-22 17:36:28 +000094#if defined(OPERATION)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +010095/** This kernel performs parallel reduction given an operation on x-axis.
Michalis Spyrou04f089c2017-08-08 17:42:38 +010096 *
97 * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
Michalis Spyrou04f089c2017-08-08 17:42:38 +010098 * @note The operation we want to perform must be passed at compile time using -DOPERATION e.g. -DOPERATION=square_sum
Michalis Spyrou7e9391b2018-10-05 14:49:28 +010099 * @note The mean flag must be passed at compile time using -DMEAN if we want to compute the mean value
Manuel Bottinib412fab2018-12-10 17:40:23 +0000100 * @note The product flag must be passed at compile time using -DPROD if we want to compute the product, otherwise sum will be used
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100101 * @note The width size must be passed at compile time using -DWIDTH e.g. -DWIDTH=128 if we want to compute the mean value
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100102 *
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100103 * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100104 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
105 * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
Michalis Spyrouf6402dd2018-01-26 15:06:19 +0000106 * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
107 * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100108 * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
Manuel Bottinib412fab2018-12-10 17:40:23 +0000109 * @param[in] partial_res_ptr The local buffer to hold partial result values. Supported data types: same as @p src_ptr
110 * @param[in] partial_res_stride_x Stride of the output tensor in X dimension (in bytes)
111 * @param[in] partial_res_step_x partial_res_stride_x * number of elements along X processed per workitem(in bytes)
112 * @param[in] partial_res_stride_y Stride of the output tensor in Y dimension (in bytes)
113 * @param[in] partial_res_step_y partial_res_stride_y * number of elements along Y processed per workitem(in bytes)
114 * @param[in] partial_res_offset_first_element_in_bytes The offset of the first element in the source tensor
115 * @param[in] local_results Local buffer for storing the partial result
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100116 */
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100117__kernel void reduction_operation_x(
Michalis Spyrouf6402dd2018-01-26 15:06:19 +0000118 IMAGE_DECLARATION(src),
Manuel Bottinib412fab2018-12-10 17:40:23 +0000119 IMAGE_DECLARATION(partial_res),
120 __local DATA_TYPE *local_results)
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100121{
Michalis Spyrouf6402dd2018-01-26 15:06:19 +0000122 Image src = CONVERT_TO_IMAGE_STRUCT(src);
Manuel Bottinib412fab2018-12-10 17:40:23 +0000123 Image partial_res = CONVERT_TO_IMAGE_STRUCT(partial_res);
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100124
125 unsigned int lsize = get_local_size(0);
126 unsigned int lid = get_local_id(0);
127
Michalis Spyrouf6402dd2018-01-26 15:06:19 +0000128 for(unsigned int y = 0; y < get_local_size(1); ++y)
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100129 {
Manuel Bottinib412fab2018-12-10 17:40:23 +0000130 local_results[lid] = OPERATION((__global DATA_TYPE *)offset(&src, 0, y));
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100131 barrier(CLK_LOCAL_MEM_FENCE);
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100132
Michalis Spyrouf6402dd2018-01-26 15:06:19 +0000133 // Perform parallel reduction
134 for(unsigned int i = lsize >> 1; i > 0; i >>= 1)
135 {
136 if(lid < i)
137 {
Manuel Bottinib412fab2018-12-10 17:40:23 +0000138#if defined(PROD)
139 local_results[lid] *= local_results[lid + i];
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100140#else // !defined(PROD)
Manuel Bottinib412fab2018-12-10 17:40:23 +0000141 local_results[lid] += local_results[lid + i];
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100142#endif // defined(PROD)
Michalis Spyrouf6402dd2018-01-26 15:06:19 +0000143 }
144 barrier(CLK_LOCAL_MEM_FENCE);
145 }
146
147 if(lid == 0)
148 {
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100149#if defined(MEAN) && defined(WIDTH)
150 if(y == get_local_size(1) - 1)
151 {
Manuel Bottinib412fab2018-12-10 17:40:23 +0000152 local_results[0] /= WIDTH;
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100153 }
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100154#endif // defined(MEAN) && defined(WIDTH)
Manuel Bottinib412fab2018-12-10 17:40:23 +0000155 ((__global DATA_TYPE *)offset(&partial_res, get_group_id(0), y))[0] = local_results[0];
Michalis Spyrouf6402dd2018-01-26 15:06:19 +0000156 }
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100157 }
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100158}
Michalis Spyrou7930db42018-11-22 17:36:28 +0000159#endif // defined(OPERATION)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100160
161#if defined(WIDTH)
Michalis Spyrou7930db42018-11-22 17:36:28 +0000162/** This kernel performs reduction on x-axis. (Non parallel)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100163 *
Michalis Spyrou7930db42018-11-22 17:36:28 +0000164 * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100165 * @note The width size must be passed at compile time using -DWIDTH e.g. -DWIDTH=128
Manuel Bottinib412fab2018-12-10 17:40:23 +0000166 * @note The product flag must be passed at compile time using -DPROD if we want to compute the product, otherwise sum will be used
Michalis Spyrou7930db42018-11-22 17:36:28 +0000167 * @note In case of ARG_MIN and ARG_MAX the condition data type must be passed at compile time using -DCOND_DATA_TYPE e.g. -DCOND_DATA_TYPE=short
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100168 *
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100169 * @param[in] src_ptr Pointer to the source tensor. Supported data types: S32/F16/F32 and QASYMM8 for operation MEAN
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100170 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
171 * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
172 * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
173 * @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p src_ptt
174 * @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
175 * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
176 * @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
177 */
Michalis Spyrou7930db42018-11-22 17:36:28 +0000178__kernel void reduction_operation_non_parallel_x(
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100179 VECTOR_DECLARATION(src),
180 VECTOR_DECLARATION(output))
181{
182 Vector src = CONVERT_TO_VECTOR_STRUCT(src);
183 Vector output = CONVERT_TO_VECTOR_STRUCT(output);
184
Michalis Spyrou7930db42018-11-22 17:36:28 +0000185 DATA_TYPE_PROMOTED res = *((__global DATA_TYPE *)vector_offset(&src, 0));
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100186
Michalis Spyrou7930db42018-11-22 17:36:28 +0000187#if defined(ARG_MAX) || defined(ARG_MIN)
188 uint indx = 0;
189#endif // defined(ARG_MAX) || defined(ARG_MIN)
190
191 for(unsigned int x = 1; x < WIDTH; ++x)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100192 {
Michalis Spyrou7930db42018-11-22 17:36:28 +0000193 DATA_TYPE_PROMOTED in = *((__global DATA_TYPE *)vector_offset(&src, x));
194#if defined(ARG_MAX)
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100195 indx = select(indx, x, ISGREATER(in, res));
196 res = select(res, in, CONVERT(ISGREATER(in, res), COND_DATA_TYPE));
Michalis Spyrou7930db42018-11-22 17:36:28 +0000197#elif defined(ARG_MIN)
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100198 indx = select(indx, x, ISLESS(in, res));
199 res = select(res, in, CONVERT(ISLESS(in, res), COND_DATA_TYPE));
Usama Arifb2890502019-05-21 11:48:37 +0100200#elif defined(MIN)
201 res = select(res, in, CONVERT(ISLESS(in, res), COND_DATA_TYPE));
Michalis Spyrou7930db42018-11-22 17:36:28 +0000202#else // !(defined(ARG_MAX) || defined(ARG_MIN))
203 res += in;
204#endif // defined(ARG_MAX) || defined(ARG_MIN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100205 }
206
Michalis Spyrou7930db42018-11-22 17:36:28 +0000207 // Store result
208#if defined(ARG_MAX) || defined(ARG_MIN)
209 *((__global uint *)output.ptr) = indx;
210#else // !(defined(ARG_MAX) || defined(ARG_MIN))
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100211#if defined(MEAN)
212 res /= WIDTH;
Michalis Spyrou7930db42018-11-22 17:36:28 +0000213#endif // defined(MEAN)
Usama Arifb2890502019-05-21 11:48:37 +0100214#if defined(MIN)
215 *((__global DATA_TYPE_PROMOTED *)output.ptr) = res;
216#else // defined(MIN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100217 *((__global uchar *)output.ptr) = convert_uchar(res);
Usama Arifb2890502019-05-21 11:48:37 +0100218#endif // defined(MIN)
Michalis Spyrou7930db42018-11-22 17:36:28 +0000219#endif // defined(ARG_MAX) || defined(ARG_MIN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100220}
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100221#endif // defined(WIDTH)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100222
223#if defined(HEIGHT)
224/** This kernel performs reduction on y-axis.
225 *
Michalis Spyrou7930db42018-11-22 17:36:28 +0000226 * @note The input data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100227 * @note The height size must be passed at compile time using -DHEIGHT e.g. -DHEIGHT=128
228 *
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100229 * @param[in] src_ptr Pointer to the source tensor. Supported data types: QASYMM8/S32/F16/F32
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100230 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
231 * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
232 * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
233 * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
234 * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
235 * @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p src_ptt
236 * @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
237 * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
238 * @param[in] output_stride_y Stride of the output tensor in Y dimension (in bytes)
239 * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
240 * @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
241 */
242__kernel void reduction_operation_y(
243 IMAGE_DECLARATION(src),
244 IMAGE_DECLARATION(output))
245{
246 Image src = CONVERT_TO_IMAGE_STRUCT(src);
247 Image output = CONVERT_TO_IMAGE_STRUCT(output);
248
249 VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
Michalis Spyrou7930db42018-11-22 17:36:28 +0000250 res = CONVERT(vload16(0, (__global DATA_TYPE *)offset(&src, 0, 0)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100251
Michalis Spyrou7930db42018-11-22 17:36:28 +0000252#if defined(SUM_SQUARE)
253 res *= res;
254#endif // defined(SUM_SQUARE)
255
256#if defined(ARG_MAX) || defined(ARG_MIN)
257 uint16 indx = 0;
258#endif // defined(ARG_MAX) || defined(ARG_MIN)
259
260 for(unsigned int y = 1; y < HEIGHT; ++y)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100261 {
Michalis Spyrou8aaf93e2018-10-11 17:33:32 +0100262 VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
263 in = CONVERT(vload16(0, (__global DATA_TYPE *)offset(&src, 0, y)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou7930db42018-11-22 17:36:28 +0000264#if defined(ARG_MAX)
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100265 uint16 cond_conv = CONVERT(ISGREATER(in, res), uint16);
Michalis Spyrou7930db42018-11-22 17:36:28 +0000266 indx = select(indx, y, cond_conv);
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100267 res = select(res, in, ISGREATER(in, res));
Michalis Spyrou7930db42018-11-22 17:36:28 +0000268#elif defined(ARG_MIN)
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100269 uint16 cond_conv = CONVERT(ISLESS(in, res), uint16);
Michalis Spyrou7930db42018-11-22 17:36:28 +0000270 indx = select(indx, y, cond_conv);
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100271 res = select(res, in, ISLESS(in, res));
Usama Arifb2890502019-05-21 11:48:37 +0100272#elif defined(MIN)
273 res = select(res, in, ISLESS(in, res));
Michalis Spyrou7930db42018-11-22 17:36:28 +0000274#else // !(defined(ARG_MAX) || defined(ARG_MIN))
Michalis Spyrou8aaf93e2018-10-11 17:33:32 +0100275#if defined(SUM_SQUARE)
276 in *= in;
Michalis Spyrou7930db42018-11-22 17:36:28 +0000277#endif // defined(SUM_SQUARE)
Manuel Bottinib412fab2018-12-10 17:40:23 +0000278#if defined(PROD)
279 res *= in;
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100280#else // !defined(PROD)
Michalis Spyrou8aaf93e2018-10-11 17:33:32 +0100281 res += in;
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100282#endif // defined(PROD)
Michalis Spyrou7930db42018-11-22 17:36:28 +0000283#endif // defined(ARG_MAX) || defined(ARG_MIN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100284 }
285
Michalis Spyrou7930db42018-11-22 17:36:28 +0000286 // Store result
287#if defined(ARG_MAX) || defined(ARG_MIN)
288 vstore16(indx, 0, (__global uint *)output.ptr);
289#else // !(defined(ARG_MAX) || defined(ARG_MIN))
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100290#if defined(MEAN)
291 res /= HEIGHT;
Michalis Spyrou7930db42018-11-22 17:36:28 +0000292#endif // defined(MEAN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100293 vstore16(CONVERT(res, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)output.ptr);
Michalis Spyrou7930db42018-11-22 17:36:28 +0000294#endif // defined(ARG_MAX) || defined(ARG_MIN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100295}
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100296#endif // defined(HEIGHT)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100297
298#if defined(DEPTH)
299/** This kernel performs reduction on z-axis.
300 *
301 * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
302 * @note The depth size must be passed at compile time using -DDEPTH e.g. -DDEPTH=128
303 *
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100304 * @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8/S32/F16/F32
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100305 * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
306 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
307 * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
308 * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
309 * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
310 * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
311 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor
312 * @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p input_ptt
313 * @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
314 * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
315 * @param[in] output_stride_y Stride of the output tensor in Y dimension (in bytes)
316 * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
317 * @param[in] output_stride_z Stride of the output tensor in Z dimension (in bytes)
318 * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
319 * @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
320 */
321__kernel void reduction_operation_z(
322 TENSOR3D_DECLARATION(input),
323 TENSOR3D_DECLARATION(output))
324{
325 Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
326 Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
327
328 VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
Michalis Spyrou7930db42018-11-22 17:36:28 +0000329 res = CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, 0)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100330
Georgios Pinitas8be91482019-03-26 17:23:28 +0000331#if defined(COMPLEX)
332 VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
333 res1 = CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 8, 0, 0)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
334#endif // defined(COMPLEX)
Michalis Spyrou7930db42018-11-22 17:36:28 +0000335#if defined(SUM_SQUARE)
336 res *= res;
337#endif // defined(SUM_SQUARE)
338
339#if defined(ARG_MAX) || defined(ARG_MIN)
340 uint16 indx = 0;
341#endif // defined(ARG_MAX) || defined(ARG_MIN)
342
343 for(unsigned int z = 1; z < DEPTH; ++z)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100344 {
Michalis Spyrou8aaf93e2018-10-11 17:33:32 +0100345 VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
346 in = CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, z)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou7930db42018-11-22 17:36:28 +0000347
Georgios Pinitas8be91482019-03-26 17:23:28 +0000348#if defined(COMPLEX)
349 VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
350 in1 = CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 8, 0, z)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
351#endif // defined(COMPLEX)
352
Michalis Spyrou7930db42018-11-22 17:36:28 +0000353#if defined(ARG_MAX)
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100354 uint16 cond_conv = CONVERT(ISGREATER(in, res), uint16);
Michalis Spyrou7930db42018-11-22 17:36:28 +0000355 indx = select(indx, z, cond_conv);
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100356 res = select(res, in, ISGREATER(in, res));
Michalis Spyrou7930db42018-11-22 17:36:28 +0000357#elif defined(ARG_MIN)
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100358 uint16 cond_conv = CONVERT(ISLESS(in, res), uint16);
Michalis Spyrou7930db42018-11-22 17:36:28 +0000359 indx = select(indx, z, cond_conv);
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100360 res = select(res, in, ISLESS(in, res));
Usama Arifb2890502019-05-21 11:48:37 +0100361#elif defined(MIN)
362 res = select(res, in, ISLESS(in, res));
Michalis Spyrou7930db42018-11-22 17:36:28 +0000363#else // !(defined(ARG_MAX) || defined(ARG_MIN))
Michalis Spyrou8aaf93e2018-10-11 17:33:32 +0100364#if defined(SUM_SQUARE)
365 in *= in;
Michalis Spyrou7930db42018-11-22 17:36:28 +0000366#endif // defined(SUM_SQUARE)
Manuel Bottinib412fab2018-12-10 17:40:23 +0000367#if defined(PROD)
368 res *= in;
Georgios Pinitas8be91482019-03-26 17:23:28 +0000369#else //!defined(PROD)
Michalis Spyrou8aaf93e2018-10-11 17:33:32 +0100370 res += in;
Georgios Pinitas8be91482019-03-26 17:23:28 +0000371#if defined(COMPLEX)
372 res1 += in1;
373#endif // defined(COMPLEX)
Manuel Bottinib412fab2018-12-10 17:40:23 +0000374#endif //defined(PROD)
Michalis Spyrou7930db42018-11-22 17:36:28 +0000375#endif // defined(ARG_MAX) || defined(ARG_MIN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100376 }
377
Michalis Spyrou7930db42018-11-22 17:36:28 +0000378 // Store result
379#if defined(ARG_MAX) || defined(ARG_MIN)
380 vstore16(indx, 0, (__global uint *)output.ptr);
381#else // !(defined(ARG_MAX) || defined(ARG_MIN))
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100382#if defined(MEAN)
383 res /= DEPTH;
Michalis Spyrou7930db42018-11-22 17:36:28 +0000384#endif // defined(MEAN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100385 vstore16(CONVERT(res, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)output.ptr);
Georgios Pinitas8be91482019-03-26 17:23:28 +0000386#if defined(COMPLEX)
387 vstore16(CONVERT(res1, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)tensor3D_offset(&output, 8, 0, 0));
388#endif // defined(COMPLEX)
Michalis Spyrou7930db42018-11-22 17:36:28 +0000389#endif // defined(ARG_MAX) || defined(ARG_MIN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100390}
391#endif /* defined(DEPTH) */
392
393#if defined(BATCH) && defined(DEPTH)
394/** This kernel performs reduction on w-axis.
395 *
396 * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
397 * @note The batch size must be passed at compile time using -DBATCH e.g. -DBATCH=128
398 * @note The depth size must be passed at compile time using -DBATCH e.g. -DDEPTH=128
399 *
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100400 * @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8/S32/F16/F32
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100401 * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
402 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
403 * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
404 * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
405 * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
406 * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
407 * @param[in] input_stride_w Stride of the source tensor in W dimension (in bytes)
408 * @param[in] input_step_w input_stride_w * number of elements along W processed per workitem(in bytes)
409 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor
410 * @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p input_ptt
411 * @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
412 * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
413 * @param[in] output_stride_y Stride of the output tensor in Y dimension (in bytes)
414 * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
415 * @param[in] output_stride_z Stride of the output tensor in Z dimension (in bytes)
416 * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
417 * @param[in] output_stride_w Stride of the output tensor in W dimension (in bytes)
418 * @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes)
419 * @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
420 */
421__kernel void reduction_operation_w(
422 TENSOR4D_DECLARATION(input),
423 TENSOR4D_DECLARATION(output))
424{
425 Tensor4D input = CONVERT_TO_TENSOR4D_STRUCT(input, DEPTH);
426 Tensor4D output = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH);
427
428 VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
Michalis Spyrou7930db42018-11-22 17:36:28 +0000429 res = CONVERT(vload16(0, (__global DATA_TYPE *)tensor4D_offset(&input, 0, 0, 0, 0)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100430
Michalis Spyrou7930db42018-11-22 17:36:28 +0000431#if defined(SUM_SQUARE)
432 res *= res;
433#endif // defined(SUM_SQUARE)
434
435#if defined(ARG_MAX) || defined(ARG_MIN)
436 uint16 indx = 0;
437#endif // defined(ARG_MAX) || defined(ARG_MIN)
438
439 for(unsigned int w = 1; w < BATCH; ++w)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100440 {
Michalis Spyrou8aaf93e2018-10-11 17:33:32 +0100441 VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
442 in = CONVERT(vload16(0, (__global DATA_TYPE *)tensor4D_offset(&input, 0, 0, 0, w)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou7930db42018-11-22 17:36:28 +0000443
444#if defined(ARG_MAX)
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100445 uint16 cond_conv = CONVERT(ISGREATER(in, res), uint16);
Michalis Spyrou7930db42018-11-22 17:36:28 +0000446 indx = select(indx, w, cond_conv);
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100447 res = select(res, in, ISGREATER(in, res));
Michalis Spyrou7930db42018-11-22 17:36:28 +0000448#elif defined(ARG_MIN)
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100449 uint16 cond_conv = CONVERT(ISLESS(in, res), uint16);
Michalis Spyrou7930db42018-11-22 17:36:28 +0000450 indx = select(indx, w, cond_conv);
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100451 res = select(res, in, ISLESS(in, res));
Usama Arifb2890502019-05-21 11:48:37 +0100452#elif defined(MIN)
453 res = select(res, in, ISLESS(in, res));
Michalis Spyrou7930db42018-11-22 17:36:28 +0000454#else // !(defined(ARG_MAX) || defined(ARG_MIN))
Michalis Spyrou8aaf93e2018-10-11 17:33:32 +0100455#if defined(SUM_SQUARE)
456 in *= in;
Michalis Spyrou7930db42018-11-22 17:36:28 +0000457#endif // defined(SUM_SQUARE)
Manuel Bottinib412fab2018-12-10 17:40:23 +0000458#if defined(PROD)
459 res *= in;
460#else //!defined(PROD)
Michalis Spyrou8aaf93e2018-10-11 17:33:32 +0100461 res += in;
Manuel Bottinib412fab2018-12-10 17:40:23 +0000462#endif //defined(PROD)
Michalis Spyrou7930db42018-11-22 17:36:28 +0000463#endif // defined(ARG_MAX) || defined(ARG_MIN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100464 }
465
Michalis Spyrou7930db42018-11-22 17:36:28 +0000466 // Store result
467#if defined(ARG_MAX) || defined(ARG_MIN)
468 vstore16(indx, 0, (__global uint *)output.ptr);
469#else // !(defined(ARG_MAX) || defined(ARG_MIN))
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100470#if defined(MEAN)
471 res /= BATCH;
Michalis Spyrou7930db42018-11-22 17:36:28 +0000472#endif // defined(MEAN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100473 vstore16(CONVERT(res, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)output.ptr);
Michalis Spyrou7930db42018-11-22 17:36:28 +0000474#endif // defined(ARG_MAX) || defined(ARG_MIN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100475}
Manuel Bottinib412fab2018-12-10 17:40:23 +0000476#endif /* defined(BATCH) && defined(DEPTH) */