blob: 5a4bb9ff4ce2c46ea16badf52ad8a8149e31609f [file] [log] [blame]
Michalis Spyrou04f089c2017-08-08 17:42:38 +01001/*
Manuel Bottinib412fab2018-12-10 17:40:23 +00002 * Copyright (c) 2016-2019 ARM Limited.
Michalis Spyrou04f089c2017-08-08 17:42:38 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "helpers.h"
25
Michalis Spyroub9626ab2019-05-13 17:41:01 +010026#if FLOAT_DATA_TYPE
27#define ISGREATER(x, y) isgreater(x, y)
28#define ISLESS(x, y) isless(x, y)
29#else // !FLOAT_DATA_TYPE
30#if defined(WIDTH)
31#define ISGREATER(x, y) (x > y) ? 1 : 0
32#define ISLESS(x, y) (x < y) ? 1 : 0
33#else // !defined(WIDTH)
34#define ISGREATER(x, y) select((int16)0, (int16)-1, x > y)
35#define ISLESS(x, y) select((int16)0, (int16)-1, x < y)
36#endif // defined(WIDTH)
37#endif // FLOAT_DATA_TYPE
38
Michalis Spyrou04f089c2017-08-08 17:42:38 +010039/** Calculate square sum of a vector
40 *
41 * @param[in] input Pointer to the first pixel.
42 *
43 * @return square sum of vector.
44 */
45inline DATA_TYPE square_sum(__global const DATA_TYPE *input)
46{
47 VEC_DATA_TYPE(DATA_TYPE, 16)
48 in = vload16(0, input);
49
50 in *= in;
51
52 in.s01234567 += in.s89ABCDEF;
53 in.s0123 += in.s4567;
54 in.s01 += in.s23;
55
56 return (in.s0 + in.s1);
57}
58
59/** Calculate sum of a vector
60 *
61 * @param[in] input Pointer to the first pixel.
62 *
63 * @return sum of vector.
64 */
65inline DATA_TYPE sum(__global const DATA_TYPE *input)
66{
67 VEC_DATA_TYPE(DATA_TYPE, 16)
68 in = vload16(0, input);
69
70 in.s01234567 += in.s89ABCDEF;
71 in.s0123 += in.s4567;
72 in.s01 += in.s23;
73
74 return (in.s0 + in.s1);
75}
Manuel Bottinib412fab2018-12-10 17:40:23 +000076
77/** Calculate product of a vector
78 *
79 * @param[in] input Pointer to the first pixel.
80 *
81 * @return product of vector.
82 */
83inline DATA_TYPE product(__global const DATA_TYPE *input)
84{
85 VEC_DATA_TYPE(DATA_TYPE, 16)
86 in = vload16(0, input);
87
88 in.s01234567 *= in.s89ABCDEF;
89 in.s0123 *= in.s4567;
90 in.s01 *= in.s23;
91
92 return (in.s0 * in.s1);
93}
Michalis Spyrou7930db42018-11-22 17:36:28 +000094#if defined(OPERATION)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +010095/** This kernel performs parallel reduction given an operation on x-axis.
Michalis Spyrou04f089c2017-08-08 17:42:38 +010096 *
97 * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
Michalis Spyrou04f089c2017-08-08 17:42:38 +010098 * @note The operation we want to perform must be passed at compile time using -DOPERATION e.g. -DOPERATION=square_sum
Michalis Spyrou7e9391b2018-10-05 14:49:28 +010099 * @note The mean flag must be passed at compile time using -DMEAN if we want to compute the mean value
Manuel Bottinib412fab2018-12-10 17:40:23 +0000100 * @note The product flag must be passed at compile time using -DPROD if we want to compute the product, otherwise sum will be used
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100101 * @note The width size must be passed at compile time using -DWIDTH e.g. -DWIDTH=128 if we want to compute the mean value
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100102 *
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100103 * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100104 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
105 * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
Michalis Spyrouf6402dd2018-01-26 15:06:19 +0000106 * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
107 * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100108 * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
Manuel Bottinib412fab2018-12-10 17:40:23 +0000109 * @param[in] partial_res_ptr The local buffer to hold partial result values. Supported data types: same as @p src_ptr
110 * @param[in] partial_res_stride_x Stride of the output tensor in X dimension (in bytes)
111 * @param[in] partial_res_step_x partial_res_stride_x * number of elements along X processed per workitem(in bytes)
112 * @param[in] partial_res_stride_y Stride of the output tensor in Y dimension (in bytes)
113 * @param[in] partial_res_step_y partial_res_stride_y * number of elements along Y processed per workitem(in bytes)
114 * @param[in] partial_res_offset_first_element_in_bytes The offset of the first element in the source tensor
115 * @param[in] local_results Local buffer for storing the partial result
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100116 */
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100117__kernel void reduction_operation_x(
Michalis Spyrouf6402dd2018-01-26 15:06:19 +0000118 IMAGE_DECLARATION(src),
Manuel Bottinib412fab2018-12-10 17:40:23 +0000119 IMAGE_DECLARATION(partial_res),
120 __local DATA_TYPE *local_results)
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100121{
Michalis Spyrouf6402dd2018-01-26 15:06:19 +0000122 Image src = CONVERT_TO_IMAGE_STRUCT(src);
Manuel Bottinib412fab2018-12-10 17:40:23 +0000123 Image partial_res = CONVERT_TO_IMAGE_STRUCT(partial_res);
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100124
125 unsigned int lsize = get_local_size(0);
126 unsigned int lid = get_local_id(0);
127
Michalis Spyrouf6402dd2018-01-26 15:06:19 +0000128 for(unsigned int y = 0; y < get_local_size(1); ++y)
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100129 {
Manuel Bottinib412fab2018-12-10 17:40:23 +0000130 local_results[lid] = OPERATION((__global DATA_TYPE *)offset(&src, 0, y));
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100131 barrier(CLK_LOCAL_MEM_FENCE);
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100132
Michalis Spyrouf6402dd2018-01-26 15:06:19 +0000133 // Perform parallel reduction
134 for(unsigned int i = lsize >> 1; i > 0; i >>= 1)
135 {
136 if(lid < i)
137 {
Manuel Bottinib412fab2018-12-10 17:40:23 +0000138#if defined(PROD)
139 local_results[lid] *= local_results[lid + i];
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100140#else // !defined(PROD)
Manuel Bottinib412fab2018-12-10 17:40:23 +0000141 local_results[lid] += local_results[lid + i];
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100142#endif // defined(PROD)
Michalis Spyrouf6402dd2018-01-26 15:06:19 +0000143 }
144 barrier(CLK_LOCAL_MEM_FENCE);
145 }
146
147 if(lid == 0)
148 {
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100149#if defined(MEAN) && defined(WIDTH)
150 if(y == get_local_size(1) - 1)
151 {
Manuel Bottinib412fab2018-12-10 17:40:23 +0000152 local_results[0] /= WIDTH;
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100153 }
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100154#endif // defined(MEAN) && defined(WIDTH)
Manuel Bottinib412fab2018-12-10 17:40:23 +0000155 ((__global DATA_TYPE *)offset(&partial_res, get_group_id(0), y))[0] = local_results[0];
Michalis Spyrouf6402dd2018-01-26 15:06:19 +0000156 }
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100157 }
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100158}
Michalis Spyrou7930db42018-11-22 17:36:28 +0000159#endif // defined(OPERATION)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100160
161#if defined(WIDTH)
Michalis Spyrou7930db42018-11-22 17:36:28 +0000162/** This kernel performs reduction on x-axis. (Non parallel)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100163 *
Michalis Spyrou7930db42018-11-22 17:36:28 +0000164 * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100165 * @note The width size must be passed at compile time using -DWIDTH e.g. -DWIDTH=128
Manuel Bottinib412fab2018-12-10 17:40:23 +0000166 * @note The product flag must be passed at compile time using -DPROD if we want to compute the product, otherwise sum will be used
Michalis Spyrou7930db42018-11-22 17:36:28 +0000167 * @note In case of ARG_MIN and ARG_MAX the condition data type must be passed at compile time using -DCOND_DATA_TYPE e.g. -DCOND_DATA_TYPE=short
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100168 *
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100169 * @param[in] src_ptr Pointer to the source tensor. Supported data types: S32/F16/F32 and QASYMM8 for operation MEAN
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100170 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
171 * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
172 * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
173 * @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p src_ptt
174 * @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
175 * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
176 * @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
177 */
Michalis Spyrou7930db42018-11-22 17:36:28 +0000178__kernel void reduction_operation_non_parallel_x(
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100179 VECTOR_DECLARATION(src),
180 VECTOR_DECLARATION(output))
181{
182 Vector src = CONVERT_TO_VECTOR_STRUCT(src);
183 Vector output = CONVERT_TO_VECTOR_STRUCT(output);
184
Michalis Spyrou7930db42018-11-22 17:36:28 +0000185 DATA_TYPE_PROMOTED res = *((__global DATA_TYPE *)vector_offset(&src, 0));
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100186
Michalis Spyrou7930db42018-11-22 17:36:28 +0000187#if defined(ARG_MAX) || defined(ARG_MIN)
188 uint indx = 0;
189#endif // defined(ARG_MAX) || defined(ARG_MIN)
190
191 for(unsigned int x = 1; x < WIDTH; ++x)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100192 {
Michalis Spyrou7930db42018-11-22 17:36:28 +0000193 DATA_TYPE_PROMOTED in = *((__global DATA_TYPE *)vector_offset(&src, x));
194#if defined(ARG_MAX)
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100195 indx = select(indx, x, ISGREATER(in, res));
196 res = select(res, in, CONVERT(ISGREATER(in, res), COND_DATA_TYPE));
Michalis Spyrou7930db42018-11-22 17:36:28 +0000197#elif defined(ARG_MIN)
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100198 indx = select(indx, x, ISLESS(in, res));
199 res = select(res, in, CONVERT(ISLESS(in, res), COND_DATA_TYPE));
Usama Arifb2890502019-05-21 11:48:37 +0100200#elif defined(MIN)
Usama Arif048b0f32019-05-22 16:32:27 +0100201 res = select(res, in, CONVERT(ISLESS(in, res), COND_DATA_TYPE));
202#elif defined(MAX)
203 res = select(res, in, CONVERT(ISGREATER(in, res), COND_DATA_TYPE));
Michalis Spyrou7930db42018-11-22 17:36:28 +0000204#else // !(defined(ARG_MAX) || defined(ARG_MIN))
205 res += in;
206#endif // defined(ARG_MAX) || defined(ARG_MIN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100207 }
208
Michalis Spyrou7930db42018-11-22 17:36:28 +0000209 // Store result
210#if defined(ARG_MAX) || defined(ARG_MIN)
211 *((__global uint *)output.ptr) = indx;
212#else // !(defined(ARG_MAX) || defined(ARG_MIN))
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100213#if defined(MEAN)
214 res /= WIDTH;
Michalis Spyrou7930db42018-11-22 17:36:28 +0000215#endif // defined(MEAN)
Usama Arif048b0f32019-05-22 16:32:27 +0100216#if defined(MIN) || defined(MAX)
Usama Arifb2890502019-05-21 11:48:37 +0100217 *((__global DATA_TYPE_PROMOTED *)output.ptr) = res;
Usama Arif048b0f32019-05-22 16:32:27 +0100218#else // defined(MIN) || defined(MAX)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100219 *((__global uchar *)output.ptr) = convert_uchar(res);
Usama Arif048b0f32019-05-22 16:32:27 +0100220#endif // defined(MIN) || defined(MAX)
Michalis Spyrou7930db42018-11-22 17:36:28 +0000221#endif // defined(ARG_MAX) || defined(ARG_MIN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100222}
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100223#endif // defined(WIDTH)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100224
225#if defined(HEIGHT)
226/** This kernel performs reduction on y-axis.
227 *
Michalis Spyrou7930db42018-11-22 17:36:28 +0000228 * @note The input data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100229 * @note The height size must be passed at compile time using -DHEIGHT e.g. -DHEIGHT=128
230 *
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100231 * @param[in] src_ptr Pointer to the source tensor. Supported data types: QASYMM8/S32/F16/F32
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100232 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
233 * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
234 * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
235 * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
236 * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
237 * @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p src_ptt
238 * @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
239 * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
240 * @param[in] output_stride_y Stride of the output tensor in Y dimension (in bytes)
241 * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
242 * @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
243 */
244__kernel void reduction_operation_y(
245 IMAGE_DECLARATION(src),
246 IMAGE_DECLARATION(output))
247{
248 Image src = CONVERT_TO_IMAGE_STRUCT(src);
249 Image output = CONVERT_TO_IMAGE_STRUCT(output);
250
251 VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
Michalis Spyrou7930db42018-11-22 17:36:28 +0000252 res = CONVERT(vload16(0, (__global DATA_TYPE *)offset(&src, 0, 0)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100253
Michalis Spyrou7930db42018-11-22 17:36:28 +0000254#if defined(SUM_SQUARE)
255 res *= res;
256#endif // defined(SUM_SQUARE)
257
258#if defined(ARG_MAX) || defined(ARG_MIN)
259 uint16 indx = 0;
260#endif // defined(ARG_MAX) || defined(ARG_MIN)
261
262 for(unsigned int y = 1; y < HEIGHT; ++y)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100263 {
Michalis Spyrou8aaf93e2018-10-11 17:33:32 +0100264 VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
265 in = CONVERT(vload16(0, (__global DATA_TYPE *)offset(&src, 0, y)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou7930db42018-11-22 17:36:28 +0000266#if defined(ARG_MAX)
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100267 uint16 cond_conv = CONVERT(ISGREATER(in, res), uint16);
Michalis Spyrou7930db42018-11-22 17:36:28 +0000268 indx = select(indx, y, cond_conv);
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100269 res = select(res, in, ISGREATER(in, res));
Michalis Spyrou7930db42018-11-22 17:36:28 +0000270#elif defined(ARG_MIN)
Usama Arif048b0f32019-05-22 16:32:27 +0100271 uint16 cond_conv = CONVERT(ISLESS(in, res), uint16);
272 indx = select(indx, y, cond_conv);
273 res = select(res, in, ISLESS(in, res));
Usama Arifb2890502019-05-21 11:48:37 +0100274#elif defined(MIN)
Usama Arif048b0f32019-05-22 16:32:27 +0100275 res = select(res, in, ISLESS(in, res));
276#elif defined(MAX)
277 res = select(res, in, ISGREATER(in, res));
Michalis Spyrou7930db42018-11-22 17:36:28 +0000278#else // !(defined(ARG_MAX) || defined(ARG_MIN))
Michalis Spyrou8aaf93e2018-10-11 17:33:32 +0100279#if defined(SUM_SQUARE)
280 in *= in;
Michalis Spyrou7930db42018-11-22 17:36:28 +0000281#endif // defined(SUM_SQUARE)
Manuel Bottinib412fab2018-12-10 17:40:23 +0000282#if defined(PROD)
283 res *= in;
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100284#else // !defined(PROD)
Michalis Spyrou8aaf93e2018-10-11 17:33:32 +0100285 res += in;
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100286#endif // defined(PROD)
Michalis Spyrou7930db42018-11-22 17:36:28 +0000287#endif // defined(ARG_MAX) || defined(ARG_MIN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100288 }
289
Michalis Spyrou7930db42018-11-22 17:36:28 +0000290 // Store result
291#if defined(ARG_MAX) || defined(ARG_MIN)
292 vstore16(indx, 0, (__global uint *)output.ptr);
293#else // !(defined(ARG_MAX) || defined(ARG_MIN))
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100294#if defined(MEAN)
295 res /= HEIGHT;
Michalis Spyrou7930db42018-11-22 17:36:28 +0000296#endif // defined(MEAN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100297 vstore16(CONVERT(res, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)output.ptr);
Michalis Spyrou7930db42018-11-22 17:36:28 +0000298#endif // defined(ARG_MAX) || defined(ARG_MIN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100299}
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100300#endif // defined(HEIGHT)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100301
302#if defined(DEPTH)
303/** This kernel performs reduction on z-axis.
304 *
305 * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
306 * @note The depth size must be passed at compile time using -DDEPTH e.g. -DDEPTH=128
307 *
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100308 * @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8/S32/F16/F32
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100309 * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
310 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
311 * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
312 * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
313 * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
314 * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
315 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor
316 * @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p input_ptt
317 * @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
318 * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
319 * @param[in] output_stride_y Stride of the output tensor in Y dimension (in bytes)
320 * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
321 * @param[in] output_stride_z Stride of the output tensor in Z dimension (in bytes)
322 * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
323 * @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
324 */
325__kernel void reduction_operation_z(
326 TENSOR3D_DECLARATION(input),
327 TENSOR3D_DECLARATION(output))
328{
329 Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
330 Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
331
332 VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
Michalis Spyrou7930db42018-11-22 17:36:28 +0000333 res = CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, 0)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100334
Georgios Pinitas8be91482019-03-26 17:23:28 +0000335#if defined(COMPLEX)
336 VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
337 res1 = CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 8, 0, 0)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
338#endif // defined(COMPLEX)
Michalis Spyrou7930db42018-11-22 17:36:28 +0000339#if defined(SUM_SQUARE)
340 res *= res;
341#endif // defined(SUM_SQUARE)
342
343#if defined(ARG_MAX) || defined(ARG_MIN)
344 uint16 indx = 0;
345#endif // defined(ARG_MAX) || defined(ARG_MIN)
346
347 for(unsigned int z = 1; z < DEPTH; ++z)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100348 {
Michalis Spyrou8aaf93e2018-10-11 17:33:32 +0100349 VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
350 in = CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, z)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou7930db42018-11-22 17:36:28 +0000351
Georgios Pinitas8be91482019-03-26 17:23:28 +0000352#if defined(COMPLEX)
353 VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
354 in1 = CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 8, 0, z)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
355#endif // defined(COMPLEX)
356
Michalis Spyrou7930db42018-11-22 17:36:28 +0000357#if defined(ARG_MAX)
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100358 uint16 cond_conv = CONVERT(ISGREATER(in, res), uint16);
Michalis Spyrou7930db42018-11-22 17:36:28 +0000359 indx = select(indx, z, cond_conv);
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100360 res = select(res, in, ISGREATER(in, res));
Michalis Spyrou7930db42018-11-22 17:36:28 +0000361#elif defined(ARG_MIN)
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100362 uint16 cond_conv = CONVERT(ISLESS(in, res), uint16);
Michalis Spyrou7930db42018-11-22 17:36:28 +0000363 indx = select(indx, z, cond_conv);
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100364 res = select(res, in, ISLESS(in, res));
Usama Arifb2890502019-05-21 11:48:37 +0100365#elif defined(MIN)
Usama Arif048b0f32019-05-22 16:32:27 +0100366 res = select(res, in, ISLESS(in, res));
367#elif defined(MAX)
368 res = select(res, in, ISGREATER(in, res));
Michalis Spyrou7930db42018-11-22 17:36:28 +0000369#else // !(defined(ARG_MAX) || defined(ARG_MIN))
Michalis Spyrou8aaf93e2018-10-11 17:33:32 +0100370#if defined(SUM_SQUARE)
371 in *= in;
Michalis Spyrou7930db42018-11-22 17:36:28 +0000372#endif // defined(SUM_SQUARE)
Manuel Bottinib412fab2018-12-10 17:40:23 +0000373#if defined(PROD)
374 res *= in;
Georgios Pinitas8be91482019-03-26 17:23:28 +0000375#else //!defined(PROD)
Michalis Spyrou8aaf93e2018-10-11 17:33:32 +0100376 res += in;
Georgios Pinitas8be91482019-03-26 17:23:28 +0000377#if defined(COMPLEX)
378 res1 += in1;
379#endif // defined(COMPLEX)
Manuel Bottinib412fab2018-12-10 17:40:23 +0000380#endif //defined(PROD)
Michalis Spyrou7930db42018-11-22 17:36:28 +0000381#endif // defined(ARG_MAX) || defined(ARG_MIN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100382 }
383
Michalis Spyrou7930db42018-11-22 17:36:28 +0000384 // Store result
385#if defined(ARG_MAX) || defined(ARG_MIN)
386 vstore16(indx, 0, (__global uint *)output.ptr);
387#else // !(defined(ARG_MAX) || defined(ARG_MIN))
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100388#if defined(MEAN)
389 res /= DEPTH;
Michalis Spyrou7930db42018-11-22 17:36:28 +0000390#endif // defined(MEAN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100391 vstore16(CONVERT(res, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)output.ptr);
Georgios Pinitas8be91482019-03-26 17:23:28 +0000392#if defined(COMPLEX)
393 vstore16(CONVERT(res1, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)tensor3D_offset(&output, 8, 0, 0));
394#endif // defined(COMPLEX)
Michalis Spyrou7930db42018-11-22 17:36:28 +0000395#endif // defined(ARG_MAX) || defined(ARG_MIN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100396}
397#endif /* defined(DEPTH) */
398
399#if defined(BATCH) && defined(DEPTH)
400/** This kernel performs reduction on w-axis.
401 *
402 * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
403 * @note The batch size must be passed at compile time using -DBATCH e.g. -DBATCH=128
404 * @note The depth size must be passed at compile time using -DBATCH e.g. -DDEPTH=128
405 *
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100406 * @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8/S32/F16/F32
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100407 * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
408 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
409 * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
410 * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
411 * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
412 * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
413 * @param[in] input_stride_w Stride of the source tensor in W dimension (in bytes)
414 * @param[in] input_step_w input_stride_w * number of elements along W processed per workitem(in bytes)
415 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor
416 * @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p input_ptt
417 * @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
418 * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
419 * @param[in] output_stride_y Stride of the output tensor in Y dimension (in bytes)
420 * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
421 * @param[in] output_stride_z Stride of the output tensor in Z dimension (in bytes)
422 * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
423 * @param[in] output_stride_w Stride of the output tensor in W dimension (in bytes)
424 * @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes)
425 * @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
426 */
427__kernel void reduction_operation_w(
428 TENSOR4D_DECLARATION(input),
429 TENSOR4D_DECLARATION(output))
430{
431 Tensor4D input = CONVERT_TO_TENSOR4D_STRUCT(input, DEPTH);
432 Tensor4D output = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH);
433
434 VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
Michalis Spyrou7930db42018-11-22 17:36:28 +0000435 res = CONVERT(vload16(0, (__global DATA_TYPE *)tensor4D_offset(&input, 0, 0, 0, 0)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100436
Michalis Spyrou7930db42018-11-22 17:36:28 +0000437#if defined(SUM_SQUARE)
438 res *= res;
439#endif // defined(SUM_SQUARE)
440
441#if defined(ARG_MAX) || defined(ARG_MIN)
442 uint16 indx = 0;
443#endif // defined(ARG_MAX) || defined(ARG_MIN)
444
445 for(unsigned int w = 1; w < BATCH; ++w)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100446 {
Michalis Spyrou8aaf93e2018-10-11 17:33:32 +0100447 VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
448 in = CONVERT(vload16(0, (__global DATA_TYPE *)tensor4D_offset(&input, 0, 0, 0, w)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou7930db42018-11-22 17:36:28 +0000449
450#if defined(ARG_MAX)
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100451 uint16 cond_conv = CONVERT(ISGREATER(in, res), uint16);
Michalis Spyrou7930db42018-11-22 17:36:28 +0000452 indx = select(indx, w, cond_conv);
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100453 res = select(res, in, ISGREATER(in, res));
Michalis Spyrou7930db42018-11-22 17:36:28 +0000454#elif defined(ARG_MIN)
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100455 uint16 cond_conv = CONVERT(ISLESS(in, res), uint16);
Michalis Spyrou7930db42018-11-22 17:36:28 +0000456 indx = select(indx, w, cond_conv);
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100457 res = select(res, in, ISLESS(in, res));
Usama Arifb2890502019-05-21 11:48:37 +0100458#elif defined(MIN)
Usama Arif048b0f32019-05-22 16:32:27 +0100459 res = select(res, in, ISLESS(in, res));
460#elif defined(MAX)
461 res = select(res, in, ISGREATER(in, res));
Michalis Spyrou7930db42018-11-22 17:36:28 +0000462#else // !(defined(ARG_MAX) || defined(ARG_MIN))
Michalis Spyrou8aaf93e2018-10-11 17:33:32 +0100463#if defined(SUM_SQUARE)
464 in *= in;
Michalis Spyrou7930db42018-11-22 17:36:28 +0000465#endif // defined(SUM_SQUARE)
Manuel Bottinib412fab2018-12-10 17:40:23 +0000466#if defined(PROD)
467 res *= in;
468#else //!defined(PROD)
Michalis Spyrou8aaf93e2018-10-11 17:33:32 +0100469 res += in;
Manuel Bottinib412fab2018-12-10 17:40:23 +0000470#endif //defined(PROD)
Michalis Spyrou7930db42018-11-22 17:36:28 +0000471#endif // defined(ARG_MAX) || defined(ARG_MIN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100472 }
473
Michalis Spyrou7930db42018-11-22 17:36:28 +0000474 // Store result
475#if defined(ARG_MAX) || defined(ARG_MIN)
476 vstore16(indx, 0, (__global uint *)output.ptr);
477#else // !(defined(ARG_MAX) || defined(ARG_MIN))
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100478#if defined(MEAN)
479 res /= BATCH;
Michalis Spyrou7930db42018-11-22 17:36:28 +0000480#endif // defined(MEAN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100481 vstore16(CONVERT(res, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)output.ptr);
Michalis Spyrou7930db42018-11-22 17:36:28 +0000482#endif // defined(ARG_MAX) || defined(ARG_MIN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100483}
Manuel Bottinib412fab2018-12-10 17:40:23 +0000484#endif /* defined(BATCH) && defined(DEPTH) */