blob: 749e3cdaa326540c574004207b92b8a7ca6e24de [file] [log] [blame]
Michalis Spyrou04f089c2017-08-08 17:42:38 +01001/*
Manuel Bottinib412fab2018-12-10 17:40:23 +00002 * Copyright (c) 2016-2019 ARM Limited.
Michalis Spyrou04f089c2017-08-08 17:42:38 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "helpers.h"
25
Michalis Spyroub9626ab2019-05-13 17:41:01 +010026#if FLOAT_DATA_TYPE
27#define ISGREATER(x, y) isgreater(x, y)
28#define ISLESS(x, y) isless(x, y)
29#else // !FLOAT_DATA_TYPE
30#if defined(WIDTH)
31#define ISGREATER(x, y) (x > y) ? 1 : 0
32#define ISLESS(x, y) (x < y) ? 1 : 0
33#else // !defined(WIDTH)
34#define ISGREATER(x, y) select((int16)0, (int16)-1, x > y)
35#define ISLESS(x, y) select((int16)0, (int16)-1, x < y)
36#endif // defined(WIDTH)
37#endif // FLOAT_DATA_TYPE
38
Michalis Spyrou04f089c2017-08-08 17:42:38 +010039/** Calculate square sum of a vector
40 *
41 * @param[in] input Pointer to the first pixel.
42 *
43 * @return square sum of vector.
44 */
45inline DATA_TYPE square_sum(__global const DATA_TYPE *input)
46{
47 VEC_DATA_TYPE(DATA_TYPE, 16)
48 in = vload16(0, input);
49
50 in *= in;
51
52 in.s01234567 += in.s89ABCDEF;
53 in.s0123 += in.s4567;
54 in.s01 += in.s23;
55
56 return (in.s0 + in.s1);
57}
58
59/** Calculate sum of a vector
60 *
61 * @param[in] input Pointer to the first pixel.
62 *
63 * @return sum of vector.
64 */
65inline DATA_TYPE sum(__global const DATA_TYPE *input)
66{
67 VEC_DATA_TYPE(DATA_TYPE, 16)
68 in = vload16(0, input);
69
70 in.s01234567 += in.s89ABCDEF;
71 in.s0123 += in.s4567;
72 in.s01 += in.s23;
73
74 return (in.s0 + in.s1);
75}
Manuel Bottinib412fab2018-12-10 17:40:23 +000076
77/** Calculate product of a vector
78 *
79 * @param[in] input Pointer to the first pixel.
80 *
81 * @return product of vector.
82 */
83inline DATA_TYPE product(__global const DATA_TYPE *input)
84{
85 VEC_DATA_TYPE(DATA_TYPE, 16)
86 in = vload16(0, input);
87
88 in.s01234567 *= in.s89ABCDEF;
89 in.s0123 *= in.s4567;
90 in.s01 *= in.s23;
91
92 return (in.s0 * in.s1);
93}
Michalis Spyrou7930db42018-11-22 17:36:28 +000094#if defined(OPERATION)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +010095/** This kernel performs parallel reduction given an operation on x-axis.
Michalis Spyrou04f089c2017-08-08 17:42:38 +010096 *
97 * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
Michalis Spyrou04f089c2017-08-08 17:42:38 +010098 * @note The operation we want to perform must be passed at compile time using -DOPERATION e.g. -DOPERATION=square_sum
Michalis Spyrou7e9391b2018-10-05 14:49:28 +010099 * @note The mean flag must be passed at compile time using -DMEAN if we want to compute the mean value
Manuel Bottinib412fab2018-12-10 17:40:23 +0000100 * @note The product flag must be passed at compile time using -DPROD if we want to compute the product, otherwise sum will be used
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100101 * @note The width size must be passed at compile time using -DWIDTH e.g. -DWIDTH=128 if we want to compute the mean value
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100102 *
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100103 * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100104 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
105 * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
Michalis Spyrouf6402dd2018-01-26 15:06:19 +0000106 * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
107 * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100108 * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
Manuel Bottinib412fab2018-12-10 17:40:23 +0000109 * @param[in] partial_res_ptr The local buffer to hold partial result values. Supported data types: same as @p src_ptr
110 * @param[in] partial_res_stride_x Stride of the output tensor in X dimension (in bytes)
111 * @param[in] partial_res_step_x partial_res_stride_x * number of elements along X processed per workitem(in bytes)
112 * @param[in] partial_res_stride_y Stride of the output tensor in Y dimension (in bytes)
113 * @param[in] partial_res_step_y partial_res_stride_y * number of elements along Y processed per workitem(in bytes)
114 * @param[in] partial_res_offset_first_element_in_bytes The offset of the first element in the source tensor
115 * @param[in] local_results Local buffer for storing the partial result
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100116 */
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100117__kernel void reduction_operation_x(
Michalis Spyrouf6402dd2018-01-26 15:06:19 +0000118 IMAGE_DECLARATION(src),
Manuel Bottinib412fab2018-12-10 17:40:23 +0000119 IMAGE_DECLARATION(partial_res),
120 __local DATA_TYPE *local_results)
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100121{
Michalis Spyrouf6402dd2018-01-26 15:06:19 +0000122 Image src = CONVERT_TO_IMAGE_STRUCT(src);
Manuel Bottinib412fab2018-12-10 17:40:23 +0000123 Image partial_res = CONVERT_TO_IMAGE_STRUCT(partial_res);
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100124
125 unsigned int lsize = get_local_size(0);
126 unsigned int lid = get_local_id(0);
127
Michalis Spyrouf6402dd2018-01-26 15:06:19 +0000128 for(unsigned int y = 0; y < get_local_size(1); ++y)
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100129 {
Manuel Bottinib412fab2018-12-10 17:40:23 +0000130 local_results[lid] = OPERATION((__global DATA_TYPE *)offset(&src, 0, y));
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100131 barrier(CLK_LOCAL_MEM_FENCE);
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100132
Michalis Spyrouf6402dd2018-01-26 15:06:19 +0000133 // Perform parallel reduction
134 for(unsigned int i = lsize >> 1; i > 0; i >>= 1)
135 {
136 if(lid < i)
137 {
Manuel Bottinib412fab2018-12-10 17:40:23 +0000138#if defined(PROD)
139 local_results[lid] *= local_results[lid + i];
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100140#else // !defined(PROD)
Manuel Bottinib412fab2018-12-10 17:40:23 +0000141 local_results[lid] += local_results[lid + i];
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100142#endif // defined(PROD)
Michalis Spyrouf6402dd2018-01-26 15:06:19 +0000143 }
144 barrier(CLK_LOCAL_MEM_FENCE);
145 }
146
147 if(lid == 0)
148 {
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100149#if defined(MEAN) && defined(WIDTH)
150 if(y == get_local_size(1) - 1)
151 {
Manuel Bottinib412fab2018-12-10 17:40:23 +0000152 local_results[0] /= WIDTH;
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100153 }
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100154#endif // defined(MEAN) && defined(WIDTH)
Manuel Bottinib412fab2018-12-10 17:40:23 +0000155 ((__global DATA_TYPE *)offset(&partial_res, get_group_id(0), y))[0] = local_results[0];
Michalis Spyrouf6402dd2018-01-26 15:06:19 +0000156 }
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100157 }
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100158}
Michalis Spyrou7930db42018-11-22 17:36:28 +0000159#endif // defined(OPERATION)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100160
161#if defined(WIDTH)
Michalis Spyrou7930db42018-11-22 17:36:28 +0000162/** This kernel performs reduction on x-axis. (Non parallel)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100163 *
Michalis Spyrou7930db42018-11-22 17:36:28 +0000164 * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100165 * @note The width size must be passed at compile time using -DWIDTH e.g. -DWIDTH=128
Manuel Bottinib412fab2018-12-10 17:40:23 +0000166 * @note The product flag must be passed at compile time using -DPROD if we want to compute the product, otherwise sum will be used
Michalis Spyrou7930db42018-11-22 17:36:28 +0000167 * @note In case of ARG_MIN and ARG_MAX the condition data type must be passed at compile time using -DCOND_DATA_TYPE e.g. -DCOND_DATA_TYPE=short
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100168 *
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100169 * @param[in] src_ptr Pointer to the source tensor. Supported data types: S32/F16/F32 and QASYMM8 for operation MEAN
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100170 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
171 * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
172 * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
173 * @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p src_ptt
174 * @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
175 * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
176 * @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
177 */
Michalis Spyrou7930db42018-11-22 17:36:28 +0000178__kernel void reduction_operation_non_parallel_x(
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100179 VECTOR_DECLARATION(src),
180 VECTOR_DECLARATION(output))
181{
182 Vector src = CONVERT_TO_VECTOR_STRUCT(src);
183 Vector output = CONVERT_TO_VECTOR_STRUCT(output);
184
Michalis Spyrou7930db42018-11-22 17:36:28 +0000185 DATA_TYPE_PROMOTED res = *((__global DATA_TYPE *)vector_offset(&src, 0));
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100186
Michalis Spyrou7930db42018-11-22 17:36:28 +0000187#if defined(ARG_MAX) || defined(ARG_MIN)
188 uint indx = 0;
189#endif // defined(ARG_MAX) || defined(ARG_MIN)
190
191 for(unsigned int x = 1; x < WIDTH; ++x)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100192 {
Michalis Spyrou7930db42018-11-22 17:36:28 +0000193 DATA_TYPE_PROMOTED in = *((__global DATA_TYPE *)vector_offset(&src, x));
194#if defined(ARG_MAX)
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100195 indx = select(indx, x, ISGREATER(in, res));
196 res = select(res, in, CONVERT(ISGREATER(in, res), COND_DATA_TYPE));
Michalis Spyrou7930db42018-11-22 17:36:28 +0000197#elif defined(ARG_MIN)
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100198 indx = select(indx, x, ISLESS(in, res));
199 res = select(res, in, CONVERT(ISLESS(in, res), COND_DATA_TYPE));
Michalis Spyrou7930db42018-11-22 17:36:28 +0000200#else // !(defined(ARG_MAX) || defined(ARG_MIN))
201 res += in;
202#endif // defined(ARG_MAX) || defined(ARG_MIN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100203 }
204
Michalis Spyrou7930db42018-11-22 17:36:28 +0000205 // Store result
206#if defined(ARG_MAX) || defined(ARG_MIN)
207 *((__global uint *)output.ptr) = indx;
208#else // !(defined(ARG_MAX) || defined(ARG_MIN))
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100209#if defined(MEAN)
210 res /= WIDTH;
Michalis Spyrou7930db42018-11-22 17:36:28 +0000211#endif // defined(MEAN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100212 *((__global uchar *)output.ptr) = convert_uchar(res);
Michalis Spyrou7930db42018-11-22 17:36:28 +0000213#endif // defined(ARG_MAX) || defined(ARG_MIN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100214}
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100215#endif // defined(WIDTH)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100216
217#if defined(HEIGHT)
218/** This kernel performs reduction on y-axis.
219 *
Michalis Spyrou7930db42018-11-22 17:36:28 +0000220 * @note The input data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100221 * @note The height size must be passed at compile time using -DHEIGHT e.g. -DHEIGHT=128
222 *
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100223 * @param[in] src_ptr Pointer to the source tensor. Supported data types: QASYMM8/S32/F16/F32
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100224 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
225 * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
226 * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
227 * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
228 * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
229 * @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p src_ptt
230 * @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
231 * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
232 * @param[in] output_stride_y Stride of the output tensor in Y dimension (in bytes)
233 * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
234 * @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
235 */
236__kernel void reduction_operation_y(
237 IMAGE_DECLARATION(src),
238 IMAGE_DECLARATION(output))
239{
240 Image src = CONVERT_TO_IMAGE_STRUCT(src);
241 Image output = CONVERT_TO_IMAGE_STRUCT(output);
242
243 VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
Michalis Spyrou7930db42018-11-22 17:36:28 +0000244 res = CONVERT(vload16(0, (__global DATA_TYPE *)offset(&src, 0, 0)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100245
Michalis Spyrou7930db42018-11-22 17:36:28 +0000246#if defined(SUM_SQUARE)
247 res *= res;
248#endif // defined(SUM_SQUARE)
249
250#if defined(ARG_MAX) || defined(ARG_MIN)
251 uint16 indx = 0;
252#endif // defined(ARG_MAX) || defined(ARG_MIN)
253
254 for(unsigned int y = 1; y < HEIGHT; ++y)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100255 {
Michalis Spyrou8aaf93e2018-10-11 17:33:32 +0100256 VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
257 in = CONVERT(vload16(0, (__global DATA_TYPE *)offset(&src, 0, y)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou7930db42018-11-22 17:36:28 +0000258#if defined(ARG_MAX)
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100259 uint16 cond_conv = CONVERT(ISGREATER(in, res), uint16);
Michalis Spyrou7930db42018-11-22 17:36:28 +0000260 indx = select(indx, y, cond_conv);
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100261 res = select(res, in, ISGREATER(in, res));
Michalis Spyrou7930db42018-11-22 17:36:28 +0000262#elif defined(ARG_MIN)
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100263 uint16 cond_conv = CONVERT(ISLESS(in, res), uint16);
Michalis Spyrou7930db42018-11-22 17:36:28 +0000264 indx = select(indx, y, cond_conv);
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100265 res = select(res, in, ISLESS(in, res));
Michalis Spyrou7930db42018-11-22 17:36:28 +0000266#else // !(defined(ARG_MAX) || defined(ARG_MIN))
Michalis Spyrou8aaf93e2018-10-11 17:33:32 +0100267#if defined(SUM_SQUARE)
268 in *= in;
Michalis Spyrou7930db42018-11-22 17:36:28 +0000269#endif // defined(SUM_SQUARE)
Manuel Bottinib412fab2018-12-10 17:40:23 +0000270#if defined(PROD)
271 res *= in;
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100272#else // !defined(PROD)
Michalis Spyrou8aaf93e2018-10-11 17:33:32 +0100273 res += in;
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100274#endif // defined(PROD)
Michalis Spyrou7930db42018-11-22 17:36:28 +0000275#endif // defined(ARG_MAX) || defined(ARG_MIN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100276 }
277
Michalis Spyrou7930db42018-11-22 17:36:28 +0000278 // Store result
279#if defined(ARG_MAX) || defined(ARG_MIN)
280 vstore16(indx, 0, (__global uint *)output.ptr);
281#else // !(defined(ARG_MAX) || defined(ARG_MIN))
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100282#if defined(MEAN)
283 res /= HEIGHT;
Michalis Spyrou7930db42018-11-22 17:36:28 +0000284#endif // defined(MEAN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100285 vstore16(CONVERT(res, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)output.ptr);
Michalis Spyrou7930db42018-11-22 17:36:28 +0000286#endif // defined(ARG_MAX) || defined(ARG_MIN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100287}
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100288#endif // defined(HEIGHT)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100289
290#if defined(DEPTH)
291/** This kernel performs reduction on z-axis.
292 *
293 * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
294 * @note The depth size must be passed at compile time using -DDEPTH e.g. -DDEPTH=128
295 *
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100296 * @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8/S32/F16/F32
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100297 * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
298 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
299 * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
300 * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
301 * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
302 * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
303 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor
304 * @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p input_ptt
305 * @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
306 * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
307 * @param[in] output_stride_y Stride of the output tensor in Y dimension (in bytes)
308 * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
309 * @param[in] output_stride_z Stride of the output tensor in Z dimension (in bytes)
310 * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
311 * @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
312 */
313__kernel void reduction_operation_z(
314 TENSOR3D_DECLARATION(input),
315 TENSOR3D_DECLARATION(output))
316{
317 Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
318 Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
319
320 VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
Michalis Spyrou7930db42018-11-22 17:36:28 +0000321 res = CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, 0)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100322
Georgios Pinitas8be91482019-03-26 17:23:28 +0000323#if defined(COMPLEX)
324 VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
325 res1 = CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 8, 0, 0)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
326#endif // defined(COMPLEX)
Michalis Spyrou7930db42018-11-22 17:36:28 +0000327#if defined(SUM_SQUARE)
328 res *= res;
329#endif // defined(SUM_SQUARE)
330
331#if defined(ARG_MAX) || defined(ARG_MIN)
332 uint16 indx = 0;
333#endif // defined(ARG_MAX) || defined(ARG_MIN)
334
335 for(unsigned int z = 1; z < DEPTH; ++z)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100336 {
Michalis Spyrou8aaf93e2018-10-11 17:33:32 +0100337 VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
338 in = CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, z)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou7930db42018-11-22 17:36:28 +0000339
Georgios Pinitas8be91482019-03-26 17:23:28 +0000340#if defined(COMPLEX)
341 VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
342 in1 = CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 8, 0, z)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
343#endif // defined(COMPLEX)
344
Michalis Spyrou7930db42018-11-22 17:36:28 +0000345#if defined(ARG_MAX)
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100346 uint16 cond_conv = CONVERT(ISGREATER(in, res), uint16);
Michalis Spyrou7930db42018-11-22 17:36:28 +0000347 indx = select(indx, z, cond_conv);
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100348 res = select(res, in, ISGREATER(in, res));
Michalis Spyrou7930db42018-11-22 17:36:28 +0000349#elif defined(ARG_MIN)
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100350 uint16 cond_conv = CONVERT(ISLESS(in, res), uint16);
Michalis Spyrou7930db42018-11-22 17:36:28 +0000351 indx = select(indx, z, cond_conv);
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100352 res = select(res, in, ISLESS(in, res));
Michalis Spyrou7930db42018-11-22 17:36:28 +0000353#else // !(defined(ARG_MAX) || defined(ARG_MIN))
Michalis Spyrou8aaf93e2018-10-11 17:33:32 +0100354#if defined(SUM_SQUARE)
355 in *= in;
Michalis Spyrou7930db42018-11-22 17:36:28 +0000356#endif // defined(SUM_SQUARE)
Manuel Bottinib412fab2018-12-10 17:40:23 +0000357#if defined(PROD)
358 res *= in;
Georgios Pinitas8be91482019-03-26 17:23:28 +0000359#else //!defined(PROD)
Michalis Spyrou8aaf93e2018-10-11 17:33:32 +0100360 res += in;
Georgios Pinitas8be91482019-03-26 17:23:28 +0000361#if defined(COMPLEX)
362 res1 += in1;
363#endif // defined(COMPLEX)
Manuel Bottinib412fab2018-12-10 17:40:23 +0000364#endif //defined(PROD)
Michalis Spyrou7930db42018-11-22 17:36:28 +0000365#endif // defined(ARG_MAX) || defined(ARG_MIN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100366 }
367
Michalis Spyrou7930db42018-11-22 17:36:28 +0000368 // Store result
369#if defined(ARG_MAX) || defined(ARG_MIN)
370 vstore16(indx, 0, (__global uint *)output.ptr);
371#else // !(defined(ARG_MAX) || defined(ARG_MIN))
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100372#if defined(MEAN)
373 res /= DEPTH;
Michalis Spyrou7930db42018-11-22 17:36:28 +0000374#endif // defined(MEAN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100375 vstore16(CONVERT(res, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)output.ptr);
Georgios Pinitas8be91482019-03-26 17:23:28 +0000376#if defined(COMPLEX)
377 vstore16(CONVERT(res1, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)tensor3D_offset(&output, 8, 0, 0));
378#endif // defined(COMPLEX)
Michalis Spyrou7930db42018-11-22 17:36:28 +0000379#endif // defined(ARG_MAX) || defined(ARG_MIN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100380}
381#endif /* defined(DEPTH) */
382
383#if defined(BATCH) && defined(DEPTH)
384/** This kernel performs reduction on w-axis.
385 *
386 * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
387 * @note The batch size must be passed at compile time using -DBATCH e.g. -DBATCH=128
388 * @note The depth size must be passed at compile time using -DBATCH e.g. -DDEPTH=128
389 *
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100390 * @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8/S32/F16/F32
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100391 * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
392 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
393 * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
394 * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
395 * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
396 * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
397 * @param[in] input_stride_w Stride of the source tensor in W dimension (in bytes)
398 * @param[in] input_step_w input_stride_w * number of elements along W processed per workitem(in bytes)
399 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor
400 * @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p input_ptt
401 * @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
402 * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
403 * @param[in] output_stride_y Stride of the output tensor in Y dimension (in bytes)
404 * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
405 * @param[in] output_stride_z Stride of the output tensor in Z dimension (in bytes)
406 * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
407 * @param[in] output_stride_w Stride of the output tensor in W dimension (in bytes)
408 * @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes)
409 * @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
410 */
411__kernel void reduction_operation_w(
412 TENSOR4D_DECLARATION(input),
413 TENSOR4D_DECLARATION(output))
414{
415 Tensor4D input = CONVERT_TO_TENSOR4D_STRUCT(input, DEPTH);
416 Tensor4D output = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH);
417
418 VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
Michalis Spyrou7930db42018-11-22 17:36:28 +0000419 res = CONVERT(vload16(0, (__global DATA_TYPE *)tensor4D_offset(&input, 0, 0, 0, 0)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100420
Michalis Spyrou7930db42018-11-22 17:36:28 +0000421#if defined(SUM_SQUARE)
422 res *= res;
423#endif // defined(SUM_SQUARE)
424
425#if defined(ARG_MAX) || defined(ARG_MIN)
426 uint16 indx = 0;
427#endif // defined(ARG_MAX) || defined(ARG_MIN)
428
429 for(unsigned int w = 1; w < BATCH; ++w)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100430 {
Michalis Spyrou8aaf93e2018-10-11 17:33:32 +0100431 VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
432 in = CONVERT(vload16(0, (__global DATA_TYPE *)tensor4D_offset(&input, 0, 0, 0, w)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou7930db42018-11-22 17:36:28 +0000433
434#if defined(ARG_MAX)
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100435 uint16 cond_conv = CONVERT(ISGREATER(in, res), uint16);
Michalis Spyrou7930db42018-11-22 17:36:28 +0000436 indx = select(indx, w, cond_conv);
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100437 res = select(res, in, ISGREATER(in, res));
Michalis Spyrou7930db42018-11-22 17:36:28 +0000438#elif defined(ARG_MIN)
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100439 uint16 cond_conv = CONVERT(ISLESS(in, res), uint16);
Michalis Spyrou7930db42018-11-22 17:36:28 +0000440 indx = select(indx, w, cond_conv);
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100441 res = select(res, in, ISLESS(in, res));
Michalis Spyrou7930db42018-11-22 17:36:28 +0000442#else // !(defined(ARG_MAX) || defined(ARG_MIN))
Michalis Spyrou8aaf93e2018-10-11 17:33:32 +0100443#if defined(SUM_SQUARE)
444 in *= in;
Michalis Spyrou7930db42018-11-22 17:36:28 +0000445#endif // defined(SUM_SQUARE)
Manuel Bottinib412fab2018-12-10 17:40:23 +0000446#if defined(PROD)
447 res *= in;
448#else //!defined(PROD)
Michalis Spyrou8aaf93e2018-10-11 17:33:32 +0100449 res += in;
Manuel Bottinib412fab2018-12-10 17:40:23 +0000450#endif //defined(PROD)
Michalis Spyrou7930db42018-11-22 17:36:28 +0000451#endif // defined(ARG_MAX) || defined(ARG_MIN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100452 }
453
Michalis Spyrou7930db42018-11-22 17:36:28 +0000454 // Store result
455#if defined(ARG_MAX) || defined(ARG_MIN)
456 vstore16(indx, 0, (__global uint *)output.ptr);
457#else // !(defined(ARG_MAX) || defined(ARG_MIN))
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100458#if defined(MEAN)
459 res /= BATCH;
Michalis Spyrou7930db42018-11-22 17:36:28 +0000460#endif // defined(MEAN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100461 vstore16(CONVERT(res, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)output.ptr);
Michalis Spyrou7930db42018-11-22 17:36:28 +0000462#endif // defined(ARG_MAX) || defined(ARG_MIN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100463}
Manuel Bottinib412fab2018-12-10 17:40:23 +0000464#endif /* defined(BATCH) && defined(DEPTH) */