blob: 0c393345e2c7747b06669864311a2233c891cb6a [file] [log] [blame]
Michalis Spyrou04f089c2017-08-08 17:42:38 +01001/*
Michalis Spyrou6c89ffa2020-01-24 12:05:05 +00002 * Copyright (c) 2016-2020 ARM Limited.
Michalis Spyrou04f089c2017-08-08 17:42:38 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "helpers.h"
25
Michalis Spyrou7317e392020-01-17 11:27:49 +000026#if defined(FLOAT_DATA_TYPE)
27#define ISGREATER(x, y) isgreater(x, y)
28#define ISLESS(x, y) isless(x, y)
29#else // !FLOAT_DATA_TYPE
30#if defined(WIDTH)
31#define ISGREATER(x, y) (x > y) ? 1 : 0
32#define ISLESS(x, y) (x < y) ? 1 : 0
33#else // !defined(WIDTH)
Michalis Spyrou6c89ffa2020-01-24 12:05:05 +000034#define ISGREATER(x, y) select((int16)0, (int16)-1, x > y)
35#define ISLESS(x, y) select((int16)0, (int16)-1, x < y)
Michalis Spyrou7317e392020-01-17 11:27:49 +000036#endif // defined(WIDTH)
37#endif // defined(FLOAT_DATA_TYPE)
38
Michalis Spyrou04f089c2017-08-08 17:42:38 +010039/** Calculate square sum of a vector
40 *
41 * @param[in] input Pointer to the first pixel.
42 *
43 * @return square sum of vector.
44 */
45inline DATA_TYPE square_sum(__global const DATA_TYPE *input)
46{
47 VEC_DATA_TYPE(DATA_TYPE, 16)
48 in = vload16(0, input);
49
50 in *= in;
51
52 in.s01234567 += in.s89ABCDEF;
53 in.s0123 += in.s4567;
54 in.s01 += in.s23;
55
56 return (in.s0 + in.s1);
57}
58
59/** Calculate sum of a vector
60 *
61 * @param[in] input Pointer to the first pixel.
62 *
63 * @return sum of vector.
64 */
65inline DATA_TYPE sum(__global const DATA_TYPE *input)
66{
67 VEC_DATA_TYPE(DATA_TYPE, 16)
68 in = vload16(0, input);
69
70 in.s01234567 += in.s89ABCDEF;
71 in.s0123 += in.s4567;
72 in.s01 += in.s23;
73
74 return (in.s0 + in.s1);
75}
Manuel Bottinib412fab2018-12-10 17:40:23 +000076
77/** Calculate product of a vector
78 *
79 * @param[in] input Pointer to the first pixel.
80 *
81 * @return product of vector.
82 */
83inline DATA_TYPE product(__global const DATA_TYPE *input)
84{
85 VEC_DATA_TYPE(DATA_TYPE, 16)
86 in = vload16(0, input);
87
88 in.s01234567 *= in.s89ABCDEF;
89 in.s0123 *= in.s4567;
90 in.s01 *= in.s23;
91
92 return (in.s0 * in.s1);
93}
Manuel Bottini34f88dd2019-10-18 10:37:46 +000094#if defined(OPERATION)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +010095/** This kernel performs parallel reduction given an operation on x-axis.
Michalis Spyrou04f089c2017-08-08 17:42:38 +010096 *
97 * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
Michalis Spyrou04f089c2017-08-08 17:42:38 +010098 * @note The operation we want to perform must be passed at compile time using -DOPERATION e.g. -DOPERATION=square_sum
Michalis Spyrou7e9391b2018-10-05 14:49:28 +010099 * @note The mean flag must be passed at compile time using -DMEAN if we want to compute the mean value
Manuel Bottinib412fab2018-12-10 17:40:23 +0000100 * @note The product flag must be passed at compile time using -DPROD if we want to compute the product, otherwise sum will be used
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100101 * @note The width size must be passed at compile time using -DWIDTH e.g. -DWIDTH=128 if we want to compute the mean value
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100102 *
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100103 * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100104 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
105 * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
Michalis Spyrouf6402dd2018-01-26 15:06:19 +0000106 * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
107 * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100108 * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
Manuel Bottinib412fab2018-12-10 17:40:23 +0000109 * @param[in] partial_res_ptr The local buffer to hold partial result values. Supported data types: same as @p src_ptr
110 * @param[in] partial_res_stride_x Stride of the output tensor in X dimension (in bytes)
111 * @param[in] partial_res_step_x partial_res_stride_x * number of elements along X processed per workitem(in bytes)
112 * @param[in] partial_res_stride_y Stride of the output tensor in Y dimension (in bytes)
113 * @param[in] partial_res_step_y partial_res_stride_y * number of elements along Y processed per workitem(in bytes)
114 * @param[in] partial_res_offset_first_element_in_bytes The offset of the first element in the source tensor
115 * @param[in] local_results Local buffer for storing the partial result
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100116 */
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100117__kernel void reduction_operation_x(
Michalis Spyrouf6402dd2018-01-26 15:06:19 +0000118 IMAGE_DECLARATION(src),
Manuel Bottinib412fab2018-12-10 17:40:23 +0000119 IMAGE_DECLARATION(partial_res),
Manuel Bottini34f88dd2019-10-18 10:37:46 +0000120 __local DATA_TYPE *local_results)
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100121{
Michalis Spyrouf6402dd2018-01-26 15:06:19 +0000122 Image src = CONVERT_TO_IMAGE_STRUCT(src);
Manuel Bottinib412fab2018-12-10 17:40:23 +0000123 Image partial_res = CONVERT_TO_IMAGE_STRUCT(partial_res);
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100124
125 unsigned int lsize = get_local_size(0);
126 unsigned int lid = get_local_id(0);
127
Michalis Spyrouf6402dd2018-01-26 15:06:19 +0000128 for(unsigned int y = 0; y < get_local_size(1); ++y)
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100129 {
Manuel Bottini34f88dd2019-10-18 10:37:46 +0000130 local_results[lid] = OPERATION((__global DATA_TYPE *)offset(&src, 0, y));
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100131 barrier(CLK_LOCAL_MEM_FENCE);
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100132
Michalis Spyrouf6402dd2018-01-26 15:06:19 +0000133 // Perform parallel reduction
134 for(unsigned int i = lsize >> 1; i > 0; i >>= 1)
135 {
136 if(lid < i)
137 {
Manuel Bottinib412fab2018-12-10 17:40:23 +0000138#if defined(PROD)
139 local_results[lid] *= local_results[lid + i];
Manuel Bottini34f88dd2019-10-18 10:37:46 +0000140#else // !defined(PROD)
Manuel Bottinib412fab2018-12-10 17:40:23 +0000141 local_results[lid] += local_results[lid + i];
Manuel Bottini34f88dd2019-10-18 10:37:46 +0000142#endif // defined(PROD)
Michalis Spyrouf6402dd2018-01-26 15:06:19 +0000143 }
144 barrier(CLK_LOCAL_MEM_FENCE);
145 }
146
147 if(lid == 0)
148 {
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100149#if defined(MEAN) && defined(WIDTH)
150 if(y == get_local_size(1) - 1)
151 {
Manuel Bottinib412fab2018-12-10 17:40:23 +0000152 local_results[0] /= WIDTH;
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100153 }
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100154#endif // defined(MEAN) && defined(WIDTH)
Manuel Bottini34f88dd2019-10-18 10:37:46 +0000155 ((__global DATA_TYPE *)offset(&partial_res, get_group_id(0), y))[0] = local_results[0];
Michalis Spyrouf6402dd2018-01-26 15:06:19 +0000156 }
Michalis Spyrou04f089c2017-08-08 17:42:38 +0100157 }
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100158}
Manuel Bottini34f88dd2019-10-18 10:37:46 +0000159#endif // defined(OPERATION)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100160
161#if defined(WIDTH)
Michalis Spyrou7930db42018-11-22 17:36:28 +0000162/** This kernel performs reduction on x-axis. (Non parallel)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100163 *
Michalis Spyrou7930db42018-11-22 17:36:28 +0000164 * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100165 * @note The width size must be passed at compile time using -DWIDTH e.g. -DWIDTH=128
Manuel Bottinib412fab2018-12-10 17:40:23 +0000166 * @note The product flag must be passed at compile time using -DPROD if we want to compute the product, otherwise sum will be used
Manuel Bottini7b9998d2019-10-21 17:59:07 +0100167 * @note In case of MIN and MAX the condition data type must be passed at compile time using -DCOND_DATA_TYPE e.g. -DCOND_DATA_TYPE=short
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100168 *
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100169 * @param[in] src_ptr Pointer to the source tensor. Supported data types: S32/F16/F32 and QASYMM8 for operation MEAN
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100170 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
171 * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
172 * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
173 * @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p src_ptt
174 * @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
175 * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
176 * @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
177 */
Michalis Spyrou7930db42018-11-22 17:36:28 +0000178__kernel void reduction_operation_non_parallel_x(
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100179 VECTOR_DECLARATION(src),
180 VECTOR_DECLARATION(output))
181{
182 Vector src = CONVERT_TO_VECTOR_STRUCT(src);
183 Vector output = CONVERT_TO_VECTOR_STRUCT(output);
184
Michalis Spyrou7930db42018-11-22 17:36:28 +0000185 DATA_TYPE_PROMOTED res = *((__global DATA_TYPE *)vector_offset(&src, 0));
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100186
Michalis Spyrou7930db42018-11-22 17:36:28 +0000187 for(unsigned int x = 1; x < WIDTH; ++x)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100188 {
Michalis Spyrou7930db42018-11-22 17:36:28 +0000189 DATA_TYPE_PROMOTED in = *((__global DATA_TYPE *)vector_offset(&src, x));
Manuel Bottini7b9998d2019-10-21 17:59:07 +0100190#if defined(MIN)
Usama Arif048b0f32019-05-22 16:32:27 +0100191 res = select(res, in, CONVERT(ISLESS(in, res), COND_DATA_TYPE));
192#elif defined(MAX)
Manuel Bottini7b9998d2019-10-21 17:59:07 +0100193 res = select(res, in, CONVERT(ISGREATER(in, res), COND_DATA_TYPE));
194#else // !(defined(MAX) || defined(MIN))
Michalis Spyrou7930db42018-11-22 17:36:28 +0000195 res += in;
Manuel Bottini7b9998d2019-10-21 17:59:07 +0100196#endif // defined(MAX) || defined(MIN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100197 }
198
Michalis Spyrou7930db42018-11-22 17:36:28 +0000199 // Store result
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100200#if defined(MEAN)
201 res /= WIDTH;
Michalis Spyrou7930db42018-11-22 17:36:28 +0000202#endif // defined(MEAN)
Usama Arif048b0f32019-05-22 16:32:27 +0100203#if defined(MIN) || defined(MAX)
Usama Arifb2890502019-05-21 11:48:37 +0100204 *((__global DATA_TYPE_PROMOTED *)output.ptr) = res;
Usama Arif048b0f32019-05-22 16:32:27 +0100205#else // defined(MIN) || defined(MAX)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100206 *((__global uchar *)output.ptr) = convert_uchar(res);
Usama Arif048b0f32019-05-22 16:32:27 +0100207#endif // defined(MIN) || defined(MAX)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100208}
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100209#endif // defined(WIDTH)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100210
211#if defined(HEIGHT)
212/** This kernel performs reduction on y-axis.
213 *
Michalis Spyrou7930db42018-11-22 17:36:28 +0000214 * @note The input data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100215 * @note The height size must be passed at compile time using -DHEIGHT e.g. -DHEIGHT=128
216 *
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100217 * @param[in] src_ptr Pointer to the source tensor. Supported data types: QASYMM8/S32/F16/F32
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100218 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
219 * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
220 * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
221 * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
222 * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
223 * @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p src_ptt
224 * @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
225 * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
226 * @param[in] output_stride_y Stride of the output tensor in Y dimension (in bytes)
227 * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
228 * @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
229 */
230__kernel void reduction_operation_y(
231 IMAGE_DECLARATION(src),
232 IMAGE_DECLARATION(output))
233{
234 Image src = CONVERT_TO_IMAGE_STRUCT(src);
235 Image output = CONVERT_TO_IMAGE_STRUCT(output);
236
237 VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
Michalis Spyrou7930db42018-11-22 17:36:28 +0000238 res = CONVERT(vload16(0, (__global DATA_TYPE *)offset(&src, 0, 0)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100239
Michalis Spyrou7930db42018-11-22 17:36:28 +0000240#if defined(SUM_SQUARE)
241 res *= res;
242#endif // defined(SUM_SQUARE)
243
Michalis Spyrou7930db42018-11-22 17:36:28 +0000244 for(unsigned int y = 1; y < HEIGHT; ++y)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100245 {
Michalis Spyrou8aaf93e2018-10-11 17:33:32 +0100246 VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
247 in = CONVERT(vload16(0, (__global DATA_TYPE *)offset(&src, 0, y)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Manuel Bottini7b9998d2019-10-21 17:59:07 +0100248#if defined(MIN)
Usama Arif048b0f32019-05-22 16:32:27 +0100249 res = select(res, in, ISLESS(in, res));
250#elif defined(MAX)
Manuel Bottini7b9998d2019-10-21 17:59:07 +0100251 res = select(res, in, ISGREATER(in, res));
252#else // !(defined(MAX) || defined(MIN))
Michalis Spyrou8aaf93e2018-10-11 17:33:32 +0100253#if defined(SUM_SQUARE)
254 in *= in;
Michalis Spyrou7930db42018-11-22 17:36:28 +0000255#endif // defined(SUM_SQUARE)
Manuel Bottinib412fab2018-12-10 17:40:23 +0000256#if defined(PROD)
257 res *= in;
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100258#else // !defined(PROD)
Michalis Spyrou8aaf93e2018-10-11 17:33:32 +0100259 res += in;
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100260#endif // defined(PROD)
Manuel Bottini7b9998d2019-10-21 17:59:07 +0100261#endif // defined(MAX) || defined(MIN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100262 }
263
Michalis Spyrou7930db42018-11-22 17:36:28 +0000264 // Store result
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100265#if defined(MEAN)
266 res /= HEIGHT;
Michalis Spyrou7930db42018-11-22 17:36:28 +0000267#endif // defined(MEAN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100268 vstore16(CONVERT(res, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)output.ptr);
269}
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100270#endif // defined(HEIGHT)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100271
272#if defined(DEPTH)
273/** This kernel performs reduction on z-axis.
274 *
275 * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
276 * @note The depth size must be passed at compile time using -DDEPTH e.g. -DDEPTH=128
277 *
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100278 * @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8/S32/F16/F32
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100279 * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
280 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
281 * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
282 * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
283 * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
284 * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
285 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor
286 * @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p input_ptt
287 * @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
288 * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
289 * @param[in] output_stride_y Stride of the output tensor in Y dimension (in bytes)
290 * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
291 * @param[in] output_stride_z Stride of the output tensor in Z dimension (in bytes)
292 * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
293 * @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
294 */
295__kernel void reduction_operation_z(
296 TENSOR3D_DECLARATION(input),
297 TENSOR3D_DECLARATION(output))
298{
299 Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
300 Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
301
302 VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
Michalis Spyrou7930db42018-11-22 17:36:28 +0000303 res = CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, 0)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100304
Georgios Pinitas8be91482019-03-26 17:23:28 +0000305#if defined(COMPLEX)
306 VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
307 res1 = CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 8, 0, 0)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
308#endif // defined(COMPLEX)
Michalis Spyrou7930db42018-11-22 17:36:28 +0000309#if defined(SUM_SQUARE)
310 res *= res;
311#endif // defined(SUM_SQUARE)
312
Michalis Spyrou7930db42018-11-22 17:36:28 +0000313 for(unsigned int z = 1; z < DEPTH; ++z)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100314 {
Michalis Spyrou8aaf93e2018-10-11 17:33:32 +0100315 VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
316 in = CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, z)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou7930db42018-11-22 17:36:28 +0000317
Georgios Pinitas8be91482019-03-26 17:23:28 +0000318#if defined(COMPLEX)
319 VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
320 in1 = CONVERT(vload16(0, (__global DATA_TYPE *)tensor3D_offset(&input, 8, 0, z)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
321#endif // defined(COMPLEX)
322
Manuel Bottini7b9998d2019-10-21 17:59:07 +0100323#if defined(MIN)
Usama Arif048b0f32019-05-22 16:32:27 +0100324 res = select(res, in, ISLESS(in, res));
325#elif defined(MAX)
Manuel Bottini7b9998d2019-10-21 17:59:07 +0100326 res = select(res, in, ISGREATER(in, res));
327#else // !(defined(MAX) || defined(MIN))
Michalis Spyrou8aaf93e2018-10-11 17:33:32 +0100328#if defined(SUM_SQUARE)
329 in *= in;
Michalis Spyrou7930db42018-11-22 17:36:28 +0000330#endif // defined(SUM_SQUARE)
Manuel Bottinib412fab2018-12-10 17:40:23 +0000331#if defined(PROD)
332 res *= in;
Georgios Pinitas8be91482019-03-26 17:23:28 +0000333#else //!defined(PROD)
Michalis Spyrou8aaf93e2018-10-11 17:33:32 +0100334 res += in;
Georgios Pinitas8be91482019-03-26 17:23:28 +0000335#if defined(COMPLEX)
336 res1 += in1;
337#endif // defined(COMPLEX)
Manuel Bottini7b9998d2019-10-21 17:59:07 +0100338#endif // defined(PROD)
339#endif // defined(MAX) || defined(MIN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100340 }
341
Michalis Spyrou7930db42018-11-22 17:36:28 +0000342 // Store result
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100343#if defined(MEAN)
344 res /= DEPTH;
Michalis Spyrou7930db42018-11-22 17:36:28 +0000345#endif // defined(MEAN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100346 vstore16(CONVERT(res, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)output.ptr);
Georgios Pinitas8be91482019-03-26 17:23:28 +0000347#if defined(COMPLEX)
348 vstore16(CONVERT(res1, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)tensor3D_offset(&output, 8, 0, 0));
349#endif // defined(COMPLEX)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100350}
351#endif /* defined(DEPTH) */
352
353#if defined(BATCH) && defined(DEPTH)
354/** This kernel performs reduction on w-axis.
355 *
356 * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float
357 * @note The batch size must be passed at compile time using -DBATCH e.g. -DBATCH=128
Manuel Bottini34f88dd2019-10-18 10:37:46 +0000358 * @note The depth size must be passed at compile time using -DBATCH e.g. -DDEPTH=128
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100359 *
Michalis Spyroub9626ab2019-05-13 17:41:01 +0100360 * @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8/S32/F16/F32
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100361 * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
362 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
363 * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes)
364 * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
365 * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
366 * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
367 * @param[in] input_stride_w Stride of the source tensor in W dimension (in bytes)
368 * @param[in] input_step_w input_stride_w * number of elements along W processed per workitem(in bytes)
369 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source tensor
370 * @param[in] output_ptr The local buffer to hold sumed values. Supported data types: same as @p input_ptt
371 * @param[in] output_stride_x Stride of the output tensor in X dimension (in bytes)
372 * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
373 * @param[in] output_stride_y Stride of the output tensor in Y dimension (in bytes)
374 * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
375 * @param[in] output_stride_z Stride of the output tensor in Z dimension (in bytes)
376 * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
377 * @param[in] output_stride_w Stride of the output tensor in W dimension (in bytes)
378 * @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes)
379 * @param[in] output_offset_first_element_in_bytes The offset of the first element in the source tensor
380 */
381__kernel void reduction_operation_w(
382 TENSOR4D_DECLARATION(input),
383 TENSOR4D_DECLARATION(output))
384{
385 Tensor4D input = CONVERT_TO_TENSOR4D_STRUCT(input, DEPTH);
386 Tensor4D output = CONVERT_TO_TENSOR4D_STRUCT(output, DEPTH);
387
388 VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
Michalis Spyrou7930db42018-11-22 17:36:28 +0000389 res = CONVERT(vload16(0, (__global DATA_TYPE *)tensor4D_offset(&input, 0, 0, 0, 0)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100390
Michalis Spyrou7930db42018-11-22 17:36:28 +0000391#if defined(SUM_SQUARE)
392 res *= res;
393#endif // defined(SUM_SQUARE)
394
Michalis Spyrou7930db42018-11-22 17:36:28 +0000395 for(unsigned int w = 1; w < BATCH; ++w)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100396 {
Michalis Spyrou8aaf93e2018-10-11 17:33:32 +0100397 VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)
398 in = CONVERT(vload16(0, (__global DATA_TYPE *)tensor4D_offset(&input, 0, 0, 0, w)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16));
Michalis Spyrou7930db42018-11-22 17:36:28 +0000399
Manuel Bottini7b9998d2019-10-21 17:59:07 +0100400#if defined(MIN)
Usama Arif048b0f32019-05-22 16:32:27 +0100401 res = select(res, in, ISLESS(in, res));
402#elif defined(MAX)
Manuel Bottini7b9998d2019-10-21 17:59:07 +0100403 res = select(res, in, ISGREATER(in, res));
404#else // !(defined(MAX) || defined(MIN))
Michalis Spyrou8aaf93e2018-10-11 17:33:32 +0100405#if defined(SUM_SQUARE)
406 in *= in;
Michalis Spyrou7930db42018-11-22 17:36:28 +0000407#endif // defined(SUM_SQUARE)
Manuel Bottinib412fab2018-12-10 17:40:23 +0000408#if defined(PROD)
409 res *= in;
410#else //!defined(PROD)
Michalis Spyrou8aaf93e2018-10-11 17:33:32 +0100411 res += in;
Manuel Bottinib412fab2018-12-10 17:40:23 +0000412#endif //defined(PROD)
Manuel Bottini7b9998d2019-10-21 17:59:07 +0100413#endif // defined(MAX) || defined(MIN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100414 }
415
Michalis Spyrou7930db42018-11-22 17:36:28 +0000416 // Store result
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100417#if defined(MEAN)
418 res /= BATCH;
Michalis Spyrou7930db42018-11-22 17:36:28 +0000419#endif // defined(MEAN)
Michalis Spyrou7e9391b2018-10-05 14:49:28 +0100420 vstore16(CONVERT(res, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)output.ptr);
421}
Manuel Bottinib412fab2018-12-10 17:40:23 +0000422#endif /* defined(BATCH) && defined(DEPTH) */