blob: 58d89871e3739d4d4181ad1442e3fb386a70301f [file] [log] [blame]
Anton Lokhmotovaf6204c2017-11-08 09:34:19 +00001/*
Isabella Gottardia527e8c2018-01-31 17:49:25 +00002 * Copyright (c) 2017-2018 ARM Limited.
Anton Lokhmotovaf6204c2017-11-08 09:34:19 +00003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "helpers.h"
25
26#if defined(POOL_AVG)
27#define POOL_OP(x, y) ((x) + (y))
28#else /* defined(POOL_AVG) */
29#define POOL_OP(x, y) (max((x), (y)))
30#endif /* defined(POOL_AVG) */
31
32#define DIV_OP(x, y) (x * (1.f / y))
33
Michalis Spyroue74b2012018-04-18 09:49:16 +010034#define DIV_OP_NHWC(x, y) (convert_float8(x) * (float8)(1.f / y))
35
Anton Lokhmotovaf6204c2017-11-08 09:34:19 +000036#if defined(POOL_L2)
37#error "L2 pooling is not supported"
38#endif /* defined(POOL_L2) */
39
Isabella Gottardia527e8c2018-01-31 17:49:25 +000040int calculate_avg_scale(const int pool_size_x, const int pool_size_y, const int upper_bound_w, const int upper_bound_h,
Anton Lokhmotovaf6204c2017-11-08 09:34:19 +000041 const int pad_x, const int pad_y, const int stride_x, const int stride_y)
42{
43 int start_x = get_global_id(0) * stride_x - pad_x;
44 int start_y = get_global_id(1) * stride_y - pad_y;
Isabella Gottardia527e8c2018-01-31 17:49:25 +000045 const int end_x = min(start_x + pool_size_x, upper_bound_w);
46 const int end_y = min(start_y + pool_size_y, upper_bound_h);
Anton Lokhmotovaf6204c2017-11-08 09:34:19 +000047#if defined(EXCLUDE_PADDING)
48 start_x = max(0, start_x);
49 start_y = max(0, start_y);
50#endif /* defined(EXCLUDE_PADDING) */
51 return ((end_y - start_y) * (end_x - start_x));
52}
53
Michalis Spyroue74b2012018-04-18 09:49:16 +010054/** Performs a pooling function of pool size equal to N (NCHW)
Anton Lokhmotovaf6204c2017-11-08 09:34:19 +000055 *
Isabella Gottardia527e8c2018-01-31 17:49:25 +000056 * @note Pool sizes must be passed using -DPOOL_SIZE_X and -DPOOL_SIZE_Y e.g. -DPOOL_SIZE_X=13;
Anton Lokhmotovaf6204c2017-11-08 09:34:19 +000057 * @note In case of average pooling the following information must be passed at compile time:
58 * -DPOOL_AVG must be provided otherwise max pooling will be performed.
59 * -DMAX_WIDTH and -DMAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad)
60 * -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions
61 * -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension
62 *
63 * @param[in] input_ptr Pointer to the source image. Supported data types: QASYMM8
64 * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
65 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
66 * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
67 * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
68 * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
69 * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
70 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
71 * @param[out] output_ptr Pointer to the destination image. Supported data types: same as @p input_ptr
72 * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)
73 * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
74 * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)
75 * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
76 * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes)
77 * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
78 * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
79 */
Michalis Spyroue74b2012018-04-18 09:49:16 +010080__kernel void pooling_layer_MxN_quantized_nchw(
Anton Lokhmotovaf6204c2017-11-08 09:34:19 +000081 TENSOR3D_DECLARATION(input),
82 TENSOR3D_DECLARATION(output))
83{
84 // Get pixels pointer
85 Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
86 Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
87
88 int8 vdata = 0;
89 int sdata = 0;
90
91 // Load data
Isabella Gottardia527e8c2018-01-31 17:49:25 +000092 for(int y = 0; y < POOL_SIZE_Y; y++)
Anton Lokhmotovaf6204c2017-11-08 09:34:19 +000093 {
94 int x = 0;
Isabella Gottardia527e8c2018-01-31 17:49:25 +000095 for(; x <= ((int)POOL_SIZE_X - 8); x += 8)
Anton Lokhmotovaf6204c2017-11-08 09:34:19 +000096 {
97 uchar8 data = vload8(0, (__global uchar *)tensor3D_offset(&input, x, y, 0));
98 int8 data0 = convert_int8(data);
99 vdata = POOL_OP(vdata, data0);
100 }
101
102 // Leftover
Isabella Gottardia527e8c2018-01-31 17:49:25 +0000103 for(; x < (int)POOL_SIZE_X; ++x)
Anton Lokhmotovaf6204c2017-11-08 09:34:19 +0000104 {
105 uchar data = *((__global uchar *)tensor3D_offset(&input, x, y, 0));
106 int data0 = convert_int(data);
107 sdata = POOL_OP(sdata, data0);
108 }
109 }
110
111 // Reduce result
112 int4 reduce4 = POOL_OP(vdata.s0123, vdata.s4567);
113 int2 reduce2 = POOL_OP(reduce4.s01, reduce4.s23);
114 int res = POOL_OP(reduce2.s0, reduce2.s1);
115 res = POOL_OP(res, sdata);
116
117#if defined(POOL_AVG)
Isabella Gottardia527e8c2018-01-31 17:49:25 +0000118 res = round(DIV_OP(res, calculate_avg_scale(POOL_SIZE_X, POOL_SIZE_Y, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y)));
Anton Lokhmotovaf6204c2017-11-08 09:34:19 +0000119#endif /* defined(POOL_AVG) */
120
121 // Store result
122 *(__global uchar *)output.ptr = convert_uchar(res);
123}
Michalis Spyroue74b2012018-04-18 09:49:16 +0100124
125int calculate_avg_scale_nhwc(const int pool_size_x, const int pool_size_y, int upper_bound_w, int upper_bound_h,
126 const int pad_x, const int pad_y, const int stride_x, const int stride_y)
127{
128 int start_x = get_global_id(1) * stride_x - pad_x;
Georgios Pinitas89d71732018-10-29 20:07:15 +0000129#if defined(DST_DEPTH)
130 int start_y = (get_global_id(2) % DST_DEPTH) * stride_y - pad_y;
131#else /* defined(DST_DEPTH) */
132 int start_y = get_global_id(2) * stride_y - pad_y;
133#endif /* defined(DST_DEPTH) */
Michalis Spyroue74b2012018-04-18 09:49:16 +0100134
135 const int end_x = min(start_x + pool_size_x, upper_bound_w);
136 const int end_y = min(start_y + pool_size_y, upper_bound_h);
137
138 start_x = max(0, start_x);
139 start_y = max(0, start_y);
140
141 return ((end_y - start_y) * (end_x - start_x));
142}
143
144/** Performs a pooling function of pool size equal to N (NHWC)
145 *
146 * @note Pool sizes must be passed using -DPOOL_SIZE_X and -DPOOL_SIZE_Y e.g. -DPOOL_SIZE_X=13;
147 * @note Tensors width and height must be passed at compile time using -DMAX_WIDTH and -DMAX_HEIGHT
148 * @note Strides must be passed at compile time using -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions
149 * @note Pad values must be passed at compile time using -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension
150 * @note In case of average pooling the following information must be passed at compile time:
151 * -DPOOL_AVG must be provided otherwise max pooling will be performed.
152 *
153 * @param[in] input_ptr Pointer to the source image. Supported data types: QASYMM8
154 * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
155 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
156 * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
157 * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
158 * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
159 * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
Georgios Pinitas89d71732018-10-29 20:07:15 +0000160 * @param[in] input_stride_w Stride of the source tensor in W dimension (in bytes)
161 * @param[in] input_step_w input_stride_w * number of elements along W processed per workitem(in bytes)
Michalis Spyroue74b2012018-04-18 09:49:16 +0100162 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
163 * @param[out] output_ptr Pointer to the destination image. Supported data types: same as @p input_ptr
Georgios Pinitas89d71732018-10-29 20:07:15 +0000164 * @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes)
Michalis Spyroue74b2012018-04-18 09:49:16 +0100165 * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
Georgios Pinitas89d71732018-10-29 20:07:15 +0000166 * @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes)
Michalis Spyroue74b2012018-04-18 09:49:16 +0100167 * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
Georgios Pinitas89d71732018-10-29 20:07:15 +0000168 * @param[in] output_stride_z Stride of the destination tensor in Z dimension (in bytes)
Michalis Spyroue74b2012018-04-18 09:49:16 +0100169 * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
Georgios Pinitas89d71732018-10-29 20:07:15 +0000170 * @param[in] output_stride_w Stride of the destination tensor in W dimension (in bytes)
171 * @param[in] output_step_w output_stride_w * number of elements along W processed per workitem(in bytes)
Michalis Spyroue74b2012018-04-18 09:49:16 +0100172 * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
173 */
174__kernel void pooling_layer_MxN_quantized_nhwc(
Georgios Pinitas89d71732018-10-29 20:07:15 +0000175 TENSOR4D_DECLARATION(input),
176 TENSOR4D_DECLARATION(output))
Michalis Spyroue74b2012018-04-18 09:49:16 +0100177{
178 // Get pixels pointer
Georgios Pinitas89d71732018-10-29 20:07:15 +0000179#if defined(DST_DEPTH)
180 Tensor4D input = CONVERT_TO_TENSOR4D_STRUCT(input, DST_DEPTH);
181 Tensor4D output = CONVERT_TO_TENSOR4D_STRUCT(output, DST_DEPTH);
182#else /* defined(DST_DEPTH) */
Michalis Spyroue74b2012018-04-18 09:49:16 +0100183 Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
184 Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
Georgios Pinitas89d71732018-10-29 20:07:15 +0000185#endif /* defined(DST_DEPTH) */
Michalis Spyroue74b2012018-04-18 09:49:16 +0100186
187 int8 vdata = 0;
188
Georgios Pinitas89d71732018-10-29 20:07:15 +0000189 const int idx_width = get_global_id(1) * STRIDE_X;
190#if defined(DST_DEPTH)
191 const int idx_height = (get_global_id(2) % DST_DEPTH) * STRIDE_Y;
192#else /* defined(DST_DEPTH) */
Michalis Spyroue74b2012018-04-18 09:49:16 +0100193 const int idx_height = get_global_id(2) * STRIDE_Y;
Georgios Pinitas89d71732018-10-29 20:07:15 +0000194#endif /* defined(DST_DEPTH) */
Michalis Spyroue74b2012018-04-18 09:49:16 +0100195
196 for(int y = 0; y < POOL_SIZE_Y; ++y)
197 {
198 int y1 = select(y, PAD_Y - idx_height, y + idx_height < PAD_Y || y + idx_height > MAX_HEIGHT);
199 for(int x = 0; x < POOL_SIZE_X; ++x)
200 {
Georgios Pinitas89d71732018-10-29 20:07:15 +0000201 int x1 = select(x, PAD_X - idx_width - 1, x + idx_width < PAD_X || x + idx_width > MAX_WIDTH);
202 x1 = select(x1, PAD_X - idx_width - 1, y != y1);
203
204#if defined(DST_DEPTH)
205 uchar8 data = vload8(0, (__global uchar *)tensor4D_offset(&input, 0, x1 - PAD_X, y1 - PAD_Y, 0));
206#else /* defined(DST_DEPTH) */
Michalis Spyroue74b2012018-04-18 09:49:16 +0100207 uchar8 data = vload8(0, (__global uchar *)tensor3D_offset(&input, 0, x1 - PAD_X, y1 - PAD_Y));
Georgios Pinitas89d71732018-10-29 20:07:15 +0000208#endif /* defined(DST_DEPTH) */
209
210 int8 data0 = convert_int8(data);
211 vdata = POOL_OP(vdata, data0);
Michalis Spyroue74b2012018-04-18 09:49:16 +0100212 }
213 }
214
215#if defined(POOL_AVG)
216 // Divide by pool region in case of average pooling
217 vdata = convert_int8(round(DIV_OP_NHWC(vdata, calculate_avg_scale_nhwc(POOL_SIZE_X, POOL_SIZE_Y, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y))));
218#endif /* defined(POOL_AVG) */
219
220 // Store result
221 vstore8(convert_uchar8(vdata), 0, (__global uchar *)output.ptr);
222}