blob: 0497bf4b91e322e9c22acc2e722fa213ff136893 [file] [log] [blame]
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001/*
2 * Copyright (c) 2017 ARM Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "helpers.h"
25
steniu010c7614f2017-06-23 17:00:26 +010026#ifdef FIXED_POINT_POSITION
27
28#include "fixed_point.h"
29
30#if defined(POOL_AVG)
31#define POOL_OP(x, y) add_sat(x, y)
32#else /* POOL_AVG */
33#define POOL_OP(x, y) (max((x), (y)))
34#endif /* POOL_AVG */
35
36#define DIV_OP1(x, y) DIV_SAT_OP_EXPAND((x), y, DATA_TYPE, FIXED_POINT_POSITION)
37#define DIV_OP(x, y) DIV_OP1(x, y << FIXED_POINT_POSITION)
38
39#else /* FIXED_POINT_POSITION */
40
41#if defined(POOL_AVG)
Anthony Barbier6ff3b192017-09-04 18:44:23 +010042#define POOL_OP(x, y) ((x) + (y))
Anthony Barbierac69aa12017-07-03 17:39:37 +010043#else /* POOL_AVG */
Anthony Barbier6ff3b192017-09-04 18:44:23 +010044#define POOL_OP(x, y) (fmax((x), (y)))
Anthony Barbierac69aa12017-07-03 17:39:37 +010045#endif /* POOL_AVG */
Anthony Barbier6ff3b192017-09-04 18:44:23 +010046
steniu010c7614f2017-06-23 17:00:26 +010047#define DIV_OP(x, y) (x * (1.f / y))
48
49#endif /* FIXED_POINT_POSITION */
50
Gian Marco Iodicecb292832017-08-02 13:19:48 +010051#if STRIDE_X == 1
52#define POOLING3x3(res, input, output) POOLING3x3_STRIDE1(res, input, output)
53#elif STRIDE_X == 2 /* STRIDE_X == 1 */
54#define POOLING3x3(res, input, output) POOLING3x3_STRIDE2(res, input, output)
55#elif STRIDE_X == 3 /* STRIDE_X not equals 1 or 2 */
56#define POOLING3x3(res, input, output) POOLING3x3_STRIDE3(res, input, output)
57#endif /* STRIDE_X == 3 */
58
Gian Marco Iodicecb292832017-08-02 13:19:48 +010059#define POOLING3x3_STRIDE1(res, input, output) \
60 ({ \
61 VEC_DATA_TYPE(DATA_TYPE, 4) \
62 data00 = vload4(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, 0)); \
63 VEC_DATA_TYPE(DATA_TYPE, 2) \
64 data01 = vload2(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, 0) + 4); \
65 VEC_DATA_TYPE(DATA_TYPE, 4) \
66 data10 = vload4(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 1, 0)); \
67 VEC_DATA_TYPE(DATA_TYPE, 2) \
68 data11 = vload2(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 1, 0) + 4); \
69 VEC_DATA_TYPE(DATA_TYPE, 4) \
70 data20 = vload4(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 2, 0)); \
71 VEC_DATA_TYPE(DATA_TYPE, 2) \
72 data21 = vload2(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 2, 0) + 4); \
73 \
74 VEC_DATA_TYPE(DATA_TYPE, 8) \
75 values00 = (VEC_DATA_TYPE(DATA_TYPE, 8))(data00.s01212323); \
76 VEC_DATA_TYPE(DATA_TYPE, 4) \
77 values01 = (VEC_DATA_TYPE(DATA_TYPE, 4))(data01.s0, data00.s3, data01.s01); \
78 VEC_DATA_TYPE(DATA_TYPE, 8) \
79 values10 = (VEC_DATA_TYPE(DATA_TYPE, 8))(data10.s01212323); \
80 VEC_DATA_TYPE(DATA_TYPE, 4) \
81 values11 = (VEC_DATA_TYPE(DATA_TYPE, 4))(data11.s0, data10.s3, data11.s01); \
82 VEC_DATA_TYPE(DATA_TYPE, 8) \
83 values20 = (VEC_DATA_TYPE(DATA_TYPE, 8))(data20.s01212323); \
84 VEC_DATA_TYPE(DATA_TYPE, 4) \
85 values21 = (VEC_DATA_TYPE(DATA_TYPE, 4))(data21.s0, data20.s3, data21.s01); \
86 \
87 values00 = POOL_OP(values00, values10); \
88 values01 = POOL_OP(values01, values11); \
89 values00 = POOL_OP(values00, values20); \
90 values01 = POOL_OP(values01, values21); \
91 \
92 res = POOL_OP((VEC_DATA_TYPE(DATA_TYPE, 4))(values00.s036, values01.s1), (VEC_DATA_TYPE(DATA_TYPE, 4))(values00.s147, values01.s2)); \
93 res = POOL_OP(res, (VEC_DATA_TYPE(DATA_TYPE, 4))(values00.s25, values01.s03)); \
94 })
95
96#define POOLING3x3_STRIDE2(res, input, output) \
97 ({ \
98 VEC_DATA_TYPE(DATA_TYPE, 8) \
99 data00 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, 0)); \
100 DATA_TYPE data01 = *((__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, 0) + 8); \
101 VEC_DATA_TYPE(DATA_TYPE, 8) \
102 data10 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 1, 0)); \
103 DATA_TYPE data11 = *((__global DATA_TYPE *)tensor3D_offset(&input, 0, 1, 0) + 8); \
104 VEC_DATA_TYPE(DATA_TYPE, 8) \
105 data20 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 2, 0)); \
106 DATA_TYPE data21 = *((__global DATA_TYPE *)tensor3D_offset(&input, 0, 2, 0) + 8); \
107 \
108 VEC_DATA_TYPE(DATA_TYPE, 8) \
109 values00 = (VEC_DATA_TYPE(DATA_TYPE, 8))(data00.s01223445); \
110 VEC_DATA_TYPE(DATA_TYPE, 4) \
111 values01 = (VEC_DATA_TYPE(DATA_TYPE, 4))(data00.s667, data01); \
112 VEC_DATA_TYPE(DATA_TYPE, 8) \
113 values10 = (VEC_DATA_TYPE(DATA_TYPE, 8))(data10.s01223445); \
114 VEC_DATA_TYPE(DATA_TYPE, 4) \
115 values11 = (VEC_DATA_TYPE(DATA_TYPE, 4))(data10.s667, data11); \
116 VEC_DATA_TYPE(DATA_TYPE, 8) \
117 values20 = (VEC_DATA_TYPE(DATA_TYPE, 8))(data20.s01223445); \
118 VEC_DATA_TYPE(DATA_TYPE, 4) \
119 values21 = (VEC_DATA_TYPE(DATA_TYPE, 4))(data20.s667, data21); \
120 \
121 values00 = POOL_OP(values00, values10); \
122 values01 = POOL_OP(values01, values11); \
123 values00 = POOL_OP(values00, values20); \
124 values01 = POOL_OP(values01, values21); \
125 \
126 res = POOL_OP((VEC_DATA_TYPE(DATA_TYPE, 4))(values00.s036, values01.s1), (VEC_DATA_TYPE(DATA_TYPE, 4))(values00.s147, values01.s2)); \
127 res = POOL_OP(res, (VEC_DATA_TYPE(DATA_TYPE, 4))(values00.s25, values01.s03)); \
128 })
129
130#define POOLING3x3_STRIDE3(res, input, output) \
131 ({ \
132 VEC_DATA_TYPE(DATA_TYPE, 8) \
133 data00 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, 0)); \
134 VEC_DATA_TYPE(DATA_TYPE, 4) \
135 data01 = vload4(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, 0) + 8); \
136 VEC_DATA_TYPE(DATA_TYPE, 8) \
137 data10 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 1, 0)); \
138 VEC_DATA_TYPE(DATA_TYPE, 4) \
139 data11 = vload4(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 1, 0) + 8); \
140 VEC_DATA_TYPE(DATA_TYPE, 8) \
141 data20 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 2, 0)); \
142 VEC_DATA_TYPE(DATA_TYPE, 4) \
143 data21 = vload4(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 2, 0) + 8); \
144 \
145 data00 = POOL_OP(data00, data10); \
146 data01 = POOL_OP(data01, data11); \
147 data00 = POOL_OP(data00, data20); \
148 data01 = POOL_OP(data01, data21); \
149 \
150 res = POOL_OP((VEC_DATA_TYPE(DATA_TYPE, 4))(data00.s036, data01.s1), (VEC_DATA_TYPE(DATA_TYPE, 4))(data00.s147, data01.s2)); \
151 res = POOL_OP(res, (VEC_DATA_TYPE(DATA_TYPE, 4))(data00.s25, data01.s03)); \
152 })
153
154DATA_TYPE calculate_avg_scale(const int pool_size, const int upper_bound_w, const int upper_bound_h,
155 const int pad_x, const int pad_y, const int stride_x, const int stride_y)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100156{
Gian Marco Iodicecb292832017-08-02 13:19:48 +0100157 const int start_x = get_global_id(0) * stride_x - pad_x;
158 const int start_y = get_global_id(1) * stride_y - pad_y;
159 const int end_x = min(start_x + pool_size, upper_bound_w);
160 const int end_y = min(start_y + pool_size, upper_bound_h);
steniu010c7614f2017-06-23 17:00:26 +0100161 return ((end_y - start_y) * (end_x - start_x));
Gian Marco Iodicecb292832017-08-02 13:19:48 +0100162}
163
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100164/** Performs a pooling function of pool size equal to 2.
165 *
steniu010c7614f2017-06-23 17:00:26 +0100166 * @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are QS8/QS16/F16/F32;
Gian Marco Iodicecb292832017-08-02 13:19:48 +0100167 * @note In case of average pooling the following information must be passed at compile time:
168 * -DPOOL_AVG must be provided otherwise max pooling will be performed.
169 * -DMAX_WIDTH and -DMAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad)
170 * -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions
171 * -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100172 *
steniu010c7614f2017-06-23 17:00:26 +0100173 * @param[in] input_ptr Pointer to the source image. Supported data types: QS8/QS16/F16/F32
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100174 * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
175 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
176 * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
177 * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
178 * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
179 * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
180 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
Gian Marco Iodicecb292832017-08-02 13:19:48 +0100181 * @param[out] output_ptr Pointer to the destination image. Supported data types: same as @p input_ptr
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100182 * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)
183 * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
184 * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)
185 * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
186 * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes)
187 * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
188 * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100189 */
190__kernel void pooling_layer_2(
191 TENSOR3D_DECLARATION(input),
Gian Marco Iodicecb292832017-08-02 13:19:48 +0100192 TENSOR3D_DECLARATION(output))
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100193{
194 // Get pixels pointer
195 Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
196 Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
197
198 // Load data
199 VEC_DATA_TYPE(DATA_TYPE, 2)
200 data0 = vload2(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, 0));
201 VEC_DATA_TYPE(DATA_TYPE, 2)
202 data1 = vload2(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 1, 0));
203
204 // Perform calculations
205 data0 = POOL_OP(data0, data1);
206 DATA_TYPE res = POOL_OP(data0.s0, data0.s1);
207
Georgios Pinitasce093142017-06-19 16:11:53 +0100208 // Divide by pool region in case of average pooling
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100209#ifdef POOL_AVG
steniu010c7614f2017-06-23 17:00:26 +0100210 res = DIV_OP(res, calculate_avg_scale(2, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y));
Anthony Barbierac69aa12017-07-03 17:39:37 +0100211#endif /* POOL_AVG */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100212
213 // Store result
214 *(__global DATA_TYPE *)output.ptr = res;
215}
216
Gian Marco Iodicecb292832017-08-02 13:19:48 +0100217/** Performs a pooling function of pool size equal to 3
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100218 *
steniu010c7614f2017-06-23 17:00:26 +0100219 * @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are QS8/QS16/F16/F32;
Gian Marco Iodicecb292832017-08-02 13:19:48 +0100220 * @note In case of average pooling the following information must be passed at compile time:
221 * -DPOOL_AVG must be provided otherwise max pooling will be performed.
222 * -DMAX_WIDTH and -DMAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad)
223 * -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions
224 * -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100225 *
steniu010c7614f2017-06-23 17:00:26 +0100226 * @param[in] input_ptr Pointer to the source image. Supported data types: QS8/QS16/F16/F32
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100227 * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
228 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
229 * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
230 * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
231 * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
232 * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
233 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
Gian Marco Iodicecb292832017-08-02 13:19:48 +0100234 * @param[out] output_ptr Pointer to the destination image. Supported data types: same as @p input_ptr
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100235 * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)
236 * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
237 * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)
238 * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
239 * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes)
240 * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
241 * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100242 */
243__kernel void pooling_layer_3(
244 TENSOR3D_DECLARATION(input),
Gian Marco Iodicecb292832017-08-02 13:19:48 +0100245 TENSOR3D_DECLARATION(output))
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100246{
247 // Get pixels pointer
248 Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
249 Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
250
251 // Load data
252 VEC_DATA_TYPE(DATA_TYPE, 3)
253 data0 = vload3(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, 0));
254 VEC_DATA_TYPE(DATA_TYPE, 3)
255 data1 = vload3(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 1, 0));
256 VEC_DATA_TYPE(DATA_TYPE, 3)
257 data2 = vload3(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 2, 0));
258
259 // Perform calculations
260 data0 = POOL_OP(data0, data1);
261 data0 = POOL_OP(data0, data2);
262 DATA_TYPE res = POOL_OP(POOL_OP(data0.s0, data0.s1), data0.s2);
263
Georgios Pinitasce093142017-06-19 16:11:53 +0100264 // Divide by pool region in case of average pooling
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100265#ifdef POOL_AVG
steniu010c7614f2017-06-23 17:00:26 +0100266 res = DIV_OP(res, calculate_avg_scale(3, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y));
267#endif /* POOL_AVG */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100268
269 // Store result
270 *(__global DATA_TYPE *)output.ptr = res;
271}
Georgios Pinitasce093142017-06-19 16:11:53 +0100272
steniu010c7614f2017-06-23 17:00:26 +0100273#if defined(POOLING3x3) && !defined(FIXED_POINT_POSITION)
274
275#define CONVERT_OP(data_type) convert_##data_type##4
276#define CONVERT_VECTOR4(data_type) CONVERT_OP(data_type)
277
278VEC_DATA_TYPE(DATA_TYPE, 4)
279calculate_avg_scale4(const int pool_size, const int upper_bound_w, const int upper_bound_h,
280 const int pad_x, const int pad_y, const int stride_x, const int stride_y)
281{
282 const int4 start_x = ((int4)get_global_id(0) * 4 + (int4)(0, 1, 2, 3)) * (int4)stride_x - (int4)pad_x;
283 const int start_y = get_global_id(1) * stride_y - pad_y;
284 const int4 end_x = min(start_x + (int4)pool_size, (int4)upper_bound_w);
285 const int end_y = min(start_y + pool_size, upper_bound_h);
286 return (VEC_DATA_TYPE(DATA_TYPE, 4))(1.f) / CONVERT_VECTOR4(DATA_TYPE)(((int4)(end_y - start_y)) * (end_x - start_x));
287}
288
Gian Marco Iodicecb292832017-08-02 13:19:48 +0100289/** Performs an optimized pooling function of pool size equal to 3 when the stride_x is less equal than 3
Georgios Pinitasce093142017-06-19 16:11:53 +0100290 *
steniu010c7614f2017-06-23 17:00:26 +0100291 * @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are QS8/QS16/F16/F32;
Gian Marco Iodicecb292832017-08-02 13:19:48 +0100292 * @note In case of average pooling the following information must be passed at compile time:
293 * -DPOOL_AVG must be provided otherwise max pooling will be performed.
294 * -DMAX_WIDTH and -DMAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad)
295 * -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions
296 * -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension
Georgios Pinitasce093142017-06-19 16:11:53 +0100297 *
Gian Marco Iodicecb292832017-08-02 13:19:48 +0100298 * @param[in] input_ptr Pointer to the source image. Supported data types: F16/F32
Georgios Pinitasce093142017-06-19 16:11:53 +0100299 * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
300 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
301 * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
302 * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
303 * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
304 * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
305 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
Gian Marco Iodicecb292832017-08-02 13:19:48 +0100306 * @param[out] output_ptr Pointer to the destination image. Supported data types: same as @p input_ptr
Georgios Pinitasce093142017-06-19 16:11:53 +0100307 * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)
308 * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
309 * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)
310 * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
311 * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes)
312 * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
313 * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
Gian Marco Iodicecb292832017-08-02 13:19:48 +0100314 */
315__kernel void pooling_layer_3_optimized(
316 TENSOR3D_DECLARATION(input),
317 TENSOR3D_DECLARATION(output))
318{
319 // Get pixels pointer
320 Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
321 Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
322
323 VEC_DATA_TYPE(DATA_TYPE, 4)
324 res;
325
326 // Perform pooling 3x3 for 4 output elements
327 POOLING3x3(res, input, output);
328
329 // Divide by pool region in case of average pooling
330#ifdef POOL_AVG
331 res *= calculate_avg_scale4(3, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y);
332#endif // POOL_AVG
333
334 vstore4(res, 0, (__global DATA_TYPE *)output.ptr);
335}
steniu010c7614f2017-06-23 17:00:26 +0100336#endif // defined(POOLING3x3) && !defined(FIXED_POINT_POSITION)
Gian Marco Iodicecb292832017-08-02 13:19:48 +0100337
338/** Performs a pooling function of pool size equal to 7.
339 *
steniu010c7614f2017-06-23 17:00:26 +0100340 * @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are QS8/QS16/F16/F32;
Gian Marco Iodicecb292832017-08-02 13:19:48 +0100341 * @note In case of average pooling the following information must be passed at compile time:
342 * -DPOOL_AVG must be provided otherwise max pooling will be performed.
343 * -DMAX_WIDTH and -DMAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad)
344 * -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions
345 * -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension
346 *
steniu010c7614f2017-06-23 17:00:26 +0100347 * @param[in] input_ptr Pointer to the source image. Supported data types: QS8/QS16/F16/F32
Gian Marco Iodicecb292832017-08-02 13:19:48 +0100348 * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
349 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
350 * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
351 * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
352 * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
353 * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
354 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
355 * @param[out] output_ptr Pointer to the destination image. Supported data types: same as @p input_ptr
356 * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)
357 * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
358 * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)
359 * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
360 * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes)
361 * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
362 * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
Georgios Pinitasce093142017-06-19 16:11:53 +0100363 */
364__kernel void pooling_layer_7(
365 TENSOR3D_DECLARATION(input),
Gian Marco Iodicecb292832017-08-02 13:19:48 +0100366 TENSOR3D_DECLARATION(output))
Georgios Pinitasce093142017-06-19 16:11:53 +0100367{
368 // Get pixels pointer
369 Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
370 Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
371
372 // Load data
373 VEC_DATA_TYPE(DATA_TYPE, 8)
374 data0 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, 0));
375 VEC_DATA_TYPE(DATA_TYPE, 8)
376 data1 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 1, 0));
377 VEC_DATA_TYPE(DATA_TYPE, 8)
378 data2 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 2, 0));
379 VEC_DATA_TYPE(DATA_TYPE, 8)
380 data3 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 3, 0));
381 VEC_DATA_TYPE(DATA_TYPE, 8)
382 data4 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 4, 0));
383 VEC_DATA_TYPE(DATA_TYPE, 8)
384 data5 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 5, 0));
385 VEC_DATA_TYPE(DATA_TYPE, 8)
386 data6 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 6, 0));
387
388 // Pool operation of all rows
389 data0 = POOL_OP(data0, data1);
390 data2 = POOL_OP(data2, data3);
391 data4 = POOL_OP(data4, data5);
392 data0 = POOL_OP(data0, data2);
393 data4 = POOL_OP(data4, data6);
394 data0 = POOL_OP(data0, data4);
395
396 // Set last element
397#ifdef POOL_AVG
398 data0.s7 = 0;
Anthony Barbierac69aa12017-07-03 17:39:37 +0100399#else /* POOL_AVG */
Georgios Pinitasce093142017-06-19 16:11:53 +0100400 data0.s7 = data0.s6;
Anthony Barbierac69aa12017-07-03 17:39:37 +0100401#endif /* POOL_AVG */
Georgios Pinitasce093142017-06-19 16:11:53 +0100402
403 // Reduce result
404 VEC_DATA_TYPE(DATA_TYPE, 4)
405 reduce4 = POOL_OP(data0.s0123, data0.s4567);
406 VEC_DATA_TYPE(DATA_TYPE, 2)
407 reduce2 = POOL_OP(reduce4.s01, reduce4.s23);
408 DATA_TYPE res = POOL_OP(reduce2.s0, reduce2.s1);
409
410 // Divide by pool region in case of average pooling
411#ifdef POOL_AVG
steniu010c7614f2017-06-23 17:00:26 +0100412 res = DIV_OP(res, calculate_avg_scale(7, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y));
Anthony Barbierac69aa12017-07-03 17:39:37 +0100413#endif /* POOL_AVG */
Georgios Pinitasce093142017-06-19 16:11:53 +0100414
415 // Store result
416 *(__global DATA_TYPE *)output.ptr = res;
417}
Gian Marco Iodicebf179552017-09-05 13:51:21 +0100418
419#if defined(POOL_SIZE)
420
421// Set the initial value for the pooling operation accordingly with the data type
422#if defined(POOL_AVG)
423#define INITIAL_VALUE 0
424#else // POOL_AVG
425#ifdef FIXED_POINT_POSITION
426#define MIN_VAL_EXPAND(type) type##_MIN
427#define MIN_VAL(type) MIN_VAL_EXPAND(type)
428#define INITIAL_VALUE MIN_VAL(DATA_TYPE)
429#define INITIAL_VALUE 0
430#else // FIXED_POINT_POSITION
431#if FP16
432#define INITIAL_VALUE -HALF_MAX
433#else // FP16
434#define INITIAL_VALUE -FLT_MAX
435#endif // FP16
436#endif // FIXED_POINT_POSITION
437
438#endif // POOL_AVG
439
440/** Performs a pooling function of pool size equal to N
441 *
442 * @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are F16/F32;
443 * @note -DFP16 must be passed at compile time if half float data type is used
444 * @note Pool size must be passed using -DPOOL_SIZE e.g. -DPOOL_SIZE=13;
445 * @note In case of average pooling the following information must be passed at compile time:
446 * -DPOOL_AVG must be provided otherwise max pooling will be performed.
447 * -DMAX_WIDTH and -DMAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad)
448 * -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions
449 * -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension
450 *
451 * @param[in] input_ptr Pointer to the source image. Supported data types: F16/F32
452 * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
453 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
454 * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
455 * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
456 * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
457 * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
458 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
459 * @param[out] output_ptr Pointer to the destination image. Supported data types: same as @p input_ptr
460 * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)
461 * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
462 * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)
463 * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
464 * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes)
465 * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
466 * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
467 */
468__kernel void pooling_layer_N(
469 TENSOR3D_DECLARATION(input),
470 TENSOR3D_DECLARATION(output))
471{
472 // Get pixels pointer
473 Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
474 Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
475
476 VEC_DATA_TYPE(DATA_TYPE, 8)
477 vdata = INITIAL_VALUE;
478 DATA_TYPE sdata = INITIAL_VALUE;
479
480 // Load data
481 for(int y = 0; y < POOL_SIZE; y++)
482 {
483 int x = 0;
484 for(; x <= ((int)POOL_SIZE - 8); x += 8)
485 {
486 VEC_DATA_TYPE(DATA_TYPE, 8)
487 data0 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, x, y, 0));
488 vdata = POOL_OP(vdata, data0);
489 }
490
491 // Leftover
492 for(; x < (int)POOL_SIZE; ++x)
493 {
494 DATA_TYPE data0 = *((__global DATA_TYPE *)tensor3D_offset(&input, x, y, 0));
495 sdata = POOL_OP(sdata, data0);
496 }
497 }
498
499 // Reduce result
500 VEC_DATA_TYPE(DATA_TYPE, 4)
501 reduce4 = POOL_OP(vdata.s0123, vdata.s4567);
502 VEC_DATA_TYPE(DATA_TYPE, 2)
503 reduce2 = POOL_OP(reduce4.s01, reduce4.s23);
504 DATA_TYPE res = POOL_OP(reduce2.s0, reduce2.s1);
505 res = POOL_OP(res, sdata);
506
507 // Divide by pool region in case of average pooling
508#ifdef POOL_AVG
509 res = DIV_OP(res, calculate_avg_scale(POOL_SIZE, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y));
510#endif /* POOL_AVG */
511
512 // Store result
513 *(__global DATA_TYPE *)output.ptr = res;
514}
515#endif // defined(POOL_SIZE)