blob: 17d893a0134b136d3f95fffbd40525bcfe8c085d [file] [log] [blame]
Anton Lokhmotovaf6204c2017-11-08 09:34:19 +00001/*
Isabella Gottardia527e8c2018-01-31 17:49:25 +00002 * Copyright (c) 2017-2018 ARM Limited.
Anton Lokhmotovaf6204c2017-11-08 09:34:19 +00003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "helpers.h"
25
26#if defined(POOL_AVG)
27#define POOL_OP(x, y) ((x) + (y))
28#else /* defined(POOL_AVG) */
29#define POOL_OP(x, y) (max((x), (y)))
30#endif /* defined(POOL_AVG) */
31
32#define DIV_OP(x, y) (x * (1.f / y))
33
Michalis Spyroue74b2012018-04-18 09:49:16 +010034#define DIV_OP_NHWC(x, y) (convert_float8(x) * (float8)(1.f / y))
35
Anton Lokhmotovaf6204c2017-11-08 09:34:19 +000036#if defined(POOL_L2)
37#error "L2 pooling is not supported"
38#endif /* defined(POOL_L2) */
39
Isabella Gottardia527e8c2018-01-31 17:49:25 +000040int calculate_avg_scale(const int pool_size_x, const int pool_size_y, const int upper_bound_w, const int upper_bound_h,
Anton Lokhmotovaf6204c2017-11-08 09:34:19 +000041 const int pad_x, const int pad_y, const int stride_x, const int stride_y)
42{
43 int start_x = get_global_id(0) * stride_x - pad_x;
44 int start_y = get_global_id(1) * stride_y - pad_y;
Isabella Gottardia527e8c2018-01-31 17:49:25 +000045 const int end_x = min(start_x + pool_size_x, upper_bound_w);
46 const int end_y = min(start_y + pool_size_y, upper_bound_h);
Anton Lokhmotovaf6204c2017-11-08 09:34:19 +000047#if defined(EXCLUDE_PADDING)
48 start_x = max(0, start_x);
49 start_y = max(0, start_y);
50#endif /* defined(EXCLUDE_PADDING) */
51 return ((end_y - start_y) * (end_x - start_x));
52}
53
Michalis Spyroue74b2012018-04-18 09:49:16 +010054/** Performs a pooling function of pool size equal to N (NCHW)
Anton Lokhmotovaf6204c2017-11-08 09:34:19 +000055 *
Isabella Gottardia527e8c2018-01-31 17:49:25 +000056 * @note Pool sizes must be passed using -DPOOL_SIZE_X and -DPOOL_SIZE_Y e.g. -DPOOL_SIZE_X=13;
Anton Lokhmotovaf6204c2017-11-08 09:34:19 +000057 * @note In case of average pooling the following information must be passed at compile time:
58 * -DPOOL_AVG must be provided otherwise max pooling will be performed.
59 * -DMAX_WIDTH and -DMAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad)
60 * -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions
61 * -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension
62 *
63 * @param[in] input_ptr Pointer to the source image. Supported data types: QASYMM8
64 * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
65 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
66 * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
67 * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
68 * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
69 * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
70 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
71 * @param[out] output_ptr Pointer to the destination image. Supported data types: same as @p input_ptr
72 * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)
73 * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
74 * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)
75 * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
76 * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes)
77 * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
78 * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
79 */
Michalis Spyroue74b2012018-04-18 09:49:16 +010080__kernel void pooling_layer_MxN_quantized_nchw(
Anton Lokhmotovaf6204c2017-11-08 09:34:19 +000081 TENSOR3D_DECLARATION(input),
82 TENSOR3D_DECLARATION(output))
83{
84 // Get pixels pointer
85 Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
86 Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
87
88 int8 vdata = 0;
89 int sdata = 0;
90
91 // Load data
Isabella Gottardia527e8c2018-01-31 17:49:25 +000092 for(int y = 0; y < POOL_SIZE_Y; y++)
Anton Lokhmotovaf6204c2017-11-08 09:34:19 +000093 {
94 int x = 0;
Isabella Gottardia527e8c2018-01-31 17:49:25 +000095 for(; x <= ((int)POOL_SIZE_X - 8); x += 8)
Anton Lokhmotovaf6204c2017-11-08 09:34:19 +000096 {
97 uchar8 data = vload8(0, (__global uchar *)tensor3D_offset(&input, x, y, 0));
98 int8 data0 = convert_int8(data);
99 vdata = POOL_OP(vdata, data0);
100 }
101
102 // Leftover
Isabella Gottardia527e8c2018-01-31 17:49:25 +0000103 for(; x < (int)POOL_SIZE_X; ++x)
Anton Lokhmotovaf6204c2017-11-08 09:34:19 +0000104 {
105 uchar data = *((__global uchar *)tensor3D_offset(&input, x, y, 0));
106 int data0 = convert_int(data);
107 sdata = POOL_OP(sdata, data0);
108 }
109 }
110
111 // Reduce result
112 int4 reduce4 = POOL_OP(vdata.s0123, vdata.s4567);
113 int2 reduce2 = POOL_OP(reduce4.s01, reduce4.s23);
114 int res = POOL_OP(reduce2.s0, reduce2.s1);
115 res = POOL_OP(res, sdata);
116
117#if defined(POOL_AVG)
Isabella Gottardia527e8c2018-01-31 17:49:25 +0000118 res = round(DIV_OP(res, calculate_avg_scale(POOL_SIZE_X, POOL_SIZE_Y, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y)));
Anton Lokhmotovaf6204c2017-11-08 09:34:19 +0000119#endif /* defined(POOL_AVG) */
120
121 // Store result
122 *(__global uchar *)output.ptr = convert_uchar(res);
123}
Michalis Spyroue74b2012018-04-18 09:49:16 +0100124
125int calculate_avg_scale_nhwc(const int pool_size_x, const int pool_size_y, int upper_bound_w, int upper_bound_h,
126 const int pad_x, const int pad_y, const int stride_x, const int stride_y)
127{
128 int start_x = get_global_id(1) * stride_x - pad_x;
129 int start_y = get_global_id(2) * stride_y - pad_y;
130
131 const int end_x = min(start_x + pool_size_x, upper_bound_w);
132 const int end_y = min(start_y + pool_size_y, upper_bound_h);
133
134 start_x = max(0, start_x);
135 start_y = max(0, start_y);
136
137 return ((end_y - start_y) * (end_x - start_x));
138}
139
140/** Performs a pooling function of pool size equal to N (NHWC)
141 *
142 * @note Pool sizes must be passed using -DPOOL_SIZE_X and -DPOOL_SIZE_Y e.g. -DPOOL_SIZE_X=13;
143 * @note Tensors width and height must be passed at compile time using -DMAX_WIDTH and -DMAX_HEIGHT
144 * @note Strides must be passed at compile time using -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions
145 * @note Pad values must be passed at compile time using -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension
146 * @note In case of average pooling the following information must be passed at compile time:
147 * -DPOOL_AVG must be provided otherwise max pooling will be performed.
148 *
149 * @param[in] input_ptr Pointer to the source image. Supported data types: QASYMM8
150 * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
151 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
152 * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
153 * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
154 * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
155 * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
156 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
157 * @param[out] output_ptr Pointer to the destination image. Supported data types: same as @p input_ptr
158 * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)
159 * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
160 * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)
161 * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
162 * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes)
163 * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
164 * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
165 */
166__kernel void pooling_layer_MxN_quantized_nhwc(
167 TENSOR3D_DECLARATION(input),
168 TENSOR3D_DECLARATION(output))
169{
170 // Get pixels pointer
171 Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
172 Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
173
174 int8 vdata = 0;
175
176 const int idx_width = get_global_id(1) * STRIDE_X;
177 const int idx_height = get_global_id(2) * STRIDE_Y;
178
179 for(int y = 0; y < POOL_SIZE_Y; ++y)
180 {
181 int y1 = select(y, PAD_Y - idx_height, y + idx_height < PAD_Y || y + idx_height > MAX_HEIGHT);
182 for(int x = 0; x < POOL_SIZE_X; ++x)
183 {
184 int x1 = select(x, PAD_X - idx_width - 1, x + idx_width < PAD_X || x + idx_width > MAX_WIDTH);
185 x1 = select(x1, PAD_X - idx_width - 1, y != y1);
186 uchar8 data = vload8(0, (__global uchar *)tensor3D_offset(&input, 0, x1 - PAD_X, y1 - PAD_Y));
187 int8 data0 = convert_int8(data);
188 vdata = POOL_OP(vdata, data0);
189 }
190 }
191
192#if defined(POOL_AVG)
193 // Divide by pool region in case of average pooling
194 vdata = convert_int8(round(DIV_OP_NHWC(vdata, calculate_avg_scale_nhwc(POOL_SIZE_X, POOL_SIZE_Y, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y))));
195#endif /* defined(POOL_AVG) */
196
197 // Store result
198 vstore8(convert_uchar8(vdata), 0, (__global uchar *)output.ptr);
199}