blob: 79343d49c735f1778f687fd06031c5691ff4fce2 [file] [log] [blame]
Michalis Spyrou16934a52018-08-21 18:03:58 +01001/*
2 * Copyright (c) 2018 ARM Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software withoutput restriction, including withoutput limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KOUTD, EXPRESS OR
17 * IMPLIED, OUTCLUDOUTG BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONOUTFROUTGEMENT. OUT NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER OUT AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISOUTG FROM,
21 * OUT OF OR OUT CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALOUTGS OUT THE
22 * SOFTWARE.
23 */
24#include "helpers.h"
25
Isabella Gottardicc6129c2018-12-14 11:40:40 +000026#if defined(BATCH_SIZE) && defined(DATA_TYPE) && defined(WIDTH_IN) && defined(HEIGHT_IN)
Michalis Spyrou16934a52018-08-21 18:03:58 +010027/** Calculate the space to batch conversion.
28 *
29 * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=float
30 * @note The block shape tensor rank must be passed at compile time using -DBLOCK_SHAPE_DIM. e.g. -DBLOCK_SHAPE_DIM=2
31 *
32 * @param[in] input_ptr Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
33 * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
34 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
35 * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
36 * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
37 * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
38 * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
39 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the first source image
40 * @param[in] paddings_ptr Pointer to the second source image. Supported data types: S32
41 * @param[in] paddings_stride_x Stride of the paddinds tensor in X dimension (in bytes)
42 * @param[in] paddings_step_x paddings_stride_x * number of elements along X processed per workitem(in bytes)
43 * @param[in] paddings_stride_y Stride of the paddinds tensor in Y dimension (in bytes)
44 * @param[in] paddings_step_y paddings_stride_y * number of elements along Y processed per workitem(in bytes)
45 * @param[in] paddingse_offset_first_element_in_bytes The offset of the first element in the second source image
46 * @param[in] block_shape_ptr Pointer to the block shape tensor. Supported data types: S32
47 * @param[in] block_shape_stride_x Stride of the block shape tensor in X dimension (in bytes)
48 * @param[in] block_shape_step_x block_shape_stride_x * number of elements along X processed per workitem(in bytes)
49 * @param[in] block_shape_stride_y Stride of the block shape tensor in Y dimension (in bytes)
50 * @param[in] block_shape_step_y block_shape_stride_y * number of elements along Y processed per workitem(in bytes)
51 * @param[in] block_shape_offset_first_element_in_bytes The offset of the first element in the block shapetensor
52 * @param[in] batch_id The output tensor batch id
53 * @param[out] output_ptr Pointer to the destination tensor. Supported data types: same as @p input_ptr
54 * @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes)
55 * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
56 * @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes)
57 * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
58 * @param[in] output_stride_z Stride of the destination tensor in Z dimension (in bytes)
59 * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
60 * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
61 */
Michalis Spyrou13a51e12018-09-18 13:09:30 +010062__kernel void space_to_batch_nchw(
Michalis Spyrou16934a52018-08-21 18:03:58 +010063 TENSOR4D_DECLARATION(input),
64 IMAGE_DECLARATION(paddings),
65 VECTOR_DECLARATION(block_shape),
66 const int batch_id,
67 TENSOR3D_DECLARATION(output))
68{
69 Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, 0);
70 Image pad = CONVERT_TO_IMAGE_STRUCT_NO_STEP(paddings);
71 Vector block = CONVERT_TO_VECTOR_STRUCT_NO_STEP(block_shape);
72 Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(output);
73
Michalis Spyrouedf26ea2018-11-21 14:17:42 +000074 const int pad_left_x = *((__global int *)offset(&pad, 0, 0));
75 const int pad_right_x = *((__global int *)offset(&pad, 1, 0));
76 const int pad_left_y = *((__global int *)offset(&pad, 0, 1));
77 const int pad_right_y = *((__global int *)offset(&pad, 1, 1));
Michalis Spyrou16934a52018-08-21 18:03:58 +010078
79 int block_x = *((__global int *)vector_offset(&block, 0));
80 int block_y = *((__global int *)vector_offset(&block, 1));
81
82 const int out_x = get_global_id(0);
83 const int out_y = get_global_id(1);
84 const int z = get_global_id(2);
85
Isabella Gottardicc6129c2018-12-14 11:40:40 +000086 const int pos_x = out_x * block_x + ((batch_id / BATCH_IN) % block_x);
87 const int pos_y = out_y * block_y + ((batch_id / BATCH_IN) / block_x);
88
89 if(((pos_y >= pad_left_y) && (pos_y < pad_left_y + HEIGHT_IN) && (pos_x >= pad_left_x) && (pos_x < pad_left_x + WIDTH_IN)))
Michalis Spyrou16934a52018-08-21 18:03:58 +010090 {
Isabella Gottardicc6129c2018-12-14 11:40:40 +000091 const int w = batch_id % BATCH_IN;
92 const int in_x = pos_x - pad_left_x;
93 const int in_y = pos_y - pad_left_y;
94
Michalis Spyrou16934a52018-08-21 18:03:58 +010095 *((__global DATA_TYPE *)out.ptr) = *((__global DATA_TYPE *)tensor4D_offset(&in, in_x, in_y, z, w));
96 }
97}
Michalis Spyrou13a51e12018-09-18 13:09:30 +010098/** Calculate the space to batch conversion. (NHWC)
99 *
100 * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=float
101 * @note The block shape tensor rank must be passed at compile time using -DBLOCK_SHAPE_DIM. e.g. -DBLOCK_SHAPE_DIM=2
102 *
103 * @param[in] input_ptr Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
104 * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
105 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
106 * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
107 * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
108 * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
109 * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
110 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the first source image
111 * @param[in] paddings_ptr Pointer to the second source image. Supported data types: S32
112 * @param[in] paddings_stride_x Stride of the paddinds tensor in X dimension (in bytes)
113 * @param[in] paddings_step_x paddings_stride_x * number of elements along X processed per workitem(in bytes)
114 * @param[in] paddings_stride_y Stride of the paddinds tensor in Y dimension (in bytes)
115 * @param[in] paddings_step_y paddings_stride_y * number of elements along Y processed per workitem(in bytes)
116 * @param[in] paddingse_offset_first_element_in_bytes The offset of the first element in the second source image
117 * @param[in] block_shape_ptr Pointer to the block shape tensor. Supported data types: S32
118 * @param[in] block_shape_stride_x Stride of the block shape tensor in X dimension (in bytes)
119 * @param[in] block_shape_step_x block_shape_stride_x * number of elements along X processed per workitem(in bytes)
120 * @param[in] block_shape_stride_y Stride of the block shape tensor in Y dimension (in bytes)
121 * @param[in] block_shape_step_y block_shape_stride_y * number of elements along Y processed per workitem(in bytes)
122 * @param[in] block_shape_offset_first_element_in_bytes The offset of the first element in the block shapetensor
123 * @param[in] batch_id The output tensor batch id
124 * @param[out] output_ptr Pointer to the destination tensor. Supported data types: same as @p input_ptr
125 * @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes)
126 * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
127 * @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes)
128 * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
129 * @param[in] output_stride_z Stride of the destination tensor in Z dimension (in bytes)
130 * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
131 * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
132 */
133__kernel void space_to_batch_nhwc(
134 TENSOR4D_DECLARATION(input),
135 IMAGE_DECLARATION(paddings),
136 VECTOR_DECLARATION(block_shape),
137 const int batch_id,
138 TENSOR3D_DECLARATION(output))
139{
140 Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, 0);
141 Image pad = CONVERT_TO_IMAGE_STRUCT_NO_STEP(paddings);
142 Vector block = CONVERT_TO_VECTOR_STRUCT_NO_STEP(block_shape);
143 Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(output);
144
Michalis Spyrouedf26ea2018-11-21 14:17:42 +0000145 const int pad_left_x = *((__global int *)offset(&pad, 0, 0));
146 const int pad_right_x = *((__global int *)offset(&pad, 1, 0));
147 const int pad_left_y = *((__global int *)offset(&pad, 0, 1));
148 const int pad_right_y = *((__global int *)offset(&pad, 1, 1));
Michalis Spyrou13a51e12018-09-18 13:09:30 +0100149
150 int block_x = *((__global int *)vector_offset(&block, 0));
151 int block_y = *((__global int *)vector_offset(&block, 1));
152
153 const int out_x = get_global_id(1);
154 const int out_y = get_global_id(2);
155 const int z = get_global_id(0);
156
Isabella Gottardicc6129c2018-12-14 11:40:40 +0000157 const int pos_x = out_x * block_x + ((batch_id / BATCH_IN) % block_x);
158 const int pos_y = out_y * block_y + ((batch_id / BATCH_IN) / block_x);
159
160 if(((pos_y >= pad_left_y) && (pos_y < pad_left_y + HEIGHT_IN) && (pos_x >= pad_left_x) && (pos_x < pad_left_x + WIDTH_IN)))
Michalis Spyrou13a51e12018-09-18 13:09:30 +0100161 {
Isabella Gottardicc6129c2018-12-14 11:40:40 +0000162 const int w = batch_id % BATCH_IN;
163 const int in_x = pos_x - pad_left_x;
164 const int in_y = pos_y - pad_left_y;
165
Michalis Spyrou13a51e12018-09-18 13:09:30 +0100166 *((__global DATA_TYPE *)out.ptr) = *((__global DATA_TYPE *)tensor4D_offset(&in, z, in_x, in_y, w));
167 }
168}
Isabella Gottardicc6129c2018-12-14 11:40:40 +0000169#endif // defined(BATCH_SIZE) && defined(DATA_TYPE) && defined(WIDTH_IN) && defined(HEIGHT_IN)
Michalis Spyrou16934a52018-08-21 18:03:58 +0100170
Isabella Gottardicc6129c2018-12-14 11:40:40 +0000171#if defined(BATCH_SIZE) && defined(DATA_TYPE) && defined(BLOCK_SHAPE_X) && defined(BLOCK_SHAPE_Y) && defined(PAD_LEFT_X) && defined(PAD_RIGHT_X) && defined(PAD_LEFT_Y) && defined(PAD_RIGHT_Y) && defined(WIDTH_IN) && defined(HEIGHT_IN)
Michalis Spyrou16934a52018-08-21 18:03:58 +0100172/** Calculate the space to batch conversion.
173 *
174 * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=float
175 * @note The input tensor batch size must be passed at compile time using -DBATCH_SIZE. e.g. -DBATCH_SIZE=2
176 * @note The block shape x must be passed at compile time using -DBLOCK_SHAPE_X. e.g. -DBLOCK_SHAPE_X=2
177 * @note The block shape y must be passed at compile time using -DBLOCK_SHAPE_Y. e.g. -DBLOCK_SHAPE_Y=2
178 * @note The starting pad value of x must be passed at compile time using -DPAD_LEFT_X. e.g. -DPAD_LEFT_X=2
179 * @note The ending pad value of x must be passed at compile time using -DPAD_RIGHT_X. e.g. -DPAD_RIGHT_X=2
180 * @note The starting pad value of y must be passed at compile time using -DPAD_LEFT_Y. e.g. -DPAD_LEFT_Y=2
181 * @note The ending pad value of y must be passed at compile time using -DPAD_RIGHT_Y. e.g. -DPAD_RIGHT_X=2
182 *
183 * @param[in] input_ptr Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
184 * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
185 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
186 * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
187 * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
188 * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
189 * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
190 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the first source image
191 * @param[in] batch_id The output tensor batch id
192 * @param[out] output_ptr Pointer to the destination tensor. Supported data types: same as @p input_ptr
193 * @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes)
194 * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
195 * @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes)
196 * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
197 * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes)
198 * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
199 * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
200 */
Michalis Spyrou13a51e12018-09-18 13:09:30 +0100201__kernel void space_to_batch_static_nchw(
Michalis Spyrou16934a52018-08-21 18:03:58 +0100202 TENSOR4D_DECLARATION(input),
203 const int batch_id,
204 TENSOR3D_DECLARATION(output))
205{
206 Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, 0);
207 Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(output);
208
209 int block_x = BLOCK_SHAPE_X;
Michalis Spyrouedf26ea2018-11-21 14:17:42 +0000210 int block_y = BLOCK_SHAPE_Y;
Michalis Spyrou16934a52018-08-21 18:03:58 +0100211
212 const int out_x = get_global_id(0);
213 const int out_y = get_global_id(1);
214 const int z = get_global_id(2);
215
Isabella Gottardicc6129c2018-12-14 11:40:40 +0000216 const int pos_x = out_x * block_x + ((batch_id / BATCH_IN) % block_x);
217 const int pos_y = out_y * block_y + ((batch_id / BATCH_IN) / block_x);
218
219 if(pos_y >= PAD_LEFT_Y && pos_y < PAD_LEFT_Y + HEIGHT_IN && pos_x >= PAD_LEFT_X && pos_x < PAD_LEFT_X + WIDTH_IN)
Michalis Spyrou16934a52018-08-21 18:03:58 +0100220 {
Isabella Gottardicc6129c2018-12-14 11:40:40 +0000221 const int w = batch_id % BATCH_IN;
222 const int in_x = pos_x - PAD_LEFT_X;
223 const int in_y = pos_y - PAD_LEFT_Y;
224
Michalis Spyrou16934a52018-08-21 18:03:58 +0100225 *((__global DATA_TYPE *)out.ptr) = *((__global DATA_TYPE *)tensor4D_offset(&in, in_x, in_y, z, w));
226 }
227}
Michalis Spyrou13a51e12018-09-18 13:09:30 +0100228/** Calculate the space to batch conversion. (NHWC)
229 *
230 * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=float
231 * @note The input tensor batch size must be passed at compile time using -DBATCH_SIZE. e.g. -DBATCH_SIZE=2
232 * @note The block shape x must be passed at compile time using -DBLOCK_SHAPE_X. e.g. -DBLOCK_SHAPE_X=2
233 * @note The block shape y must be passed at compile time using -DBLOCK_SHAPE_Y. e.g. -DBLOCK_SHAPE_Y=2
234 * @note The starting pad value of x must be passed at compile time using -DPAD_LEFT_X. e.g. -DPAD_LEFT_X=2
235 * @note The ending pad value of x must be passed at compile time using -DPAD_RIGHT_X. e.g. -DPAD_RIGHT_X=2
236 * @note The starting pad value of y must be passed at compile time using -DPAD_LEFT_Y. e.g. -DPAD_LEFT_Y=2
237 * @note The ending pad value of y must be passed at compile time using -DPAD_RIGHT_Y. e.g. -DPAD_RIGHT_X=2
238 *
239 * @param[in] input_ptr Pointer to the source tensor. Supported data types: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
240 * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes)
241 * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
242 * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
243 * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
244 * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
245 * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
246 * @param[in] input_offset_first_element_in_bytes The offset of the first element in the first source image
247 * @param[in] batch_id The output tensor batch id
248 * @param[out] output_ptr Pointer to the destination tensor. Supported data types: same as @p input_ptr
249 * @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes)
250 * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
251 * @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes)
252 * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
253 * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes)
254 * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
255 * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
256 */
257__kernel void space_to_batch_static_nhwc(
258 TENSOR4D_DECLARATION(input),
259 const int batch_id,
260 TENSOR3D_DECLARATION(output))
261{
262 Tensor4D in = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(input, 0);
263 Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(output);
264
265 int block_x = BLOCK_SHAPE_X;
Michalis Spyrouedf26ea2018-11-21 14:17:42 +0000266 int block_y = BLOCK_SHAPE_Y;
Michalis Spyrou13a51e12018-09-18 13:09:30 +0100267
268 const int out_x = get_global_id(1);
269 const int out_y = get_global_id(2);
270 const int z = get_global_id(0);
271
Isabella Gottardicc6129c2018-12-14 11:40:40 +0000272 const int pos_x = out_x * block_x + ((batch_id / BATCH_IN) % block_x);
273 const int pos_y = out_y * block_y + ((batch_id / BATCH_IN) / block_x);
274
275 if(pos_y >= PAD_LEFT_Y && pos_y < PAD_LEFT_Y + HEIGHT_IN && pos_x >= PAD_LEFT_X && pos_x < PAD_LEFT_X + WIDTH_IN)
Michalis Spyrou13a51e12018-09-18 13:09:30 +0100276 {
Isabella Gottardicc6129c2018-12-14 11:40:40 +0000277 const int w = batch_id % BATCH_IN;
278 const int in_x = pos_x - PAD_LEFT_X;
279 const int in_y = pos_y - PAD_LEFT_Y;
280
Michalis Spyrou13a51e12018-09-18 13:09:30 +0100281 *((__global DATA_TYPE *)out.ptr) = *((__global DATA_TYPE *)tensor4D_offset(&in, z, in_x, in_y, w));
282 }
283}
Isabella Gottardicc6129c2018-12-14 11:40:40 +0000284#endif // defined(BATCH_SIZE) && defined(DATA_TYPE) && defined(BLOCK_SHAPE_X) && defined(BLOCK_SHAPE_Y) && defined(PAD_LEFT_X) && defined(PAD_RIGHT_X) && defined(PAD_LEFT_Y) && defined(PAD_RIGHT_Y) && defined(WIDTH_IN) && defined(HEIGHT_IN)