blob: bccfd6543a6efb48b83998f74a8d634f8738b351 [file] [log] [blame]
Adnan AlSinan7075fe22021-07-05 13:12:52 +01001/*
2 * Copyright (c) 2016-2021 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "helpers.h"
Gian Marco Iodice7bc1a772021-09-08 17:14:19 +010025#include "tile_helpers.h"
Adnan AlSinan7075fe22021-07-05 13:12:52 +010026
Adnan AlSinan17975a62021-11-08 17:46:39 +000027#if defined(SCALE_NEAREST_NEIGHBOUR)
Gian Marco Iodice7bc1a772021-09-08 17:14:19 +010028//! @cond Doxygen_Suppress
29/** Performs scale on a tensor by interpolating with the NEAREAST NEIGHBOUR method. (NHWC)
Adnan AlSinan7075fe22021-07-05 13:12:52 +010030 *
31 * @note Sampling policy to used is passed as -DSAMPLING_POLICY_(TYPE) e.g. -DSAMPLING_POLICY_TOP_LEFT
Gian Marco Iodice7bc1a772021-09-08 17:14:19 +010032 * @note The tensor type ("BUFFER" only is supported) of the source tensor must be passed at compile time using -DSRC_TENSOR_TYPE (e.g. -DSRC_TENSOR_TYPE=BUFFER)
33 * @note The tensor type ("BUFFER" only is supported) of the destination tensor must be passed at compile time using -DDST_TENSOR_TYPE (e.g. -DDST_TENSOR_TYPE=BUFFER)
34 * @note The data type of the source tensor must be passed at compile time using -DSRC_DATA_TYPE (e.g. -DSRC_DATA_TYPE=float)
35 * @note The data type of the destination tensor must be passed at compile time using -DDST_DATA_TYPE (e.g. -DDST_DATA_TYPE=float)
36 * @note The number of N0 output channels to process must be passed at compile time using -DN0 (e.g. -DN0=2)
37 * @note The border value value must be passed at compile time using -DCONSTANT_VALUE (e.g. -DCONSTANT_VALUE=0)
38 * @note In case of F32/F16, -DIS_FLOATING_POINT must be passed at compile time
Gian Marco Iodice7bc1a772021-09-08 17:14:19 +010039 * @note If the source tensor has more than 3 dimensions, -DBATCHED_EXECUTION must be passed at compile time
Adnan AlSinan7075fe22021-07-05 13:12:52 +010040 *
Adnan AlSinan17975a62021-11-08 17:46:39 +000041 * @param[in] src_ptr Pointer to the source tensor. Supported data types: U8/S16/F16/F32.
42 * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
43 * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes)
44 * @param[in] src_stride_w Stride of the source tensor in W dimension (in bytes)
45 * @param[in] src_c The size of the channels dimension of the source tensor
46 * @param[in] src_w The size of the width dimension of the source tensor
47 * @param[in] src_h The size of the height dimension of the source tensor
48 * @param[in] src_n The size of the batches dimension of the source tensor
49 * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
50 * @param[in] dst_ptr Pointer to the destination tensor. Supported data types: U8/S16/F16/F32.
51 * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)
52 * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes)
53 * @param[in] dst_stride_w Stride of the destination tensor in W dimension (in bytes)
54 * @param[in] dst_c The size of the channels dimension of the destination tensor
55 * @param[in] dst_w The size of the width dimension of the destination tensor
56 * @param[in] dst_h The size of the height dimension of the destination tensor
57 * @param[in] dst_n The size of the batches dimension of the destination tensor
58 * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
59 * @param[in] scale_x The scale value to apply on the source width
60 * @param[in] scale_y The scale value to apply on the source height
Adnan AlSinan7075fe22021-07-05 13:12:52 +010061 */
Adnan AlSinan17975a62021-11-08 17:46:39 +000062//! @endcond
Adnan AlSinan7075fe22021-07-05 13:12:52 +010063__kernel void scale_nearest_neighbour_nhwc(
Adnan AlSinan17975a62021-11-08 17:46:39 +000064 TENSOR4D_T(src, SRC_TENSOR_TYPE),
65 TENSOR4D_T(dst, DST_TENSOR_TYPE),
66 const float scale_x,
67 const float scale_y)
Adnan AlSinan7075fe22021-07-05 13:12:52 +010068{
Gian Marco Iodice7bc1a772021-09-08 17:14:19 +010069 const int cout = GET_SPATIAL_IDX(0, N0, PARTIAL_N0); // OFM
70 const int xo = GET_SPATIAL_IDX(1, 1, 0); // WIDTH
71#if defined(BATCHED_EXECUTION)
Adnan AlSinan17975a62021-11-08 17:46:39 +000072 const int yo = GET_SPATIAL_IDX(2, 1, 0) % dst_h; // HEIGHT
73 const int bout = GET_SPATIAL_IDX(2, 1, 0) / dst_h; // BATCH SIZE IDX
74#else // defined(BATCHED_EXECUTION)
Gian Marco Iodice7bc1a772021-09-08 17:14:19 +010075 const int yo = GET_SPATIAL_IDX(2, 1, 0); // HEIGHT
Adnan AlSinan17975a62021-11-08 17:46:39 +000076 const int bout = 0; // BATCH SIZE IDX
77#endif // defined(BATCHED_EXECUTION)
Adnan AlSinan7075fe22021-07-05 13:12:52 +010078
79#ifdef SAMPLING_POLICY_TOP_LEFT
Adnan AlSinan17975a62021-11-08 17:46:39 +000080 float xi_f = (xo * scale_x);
81 float yi_f = (yo * scale_y);
Adnan AlSinan7075fe22021-07-05 13:12:52 +010082#elif SAMPLING_POLICY_CENTER
Adnan AlSinan17975a62021-11-08 17:46:39 +000083 float xi_f = ((xo + 0.5f) * scale_x);
84 float yi_f = ((yo + 0.5f) * scale_y);
Gian Marco Iodice7bc1a772021-09-08 17:14:19 +010085#else // SAMPLING_POLICY
Adnan AlSinan7075fe22021-07-05 13:12:52 +010086#error("Unsupported sampling policy");
Gian Marco Iodice7bc1a772021-09-08 17:14:19 +010087#endif // SAMPLING_POLICY
Adnan AlSinan7075fe22021-07-05 13:12:52 +010088
Gian Marco Iodice7bc1a772021-09-08 17:14:19 +010089#ifdef ALIGN_CORNERS
90 xi_f = round(xi_f);
91 yi_f = round(yi_f);
92#endif // ALIGN_CORNERS
93
Adnan AlSinan17975a62021-11-08 17:46:39 +000094 const int xi0 = clamp((int)xi_f, 0, (int)src_w - 1);
95 const int yi0 = clamp((int)yi_f, 0, (int)src_h - 1);
Gian Marco Iodice7bc1a772021-09-08 17:14:19 +010096
97 TILE(SRC_DATA_TYPE, 1, N0, in00);
98
Adnan AlSinan17975a62021-11-08 17:46:39 +000099 T_LOAD_NHWC_WITH_DILATION(SRC_DATA_TYPE, 1, 1, N0, SRC_TENSOR_TYPE, src, bout, yi0, xi0, cout, src_w, src_h, 1, 1, false, in00);
Gian Marco Iodice7bc1a772021-09-08 17:14:19 +0100100
101 TILE(uint, 1, 1, dst_indirect_y);
102
103 // Calculate the destination indirect Y
Adnan AlSinan17975a62021-11-08 17:46:39 +0000104 dst_indirect_y[0].v = xo + (yo * (int)(dst_w)) + bout * (int)(dst_w * dst_h);
Gian Marco Iodice7bc1a772021-09-08 17:14:19 +0100105
106 bool x_cond = PARTIAL_N0 != 0 && get_global_id(0) == 0;
107
108 T_STORE_INDIRECT_WIDTH_SELECT(DST_DATA_TYPE, 1, N0, PARTIAL_N0, DST_TENSOR_TYPE, dst, cout, dst_stride_y, x_cond, in00, dst_indirect_y);
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100109}
Adnan AlSinan17975a62021-11-08 17:46:39 +0000110#endif /* SCALE_NEAREST_NEIGHBOUR */
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100111
Adnan AlSinan17975a62021-11-08 17:46:39 +0000112#if defined(SCALE_BILINEAR)
Gian Marco Iodice7bc1a772021-09-08 17:14:19 +0100113//! @cond Doxygen_Suppress
114/** Performs scale on a tensor by interpolating with the BILINEAR method. (NHWC)
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100115 *
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100116 * @note If border mode replicate is used, is should be passed as -DBORDER_MODE_REPLICATE
Gian Marco Iodice7bc1a772021-09-08 17:14:19 +0100117 * @note Sampling policy to used is passed as -DSAMPLING_POLICY_(TYPE) e.g. -DSAMPLING_POLICY_TOP_LEFT
Gian Marco Iodice7bc1a772021-09-08 17:14:19 +0100118 * @note The tensor type ("BUFFER" only is supported) of the source tensor must be passed at compile time using -DSRC_TENSOR_TYPE (e.g. -DSRC_TENSOR_TYPE=BUFFER)
119 * @note The tensor type ("BUFFER" only is supported) of the destination tensor must be passed at compile time using -DDST_TENSOR_TYPE (e.g. -DDST_TENSOR_TYPE=BUFFER)
120 * @note The data type of the source tensor must be passed at compile time using -DSRC_DATA_TYPE (e.g. -DSRC_DATA_TYPE=float)
121 * @note The data type of the destination tensor must be passed at compile time using -DDST_DATA_TYPE (e.g. -DDST_DATA_TYPE=float)
122 * @note The number of N0 output channels to process must be passed at compile time using -DN0 (e.g. -DN0=2)
123 * @note The border value value must be passed at compile time using -DCONSTANT_VALUE (e.g. -DCONSTANT_VALUE=0)
124 * @note In case of F32/F16, -DIS_FLOATING_POINT must be passed at compile time
Gian Marco Iodice7bc1a772021-09-08 17:14:19 +0100125 * @note If the source tensor has more than 3 dimensions, -DBATCHED_EXECUTION must be passed at compile time
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100126 *
Gian Marco Iodice7bc1a772021-09-08 17:14:19 +0100127 * @note In case of QASYMM8, the following extra information must be passed at compile time:
128 * - The source offset e.g. -DOFFSET=4
129 * - The source scale e.g. -DSCALE=4
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100130 *
Adnan AlSinan17975a62021-11-08 17:46:39 +0000131 * @param[in] src_ptr Pointer to the source tensor. Supported data types: U8/S16/F16/F32.
132 * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
133 * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes)
134 * @param[in] src_stride_w Stride of the source tensor in W dimension (in bytes)
135 * @param[in] src_c The size of the channels dimension of the source tensor
136 * @param[in] src_w The size of the width dimension of the source tensor
137 * @param[in] src_h The size of the height dimension of the source tensor
138 * @param[in] src_n The size of the batches dimension of the source tensor
139 * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
140 * @param[in] dst_ptr Pointer to the destination tensor. Supported data types: U8/S16/F16/F32.
141 * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)
142 * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes)
143 * @param[in] dst_stride_w Stride of the destination tensor in W dimension (in bytes)
144 * @param[in] dst_c The size of the channels dimension of the destination tensor
145 * @param[in] dst_w The size of the width dimension of the destination tensor
146 * @param[in] dst_h The size of the height dimension of the destination tensor
147 * @param[in] dst_n The size of the batches dimension of the destination tensor
148 * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
149 * @param[in] scale_x The scale value to apply on the source width
150 * @param[in] scale_y The scale value to apply on the source height
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100151 */
Adnan AlSinan17975a62021-11-08 17:46:39 +0000152//! @endcond
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100153__kernel void scale_bilinear_nhwc(
Adnan AlSinan17975a62021-11-08 17:46:39 +0000154 TENSOR4D_T(src, SRC_TENSOR_TYPE),
155 TENSOR4D_T(dst, DST_TENSOR_TYPE),
156 const float scale_x,
157 const float scale_y)
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100158{
Gian Marco Iodice7bc1a772021-09-08 17:14:19 +0100159 const int cout = GET_SPATIAL_IDX(0, N0, PARTIAL_N0); // OFM
160 const int xo = GET_SPATIAL_IDX(1, 1, 0); // WIDTH
161#if defined(BATCHED_EXECUTION)
Adnan AlSinan17975a62021-11-08 17:46:39 +0000162 const int yo = GET_SPATIAL_IDX(2, 1, 0) % dst_h; // HEIGHT
163 const int bout = GET_SPATIAL_IDX(2, 1, 0) / dst_h; // BATCH SIZE IDX
164#else // defined(BATCHED_EXECUTION)
Gian Marco Iodice7bc1a772021-09-08 17:14:19 +0100165 const int yo = GET_SPATIAL_IDX(2, 1, 0); // HEIGHT
166 const int bout = 0; // BATCH SIZE IDX
Adnan AlSinan17975a62021-11-08 17:46:39 +0000167#endif // defined(BATCHED_EXECUTION)
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100168
169#ifdef SAMPLING_POLICY_TOP_LEFT
Adnan AlSinan17975a62021-11-08 17:46:39 +0000170 float xi_f = (xo * scale_x);
171 float yi_f = (yo * scale_y);
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100172#elif SAMPLING_POLICY_CENTER
Adnan AlSinan17975a62021-11-08 17:46:39 +0000173 float xi_f = ((xo + 0.5f) * scale_x - 0.5f);
174 float yi_f = ((yo + 0.5f) * scale_y - 0.5f);
Gian Marco Iodice7bc1a772021-09-08 17:14:19 +0100175#else // SAMPLING_POLICY
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100176#error("Unsupported sampling policy");
Gian Marco Iodice7bc1a772021-09-08 17:14:19 +0100177#endif // SAMPLING_POLICY
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100178
Gian Marco Iodice7bc1a772021-09-08 17:14:19 +0100179 const int xi = (int)floor(xi_f);
180 const int yi = (int)floor(yi_f);
Giorgio Arena511771f2021-08-19 13:04:56 +0100181
Gian Marco Iodice7bc1a772021-09-08 17:14:19 +0100182 TILE(SRC_DATA_TYPE, 1, N0, in00);
183 TILE(SRC_DATA_TYPE, 1, N0, in01);
184 TILE(SRC_DATA_TYPE, 1, N0, in10);
185 TILE(SRC_DATA_TYPE, 1, N0, in11);
186
187 // Initialize the tiles to CONSTANT_VALUE
188 in00[0].v = CONSTANT_VALUE;
189 in01[0].v = CONSTANT_VALUE;
190 in10[0].v = CONSTANT_VALUE;
191 in11[0].v = CONSTANT_VALUE;
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100192
193#ifndef BORDER_MODE_REPLICATE
Adnan AlSinan17975a62021-11-08 17:46:39 +0000194 T_LOAD_NHWC_WITH_DILATION(SRC_DATA_TYPE, 1, 1, N0, SRC_TENSOR_TYPE, src, bout, yi, xi, cout, src_w, src_h, 1, 1, true, in00);
195 T_LOAD_NHWC_WITH_DILATION(SRC_DATA_TYPE, 1, 1, N0, SRC_TENSOR_TYPE, src, bout, yi, xi + 1, cout, src_w, src_h, 1, 1, true, in01);
196 T_LOAD_NHWC_WITH_DILATION(SRC_DATA_TYPE, 1, 1, N0, SRC_TENSOR_TYPE, src, bout, yi + 1, xi, cout, src_w, src_h, 1, 1, true, in10);
197 T_LOAD_NHWC_WITH_DILATION(SRC_DATA_TYPE, 1, 1, N0, SRC_TENSOR_TYPE, src, bout, yi + 1, xi + 1, cout, src_w, src_h, 1, 1, true, in11);
Gian Marco Iodice7bc1a772021-09-08 17:14:19 +0100198#else // BORDER_MODE_REPLICATE
Adnan AlSinan17975a62021-11-08 17:46:39 +0000199 const int xi0 = clamp(xi, 0, (int)src_w - 1);
200 const int yi0 = clamp(yi, 0, (int)src_h - 1);
201 const int xi1 = clamp(xi + 1, 0, (int)src_w - 1);
202 const int yi1 = clamp(yi + 1, 0, (int)src_h - 1);
Giorgio Arena511771f2021-08-19 13:04:56 +0100203
Adnan AlSinan17975a62021-11-08 17:46:39 +0000204 T_LOAD_NHWC_WITH_DILATION(SRC_DATA_TYPE, 1, 1, N0, SRC_TENSOR_TYPE, src, bout, yi0, xi0, cout, src_w, src_h, 1, 1, false, in00);
205 T_LOAD_NHWC_WITH_DILATION(SRC_DATA_TYPE, 1, 1, N0, SRC_TENSOR_TYPE, src, bout, yi0, xi1, cout, src_w, src_h, 1, 1, false, in01);
206 T_LOAD_NHWC_WITH_DILATION(SRC_DATA_TYPE, 1, 1, N0, SRC_TENSOR_TYPE, src, bout, yi1, xi0, cout, src_w, src_h, 1, 1, false, in10);
207 T_LOAD_NHWC_WITH_DILATION(SRC_DATA_TYPE, 1, 1, N0, SRC_TENSOR_TYPE, src, bout, yi1, xi1, cout, src_w, src_h, 1, 1, false, in11);
Gian Marco Iodice7bc1a772021-09-08 17:14:19 +0100208#endif // BORDER_MODE_REPLICATE
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100209
Gian Marco Iodice7bc1a772021-09-08 17:14:19 +0100210 TILE(DST_DATA_TYPE, 1, N0, out);
Giorgio Arena511771f2021-08-19 13:04:56 +0100211
Gian Marco Iodice7bc1a772021-09-08 17:14:19 +0100212#if defined(IS_FLOATING_POINT)
213 const SRC_DATA_TYPE a = (SRC_DATA_TYPE)(xi_f - (float)xi);
214 const SRC_DATA_TYPE b = (SRC_DATA_TYPE)(1.f - a);
215 const SRC_DATA_TYPE a1 = (SRC_DATA_TYPE)(yi_f - (float)yi);
216 const SRC_DATA_TYPE b1 = (SRC_DATA_TYPE)(1.f - a1);
Giorgio Arena511771f2021-08-19 13:04:56 +0100217
Gian Marco Iodice7bc1a772021-09-08 17:14:19 +0100218 // Calculate the output
219 out[0].v = ((in00[0].v * b * b1) + (in01[0].v * a * b1) + (in10[0].v * b * a1) + (in11[0].v * a * a1));
220#else // defined(IS_FLOATING_POINT)
221 TILE(float, 1, N0, out_f);
222 TILE(float, 1, N0, in00_f);
223 TILE(float, 1, N0, in01_f);
224 TILE(float, 1, N0, in10_f);
225 TILE(float, 1, N0, in11_f);
Adnan AlSinan7075fe22021-07-05 13:12:52 +0100226
Gian Marco Iodice7bc1a772021-09-08 17:14:19 +0100227 const float a = (xi_f - (float)xi);
228 const float b = (1.f - a);
229 const float a1 = (yi_f - (float)yi);
230 const float b1 = (1.f - a1);
231
232 // Dequantize
233 LOOP_UNROLLING(int, n0, 0, 1, N0,
234 {
235 in00_f[0].s[n0] = ((float)in00[0].s[n0] - (float)OFFSET) * (float)SCALE;
236 in01_f[0].s[n0] = ((float)in01[0].s[n0] - (float)OFFSET) * (float)SCALE;
237 in10_f[0].s[n0] = ((float)in10[0].s[n0] - (float)OFFSET) * (float)SCALE;
238 in11_f[0].s[n0] = ((float)in11[0].s[n0] - (float)OFFSET) * (float)SCALE;
239 })
240
241 // Calculate the output in the floating-point domain
242 out_f[0].v = ((in00_f[0].v * b * b1) + (in01_f[0].v * a * b1) + (in10_f[0].v * b * a1) + (in11_f[0].v * a * a1));
243
244 // Quantize
245 LOOP_UNROLLING(int, n0, 0, 1, N0,
246 {
247 out[0].s[n0] = CONVERT_SAT(out_f[0].s[n0] / (float)SCALE + (float)OFFSET, DST_DATA_TYPE);
248 })
249#endif // defined(IS_FLOATING_POINT)
250
251 TILE(uint, 1, 1, dst_indirect_y);
252
253 // Calculate the destination indirect Y
Adnan AlSinan17975a62021-11-08 17:46:39 +0000254 dst_indirect_y[0].v = xo + (yo * (int)(dst_w)) + bout * (int)(dst_w * dst_h);
Gian Marco Iodice7bc1a772021-09-08 17:14:19 +0100255
256 bool x_cond = PARTIAL_N0 != 0 && get_global_id(0) == 0;
257
258 T_STORE_INDIRECT_WIDTH_SELECT(DST_DATA_TYPE, 1, N0, PARTIAL_N0, DST_TENSOR_TYPE, dst, cout, dst_stride_y, x_cond, out, dst_indirect_y);
Adnan AlSinan17975a62021-11-08 17:46:39 +0000259}
260#endif /* SCALE_BILINEAR */