blob: 28a10872245bd2156fdfedd58bc14becc0f61ea4 [file] [log] [blame]
/*
 * Copyright (c) 2021-2022 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
Sheri Zhang360f5762021-01-20 12:20:20 +000024#ifndef SRC_CORE_NEON_KERNELS_SCALE_LIST_H
25#define SRC_CORE_NEON_KERNELS_SCALE_LIST_H
26
Sheri Zhang23adc4c2021-01-05 12:48:45 +000027#include "arm_compute/core/Helpers.h"
Sheri Zhang23adc4c2021-01-05 12:48:45 +000028#include "arm_compute/core/Window.h"
Sheri Zhang23adc4c2021-01-05 12:48:45 +000029#include "src/core/NEON/wrapper/wrapper.h"
Sheri Zhang23adc4c2021-01-05 12:48:45 +000030#include "src/core/utils/ScaleUtils.h"
31#include "support/Rounding.h"
32
Sheri Zhang23adc4c2021-01-05 12:48:45 +000033namespace arm_compute
34{
Sheri Zhang360f5762021-01-20 12:20:20 +000035namespace cpu
Sheri Zhang23adc4c2021-01-05 12:48:45 +000036{
Sheri Zhang360f5762021-01-20 12:20:20 +000037#define DECLARE_SCALE_KERNEL(func_name) \
38 void func_name(const ITensor *src, ITensor *dst, const ITensor *offsets, const ITensor *dx, const ITensor *dy, \
39 InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value, float sampling_offset, \
40 bool align_corners, const Window &window)
41
Gian Marco Iodice8b8405a2021-10-01 17:48:02 +010042DECLARE_SCALE_KERNEL(s16_neon_scale);
43DECLARE_SCALE_KERNEL(u8_neon_scale);
Gunes Bayirc4f27432022-09-11 15:59:19 +010044DECLARE_SCALE_KERNEL(s8_neon_scale);
Sheri Zhang360f5762021-01-20 12:20:20 +000045DECLARE_SCALE_KERNEL(qasymm8_neon_scale);
46DECLARE_SCALE_KERNEL(qasymm8_signed_neon_scale);
47
48#undef DECLARE_SCALE_KERNEL
49
50template <typename T>
51void nearest_neon_scale(const ITensor *src, ITensor *dst, const ITensor *offsets, float sampling_offset,
52 bool align_corners, const Window &window)
Sheri Zhang23adc4c2021-01-05 12:48:45 +000053{
Gian Marco Iodice8b8405a2021-10-01 17:48:02 +010054 ARM_COMPUTE_UNUSED(offsets);
Sheri Zhang23adc4c2021-01-05 12:48:45 +000055
Gian Marco Iodice8b8405a2021-10-01 17:48:02 +010056 // Compute the ratio between source and destination dimensions
57 const float scale_x = scale_utils::calculate_resize_ratio(src->info()->dimension(1), dst->info()->dimension(1), align_corners);
58 const float scale_y = scale_utils::calculate_resize_ratio(src->info()->dimension(2), dst->info()->dimension(2), align_corners);
Sheri Zhang23adc4c2021-01-05 12:48:45 +000059
Gian Marco Iodice8b8405a2021-10-01 17:48:02 +010060 const int in_stride_y = src->info()->strides_in_bytes()[1];
61 const int in_stride_z = src->info()->strides_in_bytes()[2];
62 const int in_stride_w = src->info()->strides_in_bytes()[3];
63 const int out_stride_y = dst->info()->strides_in_bytes()[1];
64 const int out_stride_z = dst->info()->strides_in_bytes()[2];
65 const int out_stride_w = dst->info()->strides_in_bytes()[3];
66 const int out_dim_ch = dst->info()->dimension(0);
67 const int step_cout = 16 / sizeof(T);
Sheri Zhang23adc4c2021-01-05 12:48:45 +000068
Gian Marco Iodice8b8405a2021-10-01 17:48:02 +010069 Window window_execution = window;
70 window_execution.set(Window::DimX, Window::Dimension(0, 1, 1));
71 Window win_in_out(window);
72 win_in_out.set(Window::DimY, Window::Dimension(0, 0, 0));
73 win_in_out.set(Window::DimZ, Window::Dimension(0, 0, 0));
74 Iterator in(src, win_in_out);
75 Iterator out(dst, win_in_out);
Sheri Zhang23adc4c2021-01-05 12:48:45 +000076
Gian Marco Iodice8b8405a2021-10-01 17:48:02 +010077 const int xo_start = window_execution.y().start();
78 const int xo_end = window_execution.y().end();
79 const int xo_step = window_execution.y().step();
80 const int yo_start = window_execution.z().start();
81 const int yo_end = window_execution.z().end();
82 const int yo_step = window_execution.z().step();
83 const int bo_start = window_execution[3].start();
84 const int bo_end = window_execution[3].end();
85 const int bo_step = window_execution[3].step();
86
87 for(int bo = bo_start; bo < bo_end; bo += bo_step)
Sheri Zhang23adc4c2021-01-05 12:48:45 +000088 {
Gian Marco Iodice8b8405a2021-10-01 17:48:02 +010089 const uint8_t *in_ptr_base = in.ptr() + bo * in_stride_w;
90 uint8_t *out_ptr_base = out.ptr() + bo * out_stride_w;
Sheri Zhang23adc4c2021-01-05 12:48:45 +000091
Gian Marco Iodice8b8405a2021-10-01 17:48:02 +010092 for(int yo = yo_start; yo < yo_end; yo += yo_step)
Sheri Zhang23adc4c2021-01-05 12:48:45 +000093 {
Gian Marco Iodice8b8405a2021-10-01 17:48:02 +010094 // Floating-point coordinate
95 float yi_f = ((yo + sampling_offset) * scale_y);
96 int yi = 0;
97 if(align_corners)
98 {
99 yi = utils::rounding::round_half_away_from_zero(yi_f);
100 }
101 else
102 {
103 yi = static_cast<int>(std::floor(yi_f));
104 }
105
106 for(int xo = xo_start; xo < xo_end; xo += xo_step)
107 {
108 // Floating-point coordinate
109 float xi_f = ((xo + sampling_offset) * scale_x);
110 int xi = 0;
111 if(align_corners)
112 {
113 xi = utils::rounding::round_half_away_from_zero(xi_f);
114 }
115 else
116 {
117 xi = static_cast<int>(std::floor(xi_f));
118 }
119
120 const uint8_t *in_ptr = in_ptr_base + xi * in_stride_y + yi * in_stride_z;
121 uint8_t *out_ptr = out_ptr_base + xo * out_stride_y + yo * out_stride_z;
122
123 int cout = 0;
124 for(; cout <= (out_dim_ch - step_cout); cout += step_cout)
125 {
126 auto out0 = wrapper::vloadq(reinterpret_cast<const T *>(in_ptr + cout * sizeof(T)));
127 wrapper::vstore(reinterpret_cast<T *>(out_ptr + cout * sizeof(T)), out0);
128 }
129
130 for(; cout < out_dim_ch; ++cout)
131 {
Gunes Bayir0eed3052022-09-04 21:00:10 +0100132 auto out0 = *(reinterpret_cast<const T *>(in_ptr + cout * sizeof(T)));
Gian Marco Iodice8b8405a2021-10-01 17:48:02 +0100133 *(reinterpret_cast<T *>(out_ptr + cout * sizeof(T))) = out0;
134 }
135 }
Sheri Zhang23adc4c2021-01-05 12:48:45 +0000136 }
Gian Marco Iodice8b8405a2021-10-01 17:48:02 +0100137 }
Sheri Zhang23adc4c2021-01-05 12:48:45 +0000138}
139
Sheri Zhang360f5762021-01-20 12:20:20 +0000140template <typename T>
141void bilinear_neon_scale(const ITensor *src, ITensor *dst, const ITensor *offsets, const ITensor *dx, const ITensor *dy,
142 BorderMode border_mode, PixelValue constant_border_value, float sampling_offset,
143 bool align_corners, const Window &window)
Sheri Zhang23adc4c2021-01-05 12:48:45 +0000144{
Gian Marco Iodice8b8405a2021-10-01 17:48:02 +0100145 ARM_COMPUTE_UNUSED(offsets);
146 ARM_COMPUTE_UNUSED(dx);
147 ARM_COMPUTE_UNUSED(dy);
148 using ExactTagType = typename wrapper::traits::neon_bitvector_tag_t<T, wrapper::traits::BitWidth::W128>;
Sheri Zhang23adc4c2021-01-05 12:48:45 +0000149
Gian Marco Iodice8b8405a2021-10-01 17:48:02 +0100150 // Compute the ratio between source and destination dimensions
151 const float scale_x = scale_utils::calculate_resize_ratio(src->info()->dimension(1), dst->info()->dimension(1), align_corners);
152 const float scale_y = scale_utils::calculate_resize_ratio(src->info()->dimension(2), dst->info()->dimension(2), align_corners);
153
154 const int in_stride_y = src->info()->strides_in_bytes()[1];
155 const int in_stride_z = src->info()->strides_in_bytes()[2];
156 const int in_stride_w = src->info()->strides_in_bytes()[3];
157 const int out_stride_y = dst->info()->strides_in_bytes()[1];
158 const int out_stride_z = dst->info()->strides_in_bytes()[2];
159 const int out_stride_w = dst->info()->strides_in_bytes()[3];
Sheri Zhang23adc4c2021-01-05 12:48:45 +0000160 const int in_dim_w = src->info()->dimension(1);
161 const int in_dim_h = src->info()->dimension(2);
Gian Marco Iodice8b8405a2021-10-01 17:48:02 +0100162 const int out_dim_ch = dst->info()->dimension(0);
163 const int step_cout = 16 / sizeof(T);
Sheri Zhang23adc4c2021-01-05 12:48:45 +0000164
Gian Marco Iodice8b8405a2021-10-01 17:48:02 +0100165 Window window_execution = window;
166 window_execution.set(Window::DimX, Window::Dimension(0, 1, 1));
167 Window win_in_out(window);
168 win_in_out.set(Window::DimY, Window::Dimension(0, 0, 0));
169 win_in_out.set(Window::DimZ, Window::Dimension(0, 0, 0));
170 Iterator in(src, win_in_out);
171 Iterator out(dst, win_in_out);
172
173 const int xo_start = window_execution.y().start();
174 const int xo_end = window_execution.y().end();
175 const int xo_step = window_execution.y().step();
176 const int yo_start = window_execution.z().start();
177 const int yo_end = window_execution.z().end();
178 const int yo_step = window_execution.z().step();
179 const int bo_start = window_execution[3].start();
180 const int bo_end = window_execution[3].end();
181 const int bo_step = window_execution[3].step();
Sheri Zhang23adc4c2021-01-05 12:48:45 +0000182
183 if(border_mode == BorderMode::CONSTANT)
184 {
Sheri Zhang360f5762021-01-20 12:20:20 +0000185#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
186 using ConstType = typename std::conditional<std::is_same<T, float16_t>::value, half, T>::type;
187#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
188 using ConstType = T;
189#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
190 const T const_border_value = static_cast<T>(constant_border_value.get<ConstType>());
Gian Marco Iodice8b8405a2021-10-01 17:48:02 +0100191
192 for(int bo = bo_start; bo < bo_end; bo += bo_step)
Sheri Zhang23adc4c2021-01-05 12:48:45 +0000193 {
Gian Marco Iodice8b8405a2021-10-01 17:48:02 +0100194 const uint8_t *in_ptr_base = in.ptr() + bo * in_stride_w;
195 uint8_t *out_ptr_base = out.ptr() + bo * out_stride_w;
Sheri Zhang23adc4c2021-01-05 12:48:45 +0000196
Gian Marco Iodice8b8405a2021-10-01 17:48:02 +0100197 for(int yo = yo_start; yo < yo_end; yo += yo_step)
198 {
199 // Floating-point coordinate
200 const float yi_f = ((yo + sampling_offset) * scale_y - sampling_offset);
201 // Integer coordinate
202 const auto yi = static_cast<int>(std::floor(yi_f));
203 // Weight for the y coordinate
204 const auto a1 = (yi_f - static_cast<float>(yi));
205 const auto b1 = (1.f - a1);
Sheri Zhang23adc4c2021-01-05 12:48:45 +0000206
Gian Marco Iodice8b8405a2021-10-01 17:48:02 +0100207 for(int xo = xo_start; xo < xo_end; xo += xo_step)
208 {
209 // Floating-point coordinate
210 const float xi_f = ((xo + sampling_offset) * scale_x - sampling_offset);
211 // Integer coordinate
212 const auto xi = static_cast<int>(std::floor(xi_f));
213 // Weight for the x coordinate
214 const auto a = (xi_f - static_cast<float>(xi));
215 const auto b = (1.f - a);
216
217 const auto s00_s = static_cast<T>(b * b1);
218 const auto s01_s = static_cast<T>(a * b1);
219 const auto s10_s = static_cast<T>(b * a1);
220 const auto s11_s = static_cast<T>(a * a1);
221
222 const uint8_t *in_ptr = in_ptr_base + xi * in_stride_y + yi * in_stride_z;
223 uint8_t *out_ptr = out_ptr_base + xo * out_stride_y + yo * out_stride_z;
224
225 int cout = 0;
226 for(; cout <= (out_dim_ch - step_cout); cout += step_cout)
227 {
228 auto in00 = wrapper::vdup_n(static_cast<T>(const_border_value), ExactTagType{});
229 auto in01 = wrapper::vdup_n(static_cast<T>(const_border_value), ExactTagType{});
230 auto in10 = wrapper::vdup_n(static_cast<T>(const_border_value), ExactTagType{});
231 auto in11 = wrapper::vdup_n(static_cast<T>(const_border_value), ExactTagType{});
232 if((yi >= 0) && (yi < in_dim_h))
233 {
234 if((xi >= 0) && (xi < in_dim_w))
235 {
236 in00 = wrapper::vloadq(reinterpret_cast<const T *>(in_ptr + cout * sizeof(T)));
237 }
238 if(((xi + 1) >= 0) && ((xi + 1) < in_dim_w))
239 {
240 in01 = wrapper::vloadq(reinterpret_cast<const T *>(in_ptr + cout * sizeof(T) + in_stride_y));
241 }
242 }
243 if(((yi + 1) >= 0) && ((yi + 1) < in_dim_h))
244 {
245 if((xi >= 0) && (xi < in_dim_w))
246 {
247 in10 = wrapper::vloadq(reinterpret_cast<const T *>(in_ptr + cout * sizeof(T) + in_stride_z));
248 }
249 if(((xi + 1) >= 0) && ((xi + 1) < in_dim_w))
250 {
251 in11 = wrapper::vloadq(reinterpret_cast<const T *>(in_ptr + cout * sizeof(T) + in_stride_y + in_stride_z));
252 }
253 }
254
255 const auto s00 = wrapper::vdup_n(s00_s, ExactTagType{});
256 const auto s01 = wrapper::vdup_n(s01_s, ExactTagType{});
257 const auto s10 = wrapper::vdup_n(s10_s, ExactTagType{});
258 const auto s11 = wrapper::vdup_n(s11_s, ExactTagType{});
259 auto out0 = wrapper::vdup_n(static_cast<T>(0), ExactTagType{});
260 out0 = wrapper::vmla(out0, in00, s00);
261 out0 = wrapper::vmla(out0, in01, s01);
262 out0 = wrapper::vmla(out0, in10, s10);
263 out0 = wrapper::vmla(out0, in11, s11);
264 wrapper::vstore(reinterpret_cast<T *>(out_ptr + cout * sizeof(T)), out0);
265 }
266
267 for(; cout < out_dim_ch; ++cout)
268 {
269 auto in00 = static_cast<T>(const_border_value);
270 auto in01 = static_cast<T>(const_border_value);
271 auto in10 = static_cast<T>(const_border_value);
272 auto in11 = static_cast<T>(const_border_value);
273 if((yi >= 0) && (yi < in_dim_h))
274 {
275 if((xi >= 0) && (xi < in_dim_w))
276 {
277 in00 = *(reinterpret_cast<const T *>(in_ptr + cout * sizeof(T)));
278 }
279 if(((xi + 1) >= 0) && ((xi + 1) < in_dim_w))
280 {
281 in01 = *(reinterpret_cast<const T *>(in_ptr + cout * sizeof(T) + in_stride_y));
282 }
283 }
284 if(((yi + 1) >= 0) && ((yi + 1) < in_dim_h))
285 {
286 if((xi >= 0) && (xi < in_dim_w))
287 {
288 in10 = *(reinterpret_cast<const T *>(in_ptr + cout * sizeof(T) + in_stride_z));
289 }
290 if(((xi + 1) >= 0) && ((xi + 1) < in_dim_w))
291 {
292 in11 = *(reinterpret_cast<const T *>(in_ptr + cout * sizeof(T) + in_stride_y + in_stride_z));
293 }
294 }
295 auto out0 = static_cast<T>(0);
296 out0 += in00 * s00_s;
297 out0 += in01 * s01_s;
298 out0 += in10 * s10_s;
299 out0 += in11 * s11_s;
300 *(reinterpret_cast<T *>(out_ptr + cout * sizeof(T))) = out0;
301 }
302 }
303 }
304 }
Sheri Zhang23adc4c2021-01-05 12:48:45 +0000305 }
306 else if(border_mode == BorderMode::REPLICATE)
307 {
Gian Marco Iodice8b8405a2021-10-01 17:48:02 +0100308 for(int bo = bo_start; bo < bo_end; bo += bo_step)
Sheri Zhang23adc4c2021-01-05 12:48:45 +0000309 {
Gian Marco Iodice8b8405a2021-10-01 17:48:02 +0100310 const uint8_t *in_ptr = in.ptr() + bo * in_stride_w;
311 uint8_t *out_ptr = out.ptr() + bo * out_stride_w;
Sheri Zhang23adc4c2021-01-05 12:48:45 +0000312
Gian Marco Iodice8b8405a2021-10-01 17:48:02 +0100313 for(int yo = yo_start; yo < yo_end; yo += yo_step)
314 {
315 // Floating-point coordinate
316 const float yi_f = ((yo + sampling_offset) * scale_y - sampling_offset);
317 // Integer coordinate
318 const auto yi = static_cast<int>(std::floor(yi_f));
319 // Weight for the y coordinate
320 const auto a1 = (yi_f - static_cast<float>(yi));
321 const auto b1 = (1.f - a1);
Sheri Zhang23adc4c2021-01-05 12:48:45 +0000322
Gunes Bayir0eed3052022-09-04 21:00:10 +0100323 const int yi0 = utility::clamp<int>(yi, 0, in_dim_h - 1);
324 const int yi1 = utility::clamp<int>(yi + 1, 0, in_dim_h - 1);
Sheri Zhang23adc4c2021-01-05 12:48:45 +0000325
Gunes Bayir0eed3052022-09-04 21:00:10 +0100326 const int yi0_offset = yi0 * in_stride_z;
327 const int yi1_offset = yi1 * in_stride_z;
328
329 const int y_offset = yo * out_stride_z;
Gian Marco Iodice8b8405a2021-10-01 17:48:02 +0100330 for(int xo = xo_start; xo < xo_end; xo += xo_step)
331 {
332 // Floating-point coordinate
333 const float xi_f = ((xo + sampling_offset) * scale_x - sampling_offset);
334 // Integer coordinate
335 const auto xi = static_cast<int>(std::floor(xi_f));
336 // Weight for the x coordinate
337 const auto a = (xi_f - static_cast<float>(xi));
338 const auto b = (1.f - a);
339
340 const auto s00_s = static_cast<T>(b * b1);
341 const auto s01_s = static_cast<T>(a * b1);
342 const auto s10_s = static_cast<T>(b * a1);
343 const auto s11_s = static_cast<T>(a * a1);
344
Gunes Bayir0eed3052022-09-04 21:00:10 +0100345 const auto s00 = wrapper::vdup_n(s00_s, ExactTagType{});
346 const auto s01 = wrapper::vdup_n(s01_s, ExactTagType{});
347 const auto s10 = wrapper::vdup_n(s10_s, ExactTagType{});
348 const auto s11 = wrapper::vdup_n(s11_s, ExactTagType{});
349
350 const int xi0 = utility::clamp<int>(xi, 0, in_dim_w - 1);
351 const int xi1 = utility::clamp<int>(xi + 1, 0, in_dim_w - 1);
352
353 const int xi0_offset = xi0 * in_stride_y;
354 const int xi1_offset = xi1 * in_stride_y;
355
356 const int offset = xo * out_stride_y + y_offset;
Gian Marco Iodice8b8405a2021-10-01 17:48:02 +0100357
358 int cout = 0;
359 for(; cout <= (out_dim_ch - step_cout); cout += step_cout)
360 {
Gunes Bayirc4f27432022-09-11 15:59:19 +0100361 const auto in00 = wrapper::vloadq(reinterpret_cast<const T *>(in_ptr + cout * sizeof(T) + xi0_offset + yi0_offset));
362 const auto in01 = wrapper::vloadq(reinterpret_cast<const T *>(in_ptr + cout * sizeof(T) + xi1_offset + yi0_offset));
363 const auto in10 = wrapper::vloadq(reinterpret_cast<const T *>(in_ptr + cout * sizeof(T) + xi0_offset + yi1_offset));
364 const auto in11 = wrapper::vloadq(reinterpret_cast<const T *>(in_ptr + cout * sizeof(T) + xi1_offset + yi1_offset));
Gian Marco Iodice8b8405a2021-10-01 17:48:02 +0100365
Gunes Bayir0eed3052022-09-04 21:00:10 +0100366 auto out0 = wrapper::vmul(in00, s00);
367 out0 = wrapper::vmla(out0, in01, s01);
368 out0 = wrapper::vmla(out0, in10, s10);
369 out0 = wrapper::vmla(out0, in11, s11);
370 wrapper::vstore(reinterpret_cast<T *>(out_ptr + offset + cout * sizeof(T)), out0);
Gian Marco Iodice8b8405a2021-10-01 17:48:02 +0100371 }
372
373 for(; cout < out_dim_ch; ++cout)
374 {
Gunes Bayirc4f27432022-09-11 15:59:19 +0100375 const T in00 = *(reinterpret_cast<const T *>(in_ptr + cout * sizeof(T) + xi0_offset + yi0_offset));
376 const T in01 = *(reinterpret_cast<const T *>(in_ptr + cout * sizeof(T) + xi1_offset + yi0_offset));
377 const T in10 = *(reinterpret_cast<const T *>(in_ptr + cout * sizeof(T) + xi0_offset + yi1_offset));
378 const T in11 = *(reinterpret_cast<const T *>(in_ptr + cout * sizeof(T) + xi1_offset + yi1_offset));
Gunes Bayir0eed3052022-09-04 21:00:10 +0100379
380 T out0 = in00 * s00_s;
Gian Marco Iodice8b8405a2021-10-01 17:48:02 +0100381 out0 += in01 * s01_s;
382 out0 += in10 * s10_s;
383 out0 += in11 * s11_s;
Gunes Bayir0eed3052022-09-04 21:00:10 +0100384 *(reinterpret_cast<T *>(out_ptr + offset + cout * sizeof(T))) = out0;
Gian Marco Iodice8b8405a2021-10-01 17:48:02 +0100385 }
386 }
387 }
388 }
Sheri Zhang23adc4c2021-01-05 12:48:45 +0000389 }
390 else
391 {
392 ARM_COMPUTE_ERROR("Not implemented");
393 }
394}
Sheri Zhang360f5762021-01-20 12:20:20 +0000395
396template <typename T>
397void common_neon_scale(const ITensor *src, ITensor *dst, const ITensor *offsets, const ITensor *dx, const ITensor *dy,
398 InterpolationPolicy policy, BorderMode border_mode, PixelValue constant_border_value, float sampling_offset,
399 bool align_corners, const Window &window)
Sheri Zhang23adc4c2021-01-05 12:48:45 +0000400{
401 if(policy == InterpolationPolicy::BILINEAR)
402 {
Sheri Zhang360f5762021-01-20 12:20:20 +0000403 bilinear_neon_scale<T>(src, dst, offsets, dx, dy, border_mode, constant_border_value, sampling_offset, align_corners, window);
Sheri Zhang23adc4c2021-01-05 12:48:45 +0000404 }
405 else if(policy == InterpolationPolicy::NEAREST_NEIGHBOR)
406 {
Sheri Zhang360f5762021-01-20 12:20:20 +0000407 nearest_neon_scale<T>(src, dst, offsets, sampling_offset, align_corners, window);
Sheri Zhang23adc4c2021-01-05 12:48:45 +0000408 }
409}
410} // namespace cpu
Sheri Zhang360f5762021-01-20 12:20:20 +0000411} // namespace arm_compute
412
413#endif /* SRC_CORE_NEON_KERNELS_SCALE_LIST_H */