blob: 05c7742b0310a4b2ecf1674958ff44656d0995a6 [file] [log] [blame]
/*
 * Copyright (c) 2016-2023 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
Georgios Pinitas7891a732021-08-20 21:39:25 +010024#include "src/cpu/kernels/CpuCastKernel.h"
Georgios Pinitas11d84152021-04-28 10:20:18 +010025
26#include "arm_compute/core/Error.h"
27#include "arm_compute/core/Helpers.h"
28#include "arm_compute/core/ITensor.h"
29#include "arm_compute/core/TensorInfo.h"
30#include "arm_compute/core/Validate.h"
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010031
32#include "src/core/common/Registrars.h"
Georgios Pinitas11d84152021-04-28 10:20:18 +010033#include "src/core/CPP/Validate.h"
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010034#include "src/core/helpers/AutoConfiguration.h"
35#include "src/core/helpers/WindowHelpers.h"
Georgios Pinitas11d84152021-04-28 10:20:18 +010036#include "src/core/NEON/NEFixedPoint.h"
37#include "src/core/NEON/NEMath.h"
38#include "src/core/NEON/wrapper/wrapper.h"
Yair Schwarzbaum298b2c02022-02-01 08:55:56 +020039#include "src/cpu/kernels/cast/list.h"
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010040#include "support/SaturateCast.h"
Yair Schwarzbaum298b2c02022-02-01 08:55:56 +020041
Georgios Pinitas11d84152021-04-28 10:20:18 +010042namespace arm_compute
43{
44namespace cpu
45{
46namespace kernels
47{
48namespace
49{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010050static const std::vector<CpuCastKernel::CastKernel> available_kernels = {
51 {"neon_qs8_cast",
52 [](const CastDataTypeISASelectorData &data)
53 { return data.src_dt == DataType::QASYMM8_SIGNED && data.dst_dt == DataType::F16 && data.isa.fp16; },
54 REGISTER_FP16_NEON(arm_compute::cpu::neon_qasymm8_signed_to_fp16_cast)},
55 {"neon_qu8_cast",
56 [](const CastDataTypeISASelectorData &data)
57 { return data.src_dt == DataType::QASYMM8 && data.dst_dt == DataType::F16 && data.isa.fp16; },
58 REGISTER_FP16_NEON(arm_compute::cpu::neon_u8_to_fp16_cast)},
59 {"neon_u8_cast",
60 [](const CastDataTypeISASelectorData &data)
61 { return data.src_dt == DataType::U8 && data.dst_dt == DataType::F16 && data.isa.fp16; },
62 REGISTER_FP16_NEON(arm_compute::cpu::neon_u8_to_fp16_cast)},
63 {"neon_fp16_cast",
64 [](const CastDataTypeISASelectorData &data) { return data.src_dt == DataType::F16 && data.isa.fp16; },
65 REGISTER_FP16_NEON(arm_compute::cpu::neon_fp16_to_other_dt_cast)},
66 {"neon_fp32_to_fp16_cast",
67 [](const CastDataTypeISASelectorData &data)
68 { return data.src_dt == DataType::F32 && data.dst_dt == DataType::F16 && data.isa.fp16; },
69 REGISTER_FP16_NEON(arm_compute::cpu::neon_fp32_to_fp16_cast)},
70 {"neon_s32_cast",
71 [](const CastDataTypeISASelectorData &data)
72 { return data.src_dt == DataType::S32 && data.dst_dt == DataType::F16 && data.isa.fp16; },
73 REGISTER_FP16_NEON(arm_compute::cpu::neon_s32_to_fp16_cast)},
Yair Schwarzbaum298b2c02022-02-01 08:55:56 +020074};
75
Georgios Pinitas11d84152021-04-28 10:20:18 +010076Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst, ConvertPolicy policy)
77{
78 ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src);
79 ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(dst);
Georgios Pinitas11d84152021-04-28 10:20:18 +010080 ARM_COMPUTE_UNUSED(policy);
81 ARM_COMPUTE_RETURN_ERROR_ON(src == dst);
Pablo Marquez Tello4a1c9172023-07-18 14:51:24 +010082#ifdef __aarch64__
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010083 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8,
84 DataType::U8, DataType::S16, DataType::U16, DataType::F16,
Pablo Marquez Tello4a1c9172023-07-18 14:51:24 +010085 DataType::F32, DataType::S32, DataType::S64, DataType::U64);
Pablo Marquez Tello29e27b02023-08-03 14:47:31 +010086
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010087 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(dst, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8,
88 DataType::U8, DataType::S16, DataType::U16, DataType::F16,
Pablo Marquez Tello29e27b02023-08-03 14:47:31 +010089 DataType::U32, DataType::S32, DataType::F32, DataType::S64);
90
91#else // __aarch64__
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010092 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8,
93 DataType::U8, DataType::S16, DataType::U16, DataType::F16,
Georgios Pinitas11d84152021-04-28 10:20:18 +010094 DataType::F32, DataType::S32);
Pablo Marquez Tello4a1c9172023-07-18 14:51:24 +010095
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010096 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(dst, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8,
97 DataType::U8, DataType::S16, DataType::U16, DataType::F16,
Georgios Pinitas11d84152021-04-28 10:20:18 +010098 DataType::U32, DataType::S32, DataType::F32);
Pablo Marquez Tello29e27b02023-08-03 14:47:31 +010099#endif // __aarch64__
Georgios Pinitas11d84152021-04-28 10:20:18 +0100100
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100101 ARM_COMPUTE_RETURN_ERROR_ON_MSG(src->data_type() == DataType::QASYMM8_SIGNED &&
102 (dst->data_type() != DataType::S16 && dst->data_type() != DataType::S32 &&
103 dst->data_type() != DataType::F16 && dst->data_type() != DataType::F32),
Georgios Pinitas11d84152021-04-28 10:20:18 +0100104 "Only data_types supported [in] QASYMM8 -> [out] U16, S16, S32, F16, F32");
105
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100106 ARM_COMPUTE_RETURN_ERROR_ON_MSG(src->data_type() == DataType::QASYMM8 &&
107 (dst->data_type() != DataType::S16 && dst->data_type() != DataType::U16 &&
108 dst->data_type() != DataType::S32 && dst->data_type() != DataType::F16 &&
109 dst->data_type() != DataType::F32),
Georgios Pinitas11d84152021-04-28 10:20:18 +0100110 "Only data_types supported [in] QASYMM8 -> [out] U16, S16, S32, F16, F32");
111
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100112 ARM_COMPUTE_RETURN_ERROR_ON_MSG(src->data_type() == DataType::U8 &&
113 (dst->data_type() != DataType::S16 && dst->data_type() != DataType::U16 &&
114 dst->data_type() != DataType::S32 && dst->data_type() != DataType::F16 &&
115 dst->data_type() != DataType::F32),
Georgios Pinitas11d84152021-04-28 10:20:18 +0100116 "Only data_types supported [in] U8 -> [out] U16, S16, S32, F16, F32");
117
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100118 ARM_COMPUTE_RETURN_ERROR_ON_MSG(src->data_type() == DataType::U16 &&
119 (dst->data_type() != DataType::U8 && dst->data_type() != DataType::U32),
Georgios Pinitas11d84152021-04-28 10:20:18 +0100120 "Only data_types supported [in] U16 -> [out] U8, U32");
121
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100122 ARM_COMPUTE_RETURN_ERROR_ON_MSG(src->data_type() == DataType::S16 &&
123 (dst->data_type() != DataType::QASYMM8_SIGNED &&
124 dst->data_type() != DataType::U8 && dst->data_type() != DataType::S32),
Georgios Pinitas11d84152021-04-28 10:20:18 +0100125 "Only data_types supported [in] S16 -> [out] U8, S32");
126
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100127 ARM_COMPUTE_RETURN_ERROR_ON_MSG(src->data_type() == DataType::F16 &&
128 (dst->data_type() != DataType::QASYMM8_SIGNED &&
129 dst->data_type() != DataType::QASYMM8 && dst->data_type() != DataType::U8 &&
130 dst->data_type() != DataType::F32 && dst->data_type() != DataType::S32),
Georgios Pinitas11d84152021-04-28 10:20:18 +0100131 "Only data_types supported [in] F16 -> [out] QASYMM8, F32, S32, U8");
132
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100133 ARM_COMPUTE_RETURN_ERROR_ON_MSG(src->data_type() == DataType::F32 &&
134 (dst->data_type() != DataType::QASYMM8_SIGNED &&
135 dst->data_type() != DataType::QASYMM8 && dst->data_type() != DataType::F16 &&
136 dst->data_type() != DataType::S32 && dst->data_type() != DataType::U8),
Adnan AlSinan40a9d3e2023-09-15 13:46:17 +0100137 "Only data_types supported [in] F32 -> [out] QASYMM8, F16, S32, U8");
Georgios Pinitas11d84152021-04-28 10:20:18 +0100138
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100139 ARM_COMPUTE_RETURN_ERROR_ON_MSG(src->data_type() == DataType::S32 &&
140 (dst->data_type() != DataType::QASYMM8_SIGNED &&
141 dst->data_type() != DataType::QASYMM8 && dst->data_type() != DataType::F16 &&
142 dst->data_type() != DataType::F32 && dst->data_type() != DataType::U8 &&
143 dst->data_type() != DataType::S64),
Pablo Marquez Tello29e27b02023-08-03 14:47:31 +0100144 "Only data_types supported [in] S32 -> [out] QASYMM8, F16, F32, U8, S64");
Pablo Marquez Tello4a1c9172023-07-18 14:51:24 +0100145#ifdef __aarch64__
Pablo Marquez Tello29e27b02023-08-03 14:47:31 +0100146 ARM_COMPUTE_RETURN_ERROR_ON_MSG(src->data_type() == DataType::S64 && dst->data_type() != DataType::F32,
Pablo Marquez Tello4a1c9172023-07-18 14:51:24 +0100147 "Only data_types supported [in] S64 -> [out] F32");
148
Pablo Marquez Tello29e27b02023-08-03 14:47:31 +0100149 ARM_COMPUTE_RETURN_ERROR_ON_MSG(src->data_type() == DataType::U64 && dst->data_type() != DataType::F32,
Pablo Marquez Tello4a1c9172023-07-18 14:51:24 +0100150 "Only data_types supported [in] U64 -> [out] F32");
151#endif // __aarch64__
Georgios Pinitas11d84152021-04-28 10:20:18 +0100152
153 // Validate in case of configured dst
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100154 if (dst->total_size() > 0)
Georgios Pinitas11d84152021-04-28 10:20:18 +0100155 {
156 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(src, dst);
157 }
158
159 return Status{};
160}
161} // namespace
162
163void CpuCastKernel::configure(const ITensorInfo *src, ITensorInfo *dst, ConvertPolicy policy)
164{
165 ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
166
167 // Auto initialize dst shape if not initialized (We can only auto-configure the shape, datatype must be given)
168 set_shape_if_empty(*dst, src->tensor_shape());
169
170 _policy = policy;
171
172 ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, dst, policy));
173
174 // Configure kernel window
175 Window win = calculate_max_window(*src, Steps());
176
177 ICPPKernel::configure(win);
178}
179
180Status CpuCastKernel::validate(const ITensorInfo *src, const ITensorInfo *dst, ConvertPolicy policy)
181{
182 ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, dst, policy));
183 return Status{};
184}
Pablo Marquez Tello4a1c9172023-07-18 14:51:24 +0100185#ifdef __aarch64__
186namespace
187{
// Primary template: fallback for type pairs that have no vectorised specialisation.
// It intentionally does nothing; only the explicit specialisations below convert data.
template <typename T1, typename T2>
inline void internal_neon_convert(const T1 *src_ptr, T2 *dst_ptr)
{
    ARM_COMPUTE_UNUSED(src_ptr, dst_ptr);
}
194
195template <>
Pablo Marquez Tello29e27b02023-08-03 14:47:31 +0100196inline void internal_neon_convert<int32_t, int64_t>(const int32_t *src_ptr, int64_t *dst_ptr)
197{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100198 const int32x4x4_t texels = {
199 {vld1q_s32(src_ptr), vld1q_s32(src_ptr + 4), vld1q_s32(src_ptr + 8), vld1q_s32(src_ptr + 12)}};
Pablo Marquez Tello29e27b02023-08-03 14:47:31 +0100200 vst1q_s64(dst_ptr, vmovl_s32(vget_low_s32(texels.val[0])));
201 vst1q_s64(dst_ptr + 2, vmovl_s32(vget_high_s32(texels.val[0])));
202 vst1q_s64(dst_ptr + 4, vmovl_s32(vget_low_s32(texels.val[1])));
203 vst1q_s64(dst_ptr + 6, vmovl_s32(vget_high_s32(texels.val[1])));
204 vst1q_s64(dst_ptr + 8, vmovl_s32(vget_low_s32(texels.val[2])));
205 vst1q_s64(dst_ptr + 10, vmovl_s32(vget_high_s32(texels.val[2])));
206 vst1q_s64(dst_ptr + 12, vmovl_s32(vget_low_s32(texels.val[3])));
207 vst1q_s64(dst_ptr + 14, vmovl_s32(vget_high_s32(texels.val[3])));
208}
209
210template <>
Pablo Marquez Tello4a1c9172023-07-18 14:51:24 +0100211inline void internal_neon_convert<int64_t, float>(const int64_t *src_ptr, float *dst_ptr)
212{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100213 const float64x2x4_t texels0 = {{vcvtq_f64_s64(vld1q_s64(src_ptr)), vcvtq_f64_s64(vld1q_s64(src_ptr + 2)),
214 vcvtq_f64_s64(vld1q_s64(src_ptr + 4)), vcvtq_f64_s64(vld1q_s64(src_ptr + 6))}};
215 const float64x2x4_t texels1 = {{vcvtq_f64_s64(vld1q_s64(src_ptr + 8)), vcvtq_f64_s64(vld1q_s64(src_ptr + 10)),
216 vcvtq_f64_s64(vld1q_s64(src_ptr + 12)), vcvtq_f64_s64(vld1q_s64(src_ptr + 14))}};
217 const float32x4x4_t texels = {{vcombine_f32(vcvt_f32_f64(texels0.val[0]), vcvt_f32_f64(texels0.val[1])),
218 vcombine_f32(vcvt_f32_f64(texels0.val[2]), vcvt_f32_f64(texels0.val[3])),
219 vcombine_f32(vcvt_f32_f64(texels1.val[0]), vcvt_f32_f64(texels1.val[1])),
220 vcombine_f32(vcvt_f32_f64(texels1.val[2]), vcvt_f32_f64(texels1.val[3]))}};
Pablo Marquez Tello4a1c9172023-07-18 14:51:24 +0100221 vst1q_f32(dst_ptr, texels.val[0]);
222 vst1q_f32(dst_ptr + 4, texels.val[1]);
223 vst1q_f32(dst_ptr + 8, texels.val[2]);
224 vst1q_f32(dst_ptr + 12, texels.val[3]);
225}
226
227template <>
228inline void internal_neon_convert<uint64_t, float>(const uint64_t *src_ptr, float *dst_ptr)
229{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100230 const float64x2x4_t texels0 = {{vcvtq_f64_u64(vld1q_u64(src_ptr)), vcvtq_f64_u64(vld1q_u64(src_ptr + 2)),
231 vcvtq_f64_u64(vld1q_u64(src_ptr + 4)), vcvtq_f64_u64(vld1q_u64(src_ptr + 6))}};
232 const float64x2x4_t texels1 = {{vcvtq_f64_u64(vld1q_u64(src_ptr + 8)), vcvtq_f64_u64(vld1q_u64(src_ptr + 10)),
233 vcvtq_f64_u64(vld1q_u64(src_ptr + 12)), vcvtq_f64_u64(vld1q_u64(src_ptr + 14))}};
Pablo Marquez Tello4a1c9172023-07-18 14:51:24 +0100234
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100235 const float32x4x4_t texels = {{vcombine_f32(vcvt_f32_f64(texels0.val[0]), vcvt_f32_f64(texels0.val[1])),
236 vcombine_f32(vcvt_f32_f64(texels0.val[2]), vcvt_f32_f64(texels0.val[3])),
237 vcombine_f32(vcvt_f32_f64(texels1.val[0]), vcvt_f32_f64(texels1.val[1])),
238 vcombine_f32(vcvt_f32_f64(texels1.val[2]), vcvt_f32_f64(texels1.val[3]))}};
Pablo Marquez Tello4a1c9172023-07-18 14:51:24 +0100239
240 vst1q_f32(dst_ptr, texels.val[0]);
241 vst1q_f32(dst_ptr + 4, texels.val[1]);
242 vst1q_f32(dst_ptr + 8, texels.val[2]);
243 vst1q_f32(dst_ptr + 12, texels.val[3]);
244}
245
246template <typename T1, typename T2>
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100247inline void
248convert64(Iterator &src, Iterator &dst, const Window &win, int window_start_x, int window_end_x, int window_step_x)
Pablo Marquez Tello4a1c9172023-07-18 14:51:24 +0100249{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100250 execute_window_loop(
251 win,
252 [&](const Coordinates &)
Pablo Marquez Tello4a1c9172023-07-18 14:51:24 +0100253 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100254 const auto src_ptr = reinterpret_cast<const T1 *>(src.ptr());
255 const auto dst_ptr = reinterpret_cast<T2 *>(dst.ptr());
256 int x = window_start_x;
257 for (; x <= (window_end_x - window_step_x); x += window_step_x)
258 {
259 internal_neon_convert<T1, T2>(src_ptr + x, dst_ptr + x);
260 }
261 for (; x < window_end_x; ++x)
262 {
263 *(dst_ptr + x) = static_cast<T2>(*(src_ptr + x));
264 }
265 },
266 src, dst);
Pablo Marquez Tello4a1c9172023-07-18 14:51:24 +0100267}
268} // namespace
269#endif // __aarch64__
Georgios Pinitas11d84152021-04-28 10:20:18 +0100270
271void CpuCastKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
272{
273 ARM_COMPUTE_UNUSED(info);
274 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
275 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
276
277 const auto window_start_x = static_cast<int>(window.x().start());
278 const auto window_end_x = static_cast<int>(window.x().end());
279 const int window_step_x = 16;
280
281 const ITensor *_src = tensors.get_const_tensor(TensorType::ACL_SRC);
282 ITensor *_dst = tensors.get_tensor(TensorType::ACL_DST);
283 ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
284 ARM_COMPUTE_ERROR_ON(_src == _dst);
285
286 ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
287
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100288 Window win{window};
Georgios Pinitas11d84152021-04-28 10:20:18 +0100289 win.set(Window::DimX, Window::Dimension(0, 1, 1));
290
291 Iterator src(_src, win);
292 Iterator dst(_dst, win);
293
Adnan AlSinan40a9d3e2023-09-15 13:46:17 +0100294 /*ukernel runs only when using fp16, so we validate it isn't a nullptr only before using it */
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100295 const auto *uk = CpuCastKernel::get_implementation(
296 CastDataTypeISASelectorData{_src->info()->data_type(), _dst->info()->data_type(), CPUInfo::get().get_isa()});
Yair Schwarzbaum298b2c02022-02-01 08:55:56 +0200297
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100298 switch (_src->info()->data_type())
Georgios Pinitas11d84152021-04-28 10:20:18 +0100299 {
Pablo Marquez Tello4a1c9172023-07-18 14:51:24 +0100300#ifdef __aarch64__
301 case DataType::U64:
302 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100303 switch (_dst->info()->data_type())
Pablo Marquez Tello4a1c9172023-07-18 14:51:24 +0100304 {
305 case DataType::F32:
306 {
307 convert64<uint64_t, float>(src, dst, win, window_start_x, window_end_x, window_step_x);
308 break;
309 }
310 default:
311 ARM_COMPUTE_ERROR("dst data type not supported");
312 }
313 break;
314 }
315 case DataType::S64:
316 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100317 switch (_dst->info()->data_type())
Pablo Marquez Tello4a1c9172023-07-18 14:51:24 +0100318 {
319 case DataType::F32:
320 {
321 convert64<int64_t, float>(src, dst, win, window_start_x, window_end_x, window_step_x);
322 break;
323 }
324 default:
325 ARM_COMPUTE_ERROR("dst data type not supported");
326 }
327 break;
328 }
329#endif // __aarch64__
330
Georgios Pinitas11d84152021-04-28 10:20:18 +0100331 case DataType::QASYMM8_SIGNED:
332 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100333 switch (_dst->info()->data_type())
Georgios Pinitas11d84152021-04-28 10:20:18 +0100334 {
335 case DataType::S16:
336 {
337 /* Up-conversion QASYMM8_SIGNED -> S16 */
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100338 execute_window_loop(
339 win,
340 [&](const Coordinates &)
Georgios Pinitas11d84152021-04-28 10:20:18 +0100341 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100342 const auto src_ptr = reinterpret_cast<const int8_t *>(src.ptr());
343 const auto dst_ptr = reinterpret_cast<int16_t *>(dst.ptr());
344 int x = window_start_x;
Georgios Pinitas11d84152021-04-28 10:20:18 +0100345
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100346 for (; x <= (window_end_x - window_step_x); x += window_step_x)
Georgios Pinitas11d84152021-04-28 10:20:18 +0100347 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100348 const int8x16_t texels_s8 = vld1q_s8(src_ptr + x);
Georgios Pinitas11d84152021-04-28 10:20:18 +0100349
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100350 const int16x8x2_t texels = {
351 {vmovl_s8(vget_low_s8(texels_s8)), vmovl_s8(vget_high_s8(texels_s8))}};
Georgios Pinitas11d84152021-04-28 10:20:18 +0100352
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100353 vst1q_s16(dst_ptr + x, texels.val[0]);
354 vst1q_s16(dst_ptr + x + 8, texels.val[1]);
355 }
356
357 // Compute left-over elements
358 for (; x < window_end_x; ++x)
359 {
360 *(dst_ptr + x) = static_cast<int16_t>(*(src_ptr + x));
361 }
362 },
363 src, dst);
Georgios Pinitas11d84152021-04-28 10:20:18 +0100364 break;
365 }
366 case DataType::S32:
367 {
368 /* Up-conversion QASYMM8_SIGNED -> S32 */
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100369 execute_window_loop(
370 win,
371 [&](const Coordinates &)
Georgios Pinitas11d84152021-04-28 10:20:18 +0100372 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100373 const auto src_ptr = reinterpret_cast<const int8_t *>(src.ptr());
374 const auto dst_ptr = reinterpret_cast<int32_t *>(dst.ptr());
375 int x = window_start_x;
Georgios Pinitas11d84152021-04-28 10:20:18 +0100376
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100377 for (; x <= (window_end_x - window_step_x); x += window_step_x)
Georgios Pinitas11d84152021-04-28 10:20:18 +0100378 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100379 const int8x16_t texels_s8 = vld1q_s8(src_ptr + x);
Georgios Pinitas11d84152021-04-28 10:20:18 +0100380
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100381 const int16x8x2_t texels = {
382 {vmovl_s8(vget_low_s8(texels_s8)), vmovl_s8(vget_high_s8(texels_s8))}};
Georgios Pinitas11d84152021-04-28 10:20:18 +0100383
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100384 vst1q_s32(dst_ptr + x, vmovl_s16(vget_low_s16(texels.val[0])));
385 vst1q_s32(dst_ptr + x + 4, vmovl_s16(vget_high_s16(texels.val[0])));
386 vst1q_s32(dst_ptr + x + 8, vmovl_s16(vget_low_s16(texels.val[1])));
387 vst1q_s32(dst_ptr + x + 12, vmovl_s16(vget_high_s16(texels.val[1])));
388 }
389
390 // Compute left-over elements
391 for (; x < window_end_x; ++x)
392 {
393 *(dst_ptr + x) = static_cast<int32_t>(*(src_ptr + x));
394 }
395 },
396 src, dst);
Georgios Pinitas11d84152021-04-28 10:20:18 +0100397 break;
398 }
399 case DataType::F32:
400 {
401 /* Up-conversion QASYMM8_SIGNED -> F32 */
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100402 execute_window_loop(
403 win,
404 [&](const Coordinates &)
Georgios Pinitas11d84152021-04-28 10:20:18 +0100405 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100406 const auto src_ptr = reinterpret_cast<const int8_t *>(src.ptr());
407 const auto dst_ptr = reinterpret_cast<float *>(dst.ptr());
Georgios Pinitas11d84152021-04-28 10:20:18 +0100408
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100409 int x = window_start_x;
410 for (; x <= (window_end_x - window_step_x); x += window_step_x)
Georgios Pinitas11d84152021-04-28 10:20:18 +0100411 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100412 const int8x16_t texels_s8 = vld1q_s8(src_ptr + x);
Georgios Pinitas11d84152021-04-28 10:20:18 +0100413
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100414 const int16x8x2_t texels = {
415 {vmovl_s8(vget_low_s8(texels_s8)), vmovl_s8(vget_high_s8(texels_s8))}};
416 vst1q_f32(dst_ptr + x, vcvtq_f32_s32(vmovl_s16(vget_low_s16(texels.val[0]))));
417 vst1q_f32(dst_ptr + x + 4, vcvtq_f32_s32(vmovl_s16(vget_high_s16(texels.val[0]))));
418 vst1q_f32(dst_ptr + x + 8, vcvtq_f32_s32(vmovl_s16(vget_low_s16(texels.val[1]))));
419 vst1q_f32(dst_ptr + x + 12, vcvtq_f32_s32(vmovl_s16(vget_high_s16(texels.val[1]))));
420 }
421
422 // Compute left-over elements
423 for (; x < window_end_x; ++x)
424 {
425 *(dst_ptr + x) = static_cast<float>(*(src_ptr + x));
426 }
427 },
428 src, dst);
Georgios Pinitas11d84152021-04-28 10:20:18 +0100429 break;
430 }
Georgios Pinitas11d84152021-04-28 10:20:18 +0100431 case DataType::F16:
432 {
433 /* Up-conversion QASYMM8_SIGNED -> F16 */
Yair Schwarzbaum298b2c02022-02-01 08:55:56 +0200434 ARM_COMPUTE_ERROR_ON(uk->ukernel == nullptr);
435 uk->ukernel(_src, _dst, info, _policy, window);
Georgios Pinitas11d84152021-04-28 10:20:18 +0100436 break;
437 }
Georgios Pinitas11d84152021-04-28 10:20:18 +0100438 default:
439 ARM_COMPUTE_ERROR("dst data type not supported");
440 }
441 break;
442 }
443
444 case DataType::QASYMM8:
445 case DataType::U8:
446 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100447 switch (_dst->info()->data_type())
Georgios Pinitas11d84152021-04-28 10:20:18 +0100448 {
449 case DataType::S16:
450 {
451 /* Up-conversion U8 -> S16 */
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100452 execute_window_loop(
453 win,
454 [&](const Coordinates &)
Georgios Pinitas11d84152021-04-28 10:20:18 +0100455 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100456 const auto src_ptr = reinterpret_cast<const uint8_t *>(src.ptr());
457 const auto dst_ptr = reinterpret_cast<int16_t *>(dst.ptr());
Georgios Pinitas11d84152021-04-28 10:20:18 +0100458
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100459 int x = window_start_x;
460 for (; x <= (window_end_x - window_step_x); x += window_step_x)
Georgios Pinitas11d84152021-04-28 10:20:18 +0100461 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100462 const uint8x16_t texels_u8 = vld1q_u8(src_ptr + x);
Georgios Pinitas11d84152021-04-28 10:20:18 +0100463
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100464 const int16x8x2_t texels = {{vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(texels_u8))),
465 vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(texels_u8)))}};
Georgios Pinitas11d84152021-04-28 10:20:18 +0100466
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100467 vst1q_s16(dst_ptr + x, texels.val[0]);
468 vst1q_s16(dst_ptr + x + 8, texels.val[1]);
469 }
470
471 // Compute left-over elements
472 for (; x < window_end_x; ++x)
473 {
474 *(dst_ptr + x) = static_cast<int32_t>(*(src_ptr + x));
475 }
476 },
477 src, dst);
Georgios Pinitas11d84152021-04-28 10:20:18 +0100478 break;
479 }
480 case DataType::S32:
481 {
482 /* Up-conversion U8 -> S32 */
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100483 execute_window_loop(
484 win,
485 [&](const Coordinates &)
Georgios Pinitas11d84152021-04-28 10:20:18 +0100486 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100487 const auto src_ptr = reinterpret_cast<const uint8_t *>(src.ptr());
488 const auto dst_ptr = reinterpret_cast<int32_t *>(dst.ptr());
Georgios Pinitas11d84152021-04-28 10:20:18 +0100489
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100490 int x = window_start_x;
491 for (; x <= (window_end_x - window_step_x); x += window_step_x)
Georgios Pinitas11d84152021-04-28 10:20:18 +0100492 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100493 const uint8x16_t texels_u8 = vld1q_u8(src_ptr + x);
Georgios Pinitas11d84152021-04-28 10:20:18 +0100494
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100495 const int16x8x2_t texels = {{vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(texels_u8))),
496 vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(texels_u8)))}};
Georgios Pinitas11d84152021-04-28 10:20:18 +0100497
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100498 vst1q_s32(dst_ptr + x, vmovl_s16(vget_low_s16(texels.val[0])));
499 vst1q_s32(dst_ptr + x + 4, vmovl_s16(vget_high_s16(texels.val[0])));
500 vst1q_s32(dst_ptr + x + 8, vmovl_s16(vget_low_s16(texels.val[1])));
501 vst1q_s32(dst_ptr + x + 12, vmovl_s16(vget_high_s16(texels.val[1])));
502 }
503
504 // Compute left-over elements
505 for (; x < window_end_x; ++x)
506 {
507 *(dst_ptr + x) = static_cast<uint32_t>(*(src_ptr + x));
508 }
509 },
510 src, dst);
Georgios Pinitas11d84152021-04-28 10:20:18 +0100511 break;
512 }
513 case DataType::F32:
514 {
515 /* Up-conversion U8 -> F32 */
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100516 execute_window_loop(
517 win,
518 [&](const Coordinates &)
Georgios Pinitas11d84152021-04-28 10:20:18 +0100519 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100520 const auto src_ptr = reinterpret_cast<const uint8_t *>(src.ptr());
521 const auto dst_ptr = reinterpret_cast<float *>(dst.ptr());
Georgios Pinitas11d84152021-04-28 10:20:18 +0100522
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100523 int x = window_start_x;
524 for (; x <= (window_end_x - window_step_x); x += window_step_x)
Georgios Pinitas11d84152021-04-28 10:20:18 +0100525 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100526 const uint8x16_t texels_u8 = vld1q_u8(src_ptr + x);
Georgios Pinitas11d84152021-04-28 10:20:18 +0100527
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100528 const int16x8x2_t texels = {{vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(texels_u8))),
529 vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(texels_u8)))}};
530 vst1q_f32(dst_ptr + x, vcvtq_f32_s32(vmovl_s16(vget_low_s16(texels.val[0]))));
531 vst1q_f32(dst_ptr + x + 4, vcvtq_f32_s32(vmovl_s16(vget_high_s16(texels.val[0]))));
532 vst1q_f32(dst_ptr + x + 8, vcvtq_f32_s32(vmovl_s16(vget_low_s16(texels.val[1]))));
533 vst1q_f32(dst_ptr + x + 12, vcvtq_f32_s32(vmovl_s16(vget_high_s16(texels.val[1]))));
534 }
535
536 // Compute left-over elements
537 for (; x < window_end_x; ++x)
538 {
539 *(dst_ptr + x) = static_cast<uint32_t>(*(src_ptr + x));
540 }
541 },
542 src, dst);
Georgios Pinitas11d84152021-04-28 10:20:18 +0100543 break;
544 }
Georgios Pinitas11d84152021-04-28 10:20:18 +0100545 case DataType::F16:
546 {
Yair Schwarzbaum298b2c02022-02-01 08:55:56 +0200547 /* Up-conversion U8 -> FP16 */
548 ARM_COMPUTE_ERROR_ON(uk->ukernel == nullptr);
549 uk->ukernel(_src, _dst, info, _policy, window);
Georgios Pinitas11d84152021-04-28 10:20:18 +0100550 break;
551 }
Georgios Pinitas11d84152021-04-28 10:20:18 +0100552 case DataType::U16:
553 {
554 /* Up-conversion U8 -> U16 */
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100555 execute_window_loop(
556 win,
557 [&](const Coordinates &)
Georgios Pinitas11d84152021-04-28 10:20:18 +0100558 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100559 const auto src_ptr = reinterpret_cast<const uint8_t *>(src.ptr());
560 const auto dst_ptr = reinterpret_cast<uint16_t *>(dst.ptr());
Georgios Pinitas11d84152021-04-28 10:20:18 +0100561
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100562 int x = window_start_x;
563 for (; x <= (window_end_x - window_step_x); x += window_step_x)
Georgios Pinitas11d84152021-04-28 10:20:18 +0100564 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100565 const uint8x16_t texels_u8 = vld1q_u8(src_ptr + x);
Georgios Pinitas11d84152021-04-28 10:20:18 +0100566
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100567 const uint16x8x2_t texels = {
568 {vmovl_u8(vget_low_u8(texels_u8)), vmovl_u8(vget_high_u8(texels_u8))}};
Georgios Pinitas11d84152021-04-28 10:20:18 +0100569
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100570 vst1q_u16(dst_ptr + x, texels.val[0]);
571 vst1q_u16(dst_ptr + x + 8, texels.val[1]);
572 }
573
574 // Compute left-over elements
575 for (; x < window_end_x; ++x)
576 {
577 *(dst_ptr + x) = static_cast<uint16_t>(*(src_ptr + x));
578 }
579 },
580 src, dst);
Georgios Pinitas11d84152021-04-28 10:20:18 +0100581 break;
582 }
583 default:
584 ARM_COMPUTE_ERROR("dst data type not supported");
585 }
586 break;
587 }
588 case DataType::S16:
589 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100590 switch (_dst->info()->data_type())
Georgios Pinitas11d84152021-04-28 10:20:18 +0100591 {
592 case DataType::QASYMM8_SIGNED:
593 {
594 /* Down-conversion S16 -> QASYMM8_SIGNED */
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100595 if (ConvertPolicy::SATURATE == _policy)
Georgios Pinitas11d84152021-04-28 10:20:18 +0100596 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100597 execute_window_loop(
598 win,
599 [&](const Coordinates &)
Georgios Pinitas11d84152021-04-28 10:20:18 +0100600 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100601 const auto src_ptr = reinterpret_cast<const int16_t *>(src.ptr());
602 const auto dst_ptr = reinterpret_cast<int8_t *>(dst.ptr());
603
604 int x = window_start_x;
605 for (; x <= (window_end_x - window_step_x); x += window_step_x)
Georgios Pinitas11d84152021-04-28 10:20:18 +0100606 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100607 const int16x8x2_t texels = {{vld1q_s16(src_ptr + x), vld1q_s16(src_ptr + x + 8)}};
Georgios Pinitas11d84152021-04-28 10:20:18 +0100608
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100609 vst1q_s8(dst_ptr + x,
610 vcombine_s8(vqmovn_s16(texels.val[0]), vqmovn_s16(texels.val[1])));
611 }
Georgios Pinitas11d84152021-04-28 10:20:18 +0100612
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100613 // Compute left-over elements
614 for (; x < window_end_x; ++x)
615 {
616 *(dst_ptr + x) = utils::cast::saturate_cast<int8_t>(*(src_ptr + x));
617 }
618 },
619 src, dst);
Georgios Pinitas11d84152021-04-28 10:20:18 +0100620 }
621 else
622 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100623 execute_window_loop(
624 win,
625 [&](const Coordinates &)
Georgios Pinitas11d84152021-04-28 10:20:18 +0100626 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100627 const auto src_ptr = reinterpret_cast<const int16_t *>(src.ptr());
628 const auto dst_ptr = reinterpret_cast<int8_t *>(dst.ptr());
629
630 int x = window_start_x;
631 for (; x <= (window_end_x - window_step_x); x += window_step_x)
Georgios Pinitas11d84152021-04-28 10:20:18 +0100632 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100633 const int16x8x2_t texels = {{vld1q_s16(src_ptr + x), vld1q_s16(src_ptr + x + 8)}};
Georgios Pinitas11d84152021-04-28 10:20:18 +0100634
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100635 vst1q_s8(dst_ptr + x,
636 vcombine_s8(vmovn_s16(texels.val[0]), vmovn_s16(texels.val[1])));
637 }
Georgios Pinitas11d84152021-04-28 10:20:18 +0100638
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100639 // Compute left-over elements
640 for (; x < window_end_x; ++x)
641 {
642 *(dst_ptr + x) = static_cast<int8_t>(*(src_ptr + x));
643 }
644 },
645 src, dst);
Georgios Pinitas11d84152021-04-28 10:20:18 +0100646 }
647 break;
648 }
649 case DataType::U8:
650 {
651 /* Down-conversion S16 -> U8 */
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100652 if (ConvertPolicy::SATURATE == _policy)
Georgios Pinitas11d84152021-04-28 10:20:18 +0100653 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100654 execute_window_loop(
655 win,
656 [&](const Coordinates &)
Georgios Pinitas11d84152021-04-28 10:20:18 +0100657 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100658 const auto src_ptr = reinterpret_cast<const int16_t *>(src.ptr());
659 const auto dst_ptr = reinterpret_cast<uint8_t *>(dst.ptr());
660
661 int x = window_start_x;
662 for (; x <= (window_end_x - window_step_x); x += window_step_x)
Georgios Pinitas11d84152021-04-28 10:20:18 +0100663 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100664 const int16x8x2_t texels = {{vld1q_s16(src_ptr + x), vld1q_s16(src_ptr + x + 8)}};
Georgios Pinitas11d84152021-04-28 10:20:18 +0100665
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100666 vst1q_u8(dst_ptr + x,
667 vcombine_u8(vqmovun_s16(texels.val[0]), vqmovun_s16(texels.val[1])));
668 }
Georgios Pinitas11d84152021-04-28 10:20:18 +0100669
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100670 // Compute left-over elements
671 for (; x < window_end_x; ++x)
672 {
673 *(dst_ptr + x) = utils::cast::saturate_cast<uint8_t>(*(src_ptr + x));
674 }
675 },
676 src, dst);
Georgios Pinitas11d84152021-04-28 10:20:18 +0100677 }
678 else
679 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100680 execute_window_loop(
681 win,
682 [&](const Coordinates &)
Georgios Pinitas11d84152021-04-28 10:20:18 +0100683 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100684 const auto src_ptr = reinterpret_cast<const int16_t *>(src.ptr());
685 const auto dst_ptr = reinterpret_cast<uint8_t *>(dst.ptr());
686
687 int x = window_start_x;
688 for (; x <= (window_end_x - window_step_x); x += window_step_x)
Georgios Pinitas11d84152021-04-28 10:20:18 +0100689 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100690 const int16x8x2_t texels = {{vld1q_s16(src_ptr + x), vld1q_s16(src_ptr + x + 8)}};
Georgios Pinitas11d84152021-04-28 10:20:18 +0100691
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100692 vst1q_u8(dst_ptr + x, vcombine_u8(vmovn_u16(vreinterpretq_u16_s16(texels.val[0])),
693 vmovn_u16(vreinterpretq_u16_s16(texels.val[1]))));
694 }
Georgios Pinitas11d84152021-04-28 10:20:18 +0100695
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100696 // Compute left-over elements
697 for (; x < window_end_x; ++x)
698 {
699 *(dst_ptr + x) = static_cast<uint8_t>(*(src_ptr + x));
700 }
701 },
702 src, dst);
Georgios Pinitas11d84152021-04-28 10:20:18 +0100703 }
704 break;
705 }
706 case DataType::S32:
707 {
708 /* Up-conversion S16 -> S32 */
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100709 execute_window_loop(
710 win,
711 [&](const Coordinates &)
Georgios Pinitas11d84152021-04-28 10:20:18 +0100712 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100713 const auto src_ptr = reinterpret_cast<const int16_t *>(src.ptr());
714 const auto dst_ptr = reinterpret_cast<int32_t *>(dst.ptr());
715
716 int x = window_start_x;
717 for (; x <= (window_end_x - window_step_x); x += window_step_x)
Georgios Pinitas11d84152021-04-28 10:20:18 +0100718 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100719 const int16x8x2_t texels = {{vld1q_s16(src_ptr + x), vld1q_s16(src_ptr + x + 8)}};
Georgios Pinitas11d84152021-04-28 10:20:18 +0100720
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100721 const int32x4x4_t texels_s32 = {
722 {vmovl_s16(vget_low_s16(texels.val[0])), vmovl_s16(vget_high_s16(texels.val[0])),
723 vmovl_s16(vget_low_s16(texels.val[1])), vmovl_s16(vget_high_s16(texels.val[1]))}};
724
725 vst1q_s32(dst_ptr + x, texels_s32.val[0]);
726 vst1q_s32(dst_ptr + x + 4, texels_s32.val[1]);
727 vst1q_s32(dst_ptr + x + 8, texels_s32.val[2]);
728 vst1q_s32(dst_ptr + x + 12, texels_s32.val[3]);
729 }
730
731 // Compute left-over elements
732 for (; x < window_end_x; ++x)
Georgios Pinitas11d84152021-04-28 10:20:18 +0100733 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100734 *(dst_ptr + x) = static_cast<int32_t>(*(src_ptr + x));
735 }
736 },
737 src, dst);
Georgios Pinitas11d84152021-04-28 10:20:18 +0100738 break;
739 }
740 default:
741 ARM_COMPUTE_ERROR("dst data type not supported");
742 }
743 break;
744 }
Yair Schwarzbaum298b2c02022-02-01 08:55:56 +0200745
Georgios Pinitas11d84152021-04-28 10:20:18 +0100746 case DataType::U16:
747 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100748 switch (_dst->info()->data_type())
Georgios Pinitas11d84152021-04-28 10:20:18 +0100749 {
750 case DataType::U8:
751 {
752 /* Down-conversion U16 -> U8 */
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100753 if (ConvertPolicy::SATURATE == _policy)
Georgios Pinitas11d84152021-04-28 10:20:18 +0100754 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100755 execute_window_loop(
756 win,
757 [&](const Coordinates &)
Georgios Pinitas11d84152021-04-28 10:20:18 +0100758 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100759 const auto src_ptr = reinterpret_cast<const uint16_t *>(src.ptr());
760 const auto dst_ptr = reinterpret_cast<uint8_t *>(dst.ptr());
761
762 int x = window_start_x;
763 for (; x <= (window_end_x - window_step_x); x += window_step_x)
Georgios Pinitas11d84152021-04-28 10:20:18 +0100764 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100765 const uint16x8x2_t texels = {{vld1q_u16(src_ptr + x), vld1q_u16(src_ptr + x + 8)}};
Georgios Pinitas11d84152021-04-28 10:20:18 +0100766
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100767 vst1q_u8(dst_ptr + x,
768 vcombine_u8(vqmovn_u16(texels.val[0]), vqmovn_u16(texels.val[1])));
769 }
Georgios Pinitas11d84152021-04-28 10:20:18 +0100770
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100771 // Compute left-over elements
772 for (; x < window_end_x; ++x)
773 {
774 *(dst_ptr + x) = utils::cast::saturate_cast<uint8_t>(*(src_ptr + x));
775 }
776 },
777 src, dst);
Georgios Pinitas11d84152021-04-28 10:20:18 +0100778 }
779 else
780 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100781 execute_window_loop(
782 win,
783 [&](const Coordinates &)
Georgios Pinitas11d84152021-04-28 10:20:18 +0100784 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100785 const auto src_ptr = reinterpret_cast<const uint16_t *>(src.ptr());
786 const auto dst_ptr = reinterpret_cast<uint8_t *>(dst.ptr());
787
788 int x = window_start_x;
789 for (; x <= (window_end_x - window_step_x); x += window_step_x)
Georgios Pinitas11d84152021-04-28 10:20:18 +0100790 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100791 const uint16x8x2_t texels = {{vld1q_u16(src_ptr + x), vld1q_u16(src_ptr + x + 8)}};
Georgios Pinitas11d84152021-04-28 10:20:18 +0100792
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100793 vst1q_u8(dst_ptr + x,
794 vcombine_u8(vmovn_u16(texels.val[0]), vmovn_u16(texels.val[1])));
795 }
Georgios Pinitas11d84152021-04-28 10:20:18 +0100796
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100797 // Compute left-over elements
798 for (; x < window_end_x; ++x)
799 {
800 *(dst_ptr + x) = static_cast<uint8_t>(*(src_ptr + x));
801 }
802 },
803 src, dst);
Georgios Pinitas11d84152021-04-28 10:20:18 +0100804 }
805 break;
806 }
807 case DataType::U32:
808 {
809 /* Up-conversion U16 -> U32 */
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100810 execute_window_loop(
811 win,
812 [&](const Coordinates &)
Georgios Pinitas11d84152021-04-28 10:20:18 +0100813 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100814 const auto src_ptr = reinterpret_cast<const uint16_t *>(src.ptr());
815 const auto dst_ptr = reinterpret_cast<uint32_t *>(dst.ptr());
816
817 int x = window_start_x;
818 for (; x <= (window_end_x - window_step_x); x += window_step_x)
Georgios Pinitas11d84152021-04-28 10:20:18 +0100819 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100820 const uint16x8x2_t texels = {{vld1q_u16(src_ptr + x), vld1q_u16(src_ptr + x + 8)}};
Georgios Pinitas11d84152021-04-28 10:20:18 +0100821
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100822 vst1q_u32(dst_ptr + x, vmovl_u16(vget_low_u16(texels.val[0])));
823 vst1q_u32(dst_ptr + x + 4, vmovl_u16(vget_high_u16(texels.val[0])));
824 vst1q_u32(dst_ptr + x + 8, vmovl_u16(vget_low_u16(texels.val[1])));
825 vst1q_u32(dst_ptr + x + 12, vmovl_u16(vget_high_u16(texels.val[1])));
826 }
827 // Compute left-over elements
828 for (; x < window_end_x; ++x)
829 {
830 *(dst_ptr + x) = static_cast<uint32_t>(*(src_ptr + x));
831 }
832 },
833 src, dst);
Georgios Pinitas11d84152021-04-28 10:20:18 +0100834 break;
835 }
836 default:
837 ARM_COMPUTE_ERROR("dst data type not supported");
838 }
839 break;
840 }
Georgios Pinitas11d84152021-04-28 10:20:18 +0100841 case DataType::F16:
Yair Schwarzbaum298b2c02022-02-01 08:55:56 +0200842 {
843 /* conversion F16 -> any data type */
844 ARM_COMPUTE_ERROR_ON(uk->ukernel == nullptr);
845 uk->ukernel(_src, _dst, info, _policy, window);
Georgios Pinitas11d84152021-04-28 10:20:18 +0100846 break;
Yair Schwarzbaum298b2c02022-02-01 08:55:56 +0200847 }
Georgios Pinitas11d84152021-04-28 10:20:18 +0100848 case DataType::F32:
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100849 switch (_dst->info()->data_type())
Georgios Pinitas11d84152021-04-28 10:20:18 +0100850 {
Georgios Pinitas11d84152021-04-28 10:20:18 +0100851 case DataType::F16:
852 {
853 /* Down-conversion F32 -> F16 */
Yair Schwarzbaum298b2c02022-02-01 08:55:56 +0200854 ARM_COMPUTE_ERROR_ON(uk->ukernel == nullptr);
855 uk->ukernel(_src, _dst, info, _policy, window);
Georgios Pinitas11d84152021-04-28 10:20:18 +0100856 break;
857 }
Georgios Pinitas11d84152021-04-28 10:20:18 +0100858 case DataType::S32:
859 {
860 /* Conversion F32 -> S32 */
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100861 execute_window_loop(
862 win,
863 [&](const Coordinates &)
Georgios Pinitas11d84152021-04-28 10:20:18 +0100864 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100865 const auto src_ptr = reinterpret_cast<const float *>(src.ptr());
866 const auto dst_ptr = reinterpret_cast<int32_t *>(dst.ptr());
867
868 int x = window_start_x;
869 for (; x <= (window_end_x - window_step_x); x += window_step_x)
Georgios Pinitas11d84152021-04-28 10:20:18 +0100870 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100871 const float32x4x4_t texels = {{
Georgios Pinitas11d84152021-04-28 10:20:18 +0100872 vld1q_f32(src_ptr + x),
873 vld1q_f32(src_ptr + x + 4),
874 vld1q_f32(src_ptr + x + 8),
875 vld1q_f32(src_ptr + x + 12),
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100876 }};
Georgios Pinitas11d84152021-04-28 10:20:18 +0100877
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100878 vst1q_s32(dst_ptr + x, vcvtq_s32_f32(texels.val[0]));
879 vst1q_s32(dst_ptr + x + 4, vcvtq_s32_f32(texels.val[1]));
880 vst1q_s32(dst_ptr + x + 8, vcvtq_s32_f32(texels.val[2]));
881 vst1q_s32(dst_ptr + x + 12, vcvtq_s32_f32(texels.val[3]));
882 }
Georgios Pinitas11d84152021-04-28 10:20:18 +0100883
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100884 // Compute left-over elements
885 for (; x < window_end_x; ++x)
886 {
887 *(dst_ptr + x) = static_cast<int32_t>(*(src_ptr + x));
888 }
889 },
890 src, dst);
Georgios Pinitas11d84152021-04-28 10:20:18 +0100891 break;
892 }
893 case DataType::QASYMM8:
894 case DataType::U8:
895 {
896 /* Down-conversion F32 -> U8 */
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100897 execute_window_loop(
898 win,
899 [&](const Coordinates &)
Georgios Pinitas11d84152021-04-28 10:20:18 +0100900 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100901 const auto src_ptr = reinterpret_cast<const float *>(src.ptr());
902 const auto dst_ptr = reinterpret_cast<uint8_t *>(dst.ptr());
903
904 int x = window_start_x;
905 for (; x <= (window_end_x - window_step_x); x += window_step_x)
Georgios Pinitas11d84152021-04-28 10:20:18 +0100906 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100907 const float32x4x4_t texels = {{
Georgios Pinitas11d84152021-04-28 10:20:18 +0100908 vld1q_f32(src_ptr + x),
909 vld1q_f32(src_ptr + x + 4),
910 vld1q_f32(src_ptr + x + 8),
911 vld1q_f32(src_ptr + x + 12),
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100912 }};
Georgios Pinitas11d84152021-04-28 10:20:18 +0100913
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100914 vst1_u8(dst_ptr + x,
915 vqmovn_u16(vcombine_u16(vqmovun_s32(vcvtq_s32_f32(texels.val[0])),
916 vqmovun_s32(vcvtq_s32_f32(texels.val[1])))));
917 vst1_u8(dst_ptr + x + 8,
918 vqmovn_u16(vcombine_u16(vqmovun_s32(vcvtq_s32_f32(texels.val[2])),
919 vqmovun_s32(vcvtq_s32_f32(texels.val[3])))));
920 }
Georgios Pinitas11d84152021-04-28 10:20:18 +0100921
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100922 // Compute left-over elements
923 for (; x < window_end_x; ++x)
924 {
925 *(dst_ptr + x) = utils::cast::saturate_cast<uint8_t>(*(src_ptr + x));
926 }
927 },
928 src, dst);
Georgios Pinitas11d84152021-04-28 10:20:18 +0100929 break;
930 }
931 case DataType::QASYMM8_SIGNED:
932 {
933 /* Down-conversion F32 -> QASYMM8_SIGNED */
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100934 execute_window_loop(
935 win,
936 [&](const Coordinates &)
Georgios Pinitas11d84152021-04-28 10:20:18 +0100937 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100938 const auto src_ptr = reinterpret_cast<const float *>(src.ptr());
939 const auto dst_ptr = reinterpret_cast<int8_t *>(dst.ptr());
940
941 int x = window_start_x;
942 for (; x <= (window_end_x - window_step_x); x += window_step_x)
Georgios Pinitas11d84152021-04-28 10:20:18 +0100943 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100944 const float32x4x4_t texels = {{
Georgios Pinitas11d84152021-04-28 10:20:18 +0100945 vld1q_f32(src_ptr + x),
946 vld1q_f32(src_ptr + x + 4),
947 vld1q_f32(src_ptr + x + 8),
948 vld1q_f32(src_ptr + x + 12),
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100949 }};
Georgios Pinitas11d84152021-04-28 10:20:18 +0100950
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100951 vst1_s8(dst_ptr + x,
952 vqmovn_s16(vcombine_s16(vqmovn_s32(vcvtq_s32_f32(texels.val[0])),
953 vqmovn_s32(vcvtq_s32_f32(texels.val[1])))));
954 vst1_s8(dst_ptr + x + 8,
955 vqmovn_s16(vcombine_s16(vqmovn_s32(vcvtq_s32_f32(texels.val[2])),
956 vqmovn_s32(vcvtq_s32_f32(texels.val[3])))));
957 }
958 // Compute left-over elements
959 for (; x < window_end_x; ++x)
960 {
961 *(dst_ptr + x) = utils::cast::saturate_cast<int8_t>(*(src_ptr + x));
962 }
963 },
964 src, dst);
Georgios Pinitas11d84152021-04-28 10:20:18 +0100965 break;
966 }
967
968 default:
969 ARM_COMPUTE_ERROR("dst data type not supported");
970 }
971 break;
Georgios Pinitas11d84152021-04-28 10:20:18 +0100972 case DataType::S32:
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100973 switch (_dst->info()->data_type())
Georgios Pinitas11d84152021-04-28 10:20:18 +0100974 {
Pablo Marquez Tello29e27b02023-08-03 14:47:31 +0100975#if __aarch64__
976 case DataType::S64:
977 {
978 convert64<int32_t, int64_t>(src, dst, win, window_start_x, window_end_x, window_step_x);
979 break;
980 }
981#endif // __aarch64__
Georgios Pinitas11d84152021-04-28 10:20:18 +0100982 case DataType::F16:
983 {
984 /* Down-conversion S32 -> F16 */
Yair Schwarzbaum298b2c02022-02-01 08:55:56 +0200985 ARM_COMPUTE_ERROR_ON(uk->ukernel == nullptr);
986 uk->ukernel(_src, _dst, info, _policy, window);
Georgios Pinitas11d84152021-04-28 10:20:18 +0100987 break;
988 }
Georgios Pinitas11d84152021-04-28 10:20:18 +0100989 case DataType::F32:
990 {
991 /* Conversion S32 -> F32 */
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100992 execute_window_loop(
993 win,
994 [&](const Coordinates &)
Georgios Pinitas11d84152021-04-28 10:20:18 +0100995 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100996 const auto src_ptr = reinterpret_cast<const int32_t *>(src.ptr());
997 const auto dst_ptr = reinterpret_cast<float *>(dst.ptr());
998
999 int x = window_start_x;
1000 for (; x <= (window_end_x - window_step_x); x += window_step_x)
Georgios Pinitas11d84152021-04-28 10:20:18 +01001001 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001002 const int32x4x4_t texels = {{
Georgios Pinitas11d84152021-04-28 10:20:18 +01001003 vld1q_s32(src_ptr + x),
1004 vld1q_s32(src_ptr + x + 4),
1005 vld1q_s32(src_ptr + x + 8),
1006 vld1q_s32(src_ptr + x + 12),
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001007 }};
Georgios Pinitas11d84152021-04-28 10:20:18 +01001008
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001009 vst1q_f32(dst_ptr + x, vcvtq_f32_s32(texels.val[0]));
1010 vst1q_f32(dst_ptr + x + 4, vcvtq_f32_s32(texels.val[1]));
1011 vst1q_f32(dst_ptr + x + 8, vcvtq_f32_s32(texels.val[2]));
1012 vst1q_f32(dst_ptr + x + 12, vcvtq_f32_s32(texels.val[3]));
1013 }
Georgios Pinitas11d84152021-04-28 10:20:18 +01001014
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001015 // Compute left-over elements
1016 for (; x < window_end_x; ++x)
1017 {
1018 *(dst_ptr + x) = static_cast<float>(*(src_ptr + x));
1019 }
1020 },
1021 src, dst);
Georgios Pinitas11d84152021-04-28 10:20:18 +01001022 break;
1023 }
1024 case DataType::QASYMM8_SIGNED:
1025 {
1026 /* Down-conversion S32 -> QASYMM8_SIGNED */
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001027 if (ConvertPolicy::SATURATE == _policy)
Georgios Pinitas11d84152021-04-28 10:20:18 +01001028 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001029 execute_window_loop(
1030 win,
1031 [&](const Coordinates &)
Georgios Pinitas11d84152021-04-28 10:20:18 +01001032 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001033 const auto src_ptr = reinterpret_cast<const int32_t *>(src.ptr());
1034 const auto dst_ptr = reinterpret_cast<int8_t *>(dst.ptr());
1035
1036 int x = window_start_x;
1037 for (; x <= (window_end_x - window_step_x); x += window_step_x)
Georgios Pinitas11d84152021-04-28 10:20:18 +01001038 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001039 const int32x4x4_t texels = {{
Georgios Pinitas11d84152021-04-28 10:20:18 +01001040 vld1q_s32(src_ptr + x),
1041 vld1q_s32(src_ptr + x + 4),
1042 vld1q_s32(src_ptr + x + 8),
1043 vld1q_s32(src_ptr + x + 12),
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001044 }};
1045 vst1_s8(dst_ptr + x, vqmovn_s16(vcombine_s16(vqmovn_s32(texels.val[0]),
1046 vqmovn_s32(texels.val[1]))));
1047 vst1_s8(dst_ptr + x + 8, vqmovn_s16(vcombine_s16(vqmovn_s32(texels.val[2]),
1048 vqmovn_s32(texels.val[3]))));
1049 }
Georgios Pinitas11d84152021-04-28 10:20:18 +01001050
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001051 // Compute left-over elements
1052 for (; x < window_end_x; ++x)
1053 {
1054 *(dst_ptr + x) = utils::cast::saturate_cast<int8_t>(*(src_ptr + x));
1055 }
1056 },
1057 src, dst);
Georgios Pinitas11d84152021-04-28 10:20:18 +01001058 }
1059 else
1060 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001061 execute_window_loop(
1062 win,
1063 [&](const Coordinates &)
Georgios Pinitas11d84152021-04-28 10:20:18 +01001064 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001065 const auto src_ptr = reinterpret_cast<const int32_t *>(src.ptr());
1066 const auto dst_ptr = reinterpret_cast<int8_t *>(dst.ptr());
1067
1068 int x = window_start_x;
1069 for (; x <= (window_end_x - window_step_x); x += window_step_x)
Georgios Pinitas11d84152021-04-28 10:20:18 +01001070 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001071 const int32x4x4_t texels = {{vld1q_s32(src_ptr + x), vld1q_s32(src_ptr + x + 4),
1072 vld1q_s32(src_ptr + x + 8),
1073 vld1q_s32(src_ptr + x + 12)}};
Georgios Pinitas11d84152021-04-28 10:20:18 +01001074
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001075 vst1_s8(dst_ptr + x, vmovn_s16(vcombine_s16(vmovn_s32(texels.val[0]),
1076 vmovn_s32(texels.val[1]))));
1077 vst1_s8(dst_ptr + x + 8, vmovn_s16(vcombine_s16(vmovn_s32(texels.val[2]),
1078 vmovn_s32(texels.val[3]))));
1079 }
Georgios Pinitas11d84152021-04-28 10:20:18 +01001080
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001081 // Compute left-over elements
1082 for (; x < window_end_x; ++x)
1083 {
1084 *(dst_ptr + x) = static_cast<int8_t>(*(src_ptr + x));
1085 }
1086 },
1087 src, dst);
Georgios Pinitas11d84152021-04-28 10:20:18 +01001088 }
1089 break;
1090 }
1091 case DataType::QASYMM8:
1092 case DataType::U8:
1093 {
1094 /* Down-conversion S32 -> U8 */
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001095 if (ConvertPolicy::SATURATE == _policy)
Georgios Pinitas11d84152021-04-28 10:20:18 +01001096 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001097 execute_window_loop(
1098 win,
1099 [&](const Coordinates &)
Georgios Pinitas11d84152021-04-28 10:20:18 +01001100 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001101 const auto src_ptr = reinterpret_cast<const int32_t *>(src.ptr());
1102 const auto dst_ptr = reinterpret_cast<uint8_t *>(dst.ptr());
1103
1104 int x = window_start_x;
1105 for (; x <= (window_end_x - window_step_x); x += window_step_x)
Georgios Pinitas11d84152021-04-28 10:20:18 +01001106 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001107 const int32x4x4_t texels = {{vld1q_s32(src_ptr + x), vld1q_s32(src_ptr + x + 4),
1108 vld1q_s32(src_ptr + x + 8),
1109 vld1q_s32(src_ptr + x + 12)}};
1110 vst1_u8(dst_ptr + x, vqmovn_u16(vcombine_u16(vqmovun_s32(texels.val[0]),
1111 vqmovun_s32(texels.val[1]))));
1112 vst1_u8(dst_ptr + x + 8, vqmovn_u16(vcombine_u16(vqmovun_s32(texels.val[2]),
1113 vqmovun_s32(texels.val[3]))));
1114 }
Georgios Pinitas11d84152021-04-28 10:20:18 +01001115
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001116 // Compute left-over elements
1117 for (; x < window_end_x; ++x)
1118 {
1119 *(dst_ptr + x) = utils::cast::saturate_cast<uint8_t>(*(src_ptr + x));
1120 }
1121 },
1122 src, dst);
Georgios Pinitas11d84152021-04-28 10:20:18 +01001123 }
1124 else
1125 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001126 execute_window_loop(
1127 win,
1128 [&](const Coordinates &)
Georgios Pinitas11d84152021-04-28 10:20:18 +01001129 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001130 const auto src_ptr = reinterpret_cast<const int32_t *>(src.ptr());
1131 const auto dst_ptr = reinterpret_cast<uint8_t *>(dst.ptr());
1132
1133 int x = window_start_x;
1134 for (; x <= (window_end_x - window_step_x); x += window_step_x)
Georgios Pinitas11d84152021-04-28 10:20:18 +01001135 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001136 const int32x4x4_t texels = {{vld1q_s32(src_ptr + x), vld1q_s32(src_ptr + x + 4),
1137 vld1q_s32(src_ptr + x + 8),
1138 vld1q_s32(src_ptr + x + 12)}};
Georgios Pinitas11d84152021-04-28 10:20:18 +01001139
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001140 vst1_u8(dst_ptr + x,
1141 vmovn_u16(vcombine_u16(vmovn_u32(vreinterpretq_u32_s32(texels.val[0])),
1142 vmovn_u32(vreinterpretq_u32_s32(texels.val[1])))));
1143 vst1_u8(dst_ptr + x + 8,
1144 vmovn_u16(vcombine_u16(vmovn_u32(vreinterpretq_u32_s32(texels.val[2])),
1145 vmovn_u32(vreinterpretq_u32_s32(texels.val[3])))));
1146 }
Georgios Pinitas11d84152021-04-28 10:20:18 +01001147
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001148 // Compute left-over elements
1149 for (; x < window_end_x; ++x)
1150 {
1151 *(dst_ptr + x) = static_cast<uint8_t>(*(src_ptr + x));
1152 }
1153 },
1154 src, dst);
Georgios Pinitas11d84152021-04-28 10:20:18 +01001155 }
1156 break;
1157 }
1158 default:
1159 ARM_COMPUTE_ERROR("dst data type not supported");
1160 }
1161 break;
1162 default:
1163 ARM_COMPUTE_ERROR("Not supported");
1164 }
1165}
1166
1167const char *CpuCastKernel::name() const
1168{
1169 return "CpuCastKernel.cpp";
1170}
Yair Schwarzbaum298b2c02022-02-01 08:55:56 +02001171
1172const std::vector<CpuCastKernel::CastKernel> &CpuCastKernel::get_available_kernels()
1173{
1174 return available_kernels;
1175}
1176
Georgios Pinitas11d84152021-04-28 10:20:18 +01001177} // namespace kernels
1178} // namespace cpu
1179} // namespace arm_compute