blob: d97a20be65e7fa1d517516e969bea477043bb6ed [file] [log] [blame]
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001/*
2 * Copyright (c) 2016, 2017 ARM Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "arm_compute/core/NEON/kernels/NEDepthConvertKernel.h"
25
26#include "arm_compute/core/Error.h"
27#include "arm_compute/core/Helpers.h"
28#include "arm_compute/core/ITensor.h"
29#include "arm_compute/core/NEON/NEFixedPoint.h"
30#include "arm_compute/core/TensorInfo.h"
31#include "arm_compute/core/Validate.h"
32
33#include <arm_neon.h>
34
35using namespace arm_compute;
36
37namespace arm_compute
38{
39class Coordinates;
40} // namespace arm_compute
41
42NEDepthConvertKernel::NEDepthConvertKernel()
Georgios Pinitase2229412017-07-12 12:30:40 +010043 : _input(nullptr), _output(nullptr), _policy(), _shift(0), _fixed_point_position_input(0), _fixed_point_position_output(0)
Anthony Barbier6ff3b192017-09-04 18:44:23 +010044{
45}
46
Georgios Pinitase2229412017-07-12 12:30:40 +010047void NEDepthConvertKernel::configure(ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift)
Anthony Barbier6ff3b192017-09-04 18:44:23 +010048{
Georgios Pinitas21efeb42017-07-04 12:47:17 +010049 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QS8, DataType::S16, DataType::U16, DataType::QS16, DataType::F32);
Georgios Pinitase2229412017-07-12 12:30:40 +010050
51 _input = input;
52 _output = input;
53 _policy = policy;
54 _shift = shift;
55
56 if(output != nullptr)
57 {
58 // Auto initialize output shape if not initialized (We can only auto-configure the shape, datatype must be given)
59 set_shape_if_empty(*output->info(), input->info()->tensor_shape());
60
61 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QS8, DataType::S16, DataType::U16, DataType::QS16, DataType::U32, DataType::S32, DataType::F32);
62 ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
63
64 // Set output
65 _output = output;
66 }
67
68 // Set initial fixed point position of input and output
69 _fixed_point_position_input = input->info()->fixed_point_position();
70 _fixed_point_position_output = _output->info()->fixed_point_position();
71
72 // Set the fixed point position to the output tensor if needed
73 if(is_data_type_fixed_point(input->info()->data_type()) && is_data_type_fixed_point(_output->info()->data_type()))
74 {
75 // If in-place set the fixed point position of the output tensor to be equal to shift
76 _fixed_point_position_output = (_input == _output) ? static_cast<int>(_shift) : _fixed_point_position_output;
77 // Set fixed point position to output tensor
78 _output->info()->set_fixed_point_position(_fixed_point_position_output);
79 }
80
81 ARM_COMPUTE_ERROR_ON(shift >= 8 && (!is_data_type_fixed_point(input->info()->data_type()) && !is_data_type_fixed_point(output->info()->data_type())));
82 ARM_COMPUTE_ERROR_ON(input == output && (data_size_from_type(input->info()->data_type()) != data_size_from_type(output->info()->data_type())));
Anthony Barbier6ff3b192017-09-04 18:44:23 +010083
Anthony Barbier6ff3b192017-09-04 18:44:23 +010084 ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::U8 && (output->info()->data_type() != DataType::S16 && output->info()->data_type() != DataType::U16
85 && output->info()->data_type() != DataType::S32),
86 "Only data_types supported [in] U8 -> [out] U16, S16, S32");
87
Georgios Pinitase2229412017-07-12 12:30:40 +010088 ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::QS8 && (output->info()->data_type() != DataType::QS8 && output->info()->data_type() != DataType::F32),
89 "Only data_types supported [in] QS8 -> [out] QS8, F32");
Georgios Pinitas21efeb42017-07-04 12:47:17 +010090
Anthony Barbier6ff3b192017-09-04 18:44:23 +010091 ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::U16 && (output->info()->data_type() != DataType::U8 && output->info()->data_type() != DataType::U32),
92 "Only data_types supported [in] U16 -> [out] U8, U32");
93
94 ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::S16 && (output->info()->data_type() != DataType::U8 && output->info()->data_type() != DataType::S32),
95 "Only data_types supported [in] S16 -> [out] U8, S32");
96
Georgios Pinitase2229412017-07-12 12:30:40 +010097 ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::QS16 && (output->info()->data_type() != DataType::QS16 && output->info()->data_type() != DataType::F32),
98 "Only data_types supported [in] QS16 -> [out] QS16, F32");
Georgios Pinitas21efeb42017-07-04 12:47:17 +010099
100 ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::F32 && (output->info()->data_type() != DataType::QS8 && output->info()->data_type() != DataType::QS16),
101 "Only data_types supported [in] F32 -> [out] QS8, QS16");
102
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100103 constexpr unsigned int num_elems_processed_per_iteration = 16;
Georgios Pinitase2229412017-07-12 12:30:40 +0100104
105 // Configure kernel window
106 Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
107
108 AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
109 if(output != nullptr)
110 {
111 AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
112 update_window_and_padding(win, input_access, output_access);
113 output_access.set_valid_region(win, input->info()->valid_region());
114 }
115 else
116 {
117 // In-place computation
118 update_window_and_padding(win, input_access);
119 }
120 ICPPKernel::configure(win);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100121}
122
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100123void NEDepthConvertKernel::run(const Window &window, const ThreadInfo &info)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100124{
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100125 ARM_COMPUTE_UNUSED(info);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100126 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
Georgios Pinitase2229412017-07-12 12:30:40 +0100127 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100128 ARM_COMPUTE_ERROR_ON(nullptr == _input);
129 ARM_COMPUTE_ERROR_ON(nullptr == _output);
130 ARM_COMPUTE_ERROR_ON(_input == _output);
131
132 Iterator input(_input, window);
133 Iterator output(_output, window);
134
Georgios Pinitase2229412017-07-12 12:30:40 +0100135 bool in_place = (_input == _output);
136
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100137 switch(_input->info()->data_type())
138 {
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100139 case DataType::U8:
140 {
141 const int16x8_t b = vdupq_n_s16(_shift);
142
143 switch(_output->info()->data_type())
144 {
145 case DataType::S16:
146 {
147 /* Up-conversion U8 -> S16 */
148 execute_window_loop(window, [&](const Coordinates & id)
149 {
150 const uint8x16_t texels_u8 = vld1q_u8(input.ptr());
151
152 const int16x8x2_t texels =
153 {
154 {
155 vshlq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(texels_u8))), b),
156 vshlq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(texels_u8))), b)
157 }
158 };
159
160 vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()), texels.val[0]);
161 vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()) + 8, texels.val[1]);
162 },
163 input, output);
164 break;
165 }
166 case DataType::S32:
167 {
168 /* Up-conversion U8 -> S32 */
169 execute_window_loop(window, [&](const Coordinates & id)
170 {
171 const uint8x16_t texels_u8 = vld1q_u8(input.ptr());
172
173 const int16x8x2_t texels =
174 {
175 {
176 vshlq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(texels_u8))), b),
177 vshlq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(texels_u8))), b)
178 }
179 };
180
181 vst1q_s32(reinterpret_cast<int32_t *>(output.ptr()), vmovl_s16(vget_low_s16(texels.val[0])));
182 vst1q_s32(reinterpret_cast<int32_t *>(output.ptr()) + 4, vmovl_s16(vget_high_s16(texels.val[0])));
183 vst1q_s32(reinterpret_cast<int32_t *>(output.ptr()) + 8, vmovl_s16(vget_low_s16(texels.val[1])));
184 vst1q_s32(reinterpret_cast<int32_t *>(output.ptr()) + 12, vmovl_s16(vget_high_s16(texels.val[1])));
185 },
186 input, output);
187 break;
188 }
189 case DataType::U16:
190 {
191 /* Up-conversion U8 -> U16 */
192 execute_window_loop(window, [&](const Coordinates & id)
193 {
194 const uint8x16_t texels_u8 = vld1q_u8(input.ptr());
195
196 const uint16x8x2_t texels =
197 {
198 {
199 vshlq_u16(vmovl_u8(vget_low_u8(texels_u8)), b),
200 vshlq_u16(vmovl_u8(vget_high_u8(texels_u8)), b)
201 }
202 };
203
204 vst1q_u16(reinterpret_cast<uint16_t *>(output.ptr()), texels.val[0]);
205 vst1q_u16(reinterpret_cast<uint16_t *>(output.ptr()) + 8, texels.val[1]);
206 },
207 input, output);
208 break;
209 }
210 default:
211 ARM_COMPUTE_ERROR("Output data type not supported");
212 }
213 break;
214 }
Georgios Pinitase2229412017-07-12 12:30:40 +0100215 case DataType::QS8:
216 {
217 switch(_output->info()->data_type())
218 {
219 case DataType::QS8:
220 {
221 const int relative_shift = _fixed_point_position_output - _fixed_point_position_input;
222 /* Fixed point position conversion QS8 -> QS8 */
223 if(relative_shift != 0 || !in_place)
224 {
225 const auto relative_shift_vec = vdupq_n_qs8(relative_shift);
226 execute_window_loop(window, [&](const Coordinates & id)
227 {
228 const qint8x16_t texels_qs8 = vld1q_qs8(reinterpret_cast<const qint8_t *>(input.ptr()));
229 vst1q_qs8(reinterpret_cast<qint8_t *>(output.ptr()), vqrshlq_s8(texels_qs8, relative_shift_vec));
230 },
231 input, output);
232 }
233 break;
234 }
235 case DataType::F32:
236 {
237 /* Up-conversion QS8 -> F32 */
238 execute_window_loop(window, [&](const Coordinates & id)
239 {
240 const qint8x16_t texels_qs8 = vld1q_qs8(reinterpret_cast<const qint8_t *>(input.ptr()));
241
242 float32x4x2_t texels_low = vcvt_f32_qs8(vget_low_s8(texels_qs8), _fixed_point_position_input);
243 float32x4x2_t texels_high = vcvt_f32_qs8(vget_high_s8(texels_qs8), _fixed_point_position_input);
244
245 vst1q_f32(reinterpret_cast<float *>(output.ptr()), texels_low.val[0]);
246 vst1q_f32(reinterpret_cast<float *>(output.ptr()) + 4, texels_low.val[1]);
247 vst1q_f32(reinterpret_cast<float *>(output.ptr()) + 8, texels_high.val[0]);
248 vst1q_f32(reinterpret_cast<float *>(output.ptr()) + 12, texels_high.val[1]);
249 },
250 input, output);
251 break;
252 }
253 default:
254 ARM_COMPUTE_ERROR("Output data type not supported");
255 }
256 break;
257 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100258 case DataType::S16:
259 {
260 switch(_output->info()->data_type())
261 {
262 case DataType::U8:
263 {
264 const int16x8_t b = vdupq_n_s16(-static_cast<int16_t>(_shift));
265
266 /* Down-conversion S16 -> U8 */
267 if(ConvertPolicy::SATURATE == _policy)
268 {
269 execute_window_loop(window, [&](const Coordinates & id)
270 {
271 const int16x8x2_t texels =
272 {
273 {
274 vqshlq_s16(vld1q_s16(reinterpret_cast<int16_t *>(input.ptr())), b),
275 vqshlq_s16(vld1q_s16(reinterpret_cast<int16_t *>(input.ptr()) + 8), b)
276 }
277 };
278
279 vst1q_u8(output.ptr(), vcombine_u8(vqmovun_s16(texels.val[0]), vqmovun_s16(texels.val[1])));
280 },
281 input, output);
282 }
283 else
284 {
285 execute_window_loop(window, [&](const Coordinates & id)
286 {
287 const int16x8x2_t texels =
288 {
289 {
290 vshlq_s16(vld1q_s16(reinterpret_cast<int16_t *>(input.ptr())), b),
291 vshlq_s16(vld1q_s16(reinterpret_cast<int16_t *>(input.ptr()) + 8), b)
292 }
293 };
294
295 vst1q_u8(output.ptr(), vcombine_u8(vmovn_u16(vreinterpretq_u16_s16(texels.val[0])),
296 vmovn_u16(vreinterpretq_u16_s16(texels.val[1]))));
297 },
298 input, output);
299 }
300 break;
301 }
302 case DataType::S32:
303 {
304 const int32x4_t b = vdupq_n_s32(_shift);
305
306 /* Up-conversion S16 -> S32 */
307 execute_window_loop(window, [&](const Coordinates & id)
308 {
309 const int16x8x2_t texels =
310 {
311 {
312 vld1q_s16(reinterpret_cast<int16_t *>(input.ptr())),
313 vld1q_s16(reinterpret_cast<int16_t *>(input.ptr()) + 8)
314 }
315 };
316
317 const int32x4x4_t texels_s32 =
318 {
319 {
320 vshlq_s32(vmovl_s16(vget_low_s16(texels.val[0])), b),
321 vshlq_s32(vmovl_s16(vget_high_s16(texels.val[0])), b),
322 vshlq_s32(vmovl_s16(vget_low_s16(texels.val[1])), b),
323 vshlq_s32(vmovl_s16(vget_high_s16(texels.val[1])), b)
324 }
325 };
326
327 vst1q_s32(reinterpret_cast<int32_t *>(output.ptr()), texels_s32.val[0]);
328 vst1q_s32(reinterpret_cast<int32_t *>(output.ptr()) + 4, texels_s32.val[1]);
329 vst1q_s32(reinterpret_cast<int32_t *>(output.ptr()) + 8, texels_s32.val[2]);
330 vst1q_s32(reinterpret_cast<int32_t *>(output.ptr()) + 12, texels_s32.val[3]);
331 },
332 input, output);
333 break;
334 }
335 default:
336 ARM_COMPUTE_ERROR("Output data type not supported");
337 }
338 break;
339 }
340 case DataType::U16:
341 {
342 switch(_output->info()->data_type())
343 {
344 case DataType::U8:
345 {
346 const int16x8_t b = vdupq_n_s16(-static_cast<int16_t>(_shift));
347
348 /* Down-conversion U16 -> U8 */
349 if(ConvertPolicy::SATURATE == _policy)
350 {
351 execute_window_loop(window, [&](const Coordinates & id)
352 {
353 const uint16x8x2_t texels =
354 {
355 {
356 vqshlq_u16(vld1q_u16(reinterpret_cast<uint16_t *>(input.ptr())), b),
357 vqshlq_u16(vld1q_u16(reinterpret_cast<uint16_t *>(input.ptr()) + 8), b)
358 }
359 };
360
361 vst1q_u8(output.ptr(), vcombine_u8(vqmovn_u16(texels.val[0]), vqmovn_u16(texels.val[1])));
362 },
363 input, output);
364 }
365 else
366 {
367 execute_window_loop(window, [&](const Coordinates & id)
368 {
369 const uint16x8x2_t texels =
370 {
371 {
372 vshlq_u16(vld1q_u16(reinterpret_cast<uint16_t *>(input.ptr())), b),
373 vshlq_u16(vld1q_u16(reinterpret_cast<uint16_t *>(input.ptr()) + 8), b)
374 }
375 };
376
377 vst1q_u8(output.ptr(), vcombine_u8(vmovn_u16(texels.val[0]), vmovn_u16(texels.val[1])));
378 },
379 input, output);
380 }
381 break;
382 }
383 case DataType::U32:
384 {
385 const int32x4_t b = vdupq_n_s32(_shift);
386
387 /* Up-conversion U16 -> U32 */
388 execute_window_loop(window, [&](const Coordinates & id)
389 {
390 const uint16x8x2_t texels =
391 {
392 {
393 vld1q_u16(reinterpret_cast<uint16_t *>(input.ptr())),
394 vld1q_u16(reinterpret_cast<uint16_t *>(input.ptr()) + 8)
395 }
396 };
397
398 vst1q_u32(reinterpret_cast<uint32_t *>(output.ptr()), vshlq_u32(vmovl_u16(vget_low_u16(texels.val[0])), b));
399 vst1q_u32(reinterpret_cast<uint32_t *>(output.ptr()) + 4, vshlq_u32(vmovl_u16(vget_high_u16(texels.val[0])), b));
400 vst1q_u32(reinterpret_cast<uint32_t *>(output.ptr()) + 8, vshlq_u32(vmovl_u16(vget_low_u16(texels.val[1])), b));
401 vst1q_u32(reinterpret_cast<uint32_t *>(output.ptr()) + 12, vshlq_u32(vmovl_u16(vget_high_u16(texels.val[1])), b));
402 },
403 input, output);
404 break;
405 }
406 default:
407 ARM_COMPUTE_ERROR("Output data type not supported");
408 }
409 break;
410 }
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100411 case DataType::QS16:
412 {
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100413 switch(_output->info()->data_type())
414 {
Georgios Pinitase2229412017-07-12 12:30:40 +0100415 case DataType::QS16:
416 {
417 const int relative_shift = _fixed_point_position_output - _fixed_point_position_input;
418 /* Fixed point position conversion QS16 -> QS16 */
419 if(relative_shift != 0 || !in_place)
420 {
421 const auto relative_shift_vec = vdupq_n_qs16(relative_shift);
422 execute_window_loop(window, [&](const Coordinates & id)
423 {
424 const qint16x8x2_t texels_qs16 =
425 {
426 {
427 vld1q_qs16(reinterpret_cast<qint16_t *>(input.ptr())),
428 vld1q_qs16(reinterpret_cast<qint16_t *>(input.ptr()) + 8)
429 }
430 };
431 vst1q_qs16(reinterpret_cast<qint16_t *>(output.ptr()), vqrshlq_s16(texels_qs16.val[0], relative_shift_vec));
432 vst1q_qs16(reinterpret_cast<qint16_t *>(output.ptr()) + 8, vqrshlq_s16(texels_qs16.val[1], relative_shift_vec));
433 },
434 input, output);
435 }
436 break;
437 }
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100438 case DataType::F32:
439 {
440 /* Up-conversion QS16 -> F32 */
441 execute_window_loop(window, [&](const Coordinates & id)
442 {
Georgios Pinitase2229412017-07-12 12:30:40 +0100443 const int16x8x2_t texels_qs16 =
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100444 {
445 {
446 vld1q_s16(reinterpret_cast<qint16_t *>(input.ptr())),
447 vld1q_s16(reinterpret_cast<qint16_t *>(input.ptr()) + 8)
448 }
449 };
450
Georgios Pinitase2229412017-07-12 12:30:40 +0100451 vst1q_f32(reinterpret_cast<float *>(output.ptr()), vcvt_f32_qs16(vget_low_s16(texels_qs16.val[0]), _fixed_point_position_input));
452 vst1q_f32(reinterpret_cast<float *>(output.ptr()) + 4, vcvt_f32_qs16(vget_high_s16(texels_qs16.val[0]), _fixed_point_position_input));
453 vst1q_f32(reinterpret_cast<float *>(output.ptr()) + 8, vcvt_f32_qs16(vget_low_s16(texels_qs16.val[1]), _fixed_point_position_input));
454 vst1q_f32(reinterpret_cast<float *>(output.ptr()) + 12, vcvt_f32_qs16(vget_high_s16(texels_qs16.val[1]), _fixed_point_position_input));
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100455 },
456 input, output);
457 break;
458 }
459 default:
460 ARM_COMPUTE_ERROR("Output data type not supported");
461 }
462 break;
463 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100464 case DataType::F32:
465 {
466 switch(_output->info()->data_type())
467 {
468 case DataType::QS8:
469 {
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100470 /* Down-conversion F32 -> QS8 */
471 execute_window_loop(window, [&](const Coordinates & id)
472 {
473 const float32x4x4_t texels_f32 =
474 {
475 {
476 vld1q_f32(reinterpret_cast<const float *>(input.ptr())),
477 vld1q_f32(reinterpret_cast<const float *>(input.ptr()) + 4),
478 vld1q_f32(reinterpret_cast<const float *>(input.ptr()) + 8),
479 vld1q_f32(reinterpret_cast<const float *>(input.ptr()) + 12)
480 }
481 };
482
Georgios Pinitase2229412017-07-12 12:30:40 +0100483 const qint8x16_t texels_s8 = vqcvtq_qs8_f32(texels_f32, _fixed_point_position_output);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100484
485 vst1q_s8(reinterpret_cast<int8_t *>(output.ptr()), texels_s8);
486 },
487 input, output);
488 break;
489 }
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100490 case DataType::QS16:
491 {
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100492 /* Down-conversion F32 -> QS16 */
493 execute_window_loop(window, [&](const Coordinates & id)
494 {
495 const float32x4x2_t texels_f32_1 =
496 {
497 {
498 vld1q_f32(reinterpret_cast<const float *>(input.ptr())),
499 vld1q_f32(reinterpret_cast<const float *>(input.ptr()) + 4),
500 }
501 };
502 const float32x4x2_t texels_f32_2 =
503 {
504 {
505 vld1q_f32(reinterpret_cast<const float *>(input.ptr()) + 8),
506 vld1q_f32(reinterpret_cast<const float *>(input.ptr()) + 12)
507 }
508 };
509
Georgios Pinitase2229412017-07-12 12:30:40 +0100510 vst1q_s16(reinterpret_cast<qint16_t *>(output.ptr()), vqcvtq_qs16_f32(texels_f32_1, _fixed_point_position_output));
511 vst1q_s16(reinterpret_cast<qint16_t *>(output.ptr()) + 8, vqcvtq_qs16_f32(texels_f32_2, _fixed_point_position_output));
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100512 },
513 input, output);
514 break;
515 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100516 default:
517 ARM_COMPUTE_ERROR("Output data type not supported");
518 }
519 break;
520 }
521 default:
522 ARM_COMPUTE_ERROR("Not supported");
523 }
524}