blob: f7203701e7fc89b3f04bbc66619986865c278066 [file] [log] [blame]
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001/*
2 * Copyright (c) 2016, 2017 ARM Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "arm_compute/core/NEON/kernels/NEDepthConvertKernel.h"
25
26#include "arm_compute/core/Error.h"
27#include "arm_compute/core/Helpers.h"
28#include "arm_compute/core/ITensor.h"
29#include "arm_compute/core/NEON/NEFixedPoint.h"
30#include "arm_compute/core/TensorInfo.h"
31#include "arm_compute/core/Validate.h"
32
33#include <arm_neon.h>
34
35using namespace arm_compute;
36
37namespace arm_compute
38{
39class Coordinates;
40} // namespace arm_compute
41
42NEDepthConvertKernel::NEDepthConvertKernel()
Georgios Pinitase2229412017-07-12 12:30:40 +010043 : _input(nullptr), _output(nullptr), _policy(), _shift(0), _fixed_point_position_input(0), _fixed_point_position_output(0)
Anthony Barbier6ff3b192017-09-04 18:44:23 +010044{
45}
46
Georgios Pinitase2229412017-07-12 12:30:40 +010047void NEDepthConvertKernel::configure(ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift)
Anthony Barbier6ff3b192017-09-04 18:44:23 +010048{
Georgios Pinitas21efeb42017-07-04 12:47:17 +010049 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QS8, DataType::S16, DataType::U16, DataType::QS16, DataType::F32);
Georgios Pinitase2229412017-07-12 12:30:40 +010050
51 _input = input;
52 _output = input;
53 _policy = policy;
54 _shift = shift;
55
56 if(output != nullptr)
57 {
58 // Auto initialize output shape if not initialized (We can only auto-configure the shape, datatype must be given)
59 set_shape_if_empty(*output->info(), input->info()->tensor_shape());
60
61 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QS8, DataType::S16, DataType::U16, DataType::QS16, DataType::U32, DataType::S32, DataType::F32);
62 ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
63
64 // Set output
65 _output = output;
66 }
67
68 // Set initial fixed point position of input and output
69 _fixed_point_position_input = input->info()->fixed_point_position();
70 _fixed_point_position_output = _output->info()->fixed_point_position();
71
72 // Set the fixed point position to the output tensor if needed
73 if(is_data_type_fixed_point(input->info()->data_type()) && is_data_type_fixed_point(_output->info()->data_type()))
74 {
75 // If in-place set the fixed point position of the output tensor to be equal to shift
76 _fixed_point_position_output = (_input == _output) ? static_cast<int>(_shift) : _fixed_point_position_output;
77 // Set fixed point position to output tensor
78 _output->info()->set_fixed_point_position(_fixed_point_position_output);
79 }
80
81 ARM_COMPUTE_ERROR_ON(shift >= 8 && (!is_data_type_fixed_point(input->info()->data_type()) && !is_data_type_fixed_point(output->info()->data_type())));
82 ARM_COMPUTE_ERROR_ON(input == output && (data_size_from_type(input->info()->data_type()) != data_size_from_type(output->info()->data_type())));
Anthony Barbier6ff3b192017-09-04 18:44:23 +010083
Anthony Barbier6ff3b192017-09-04 18:44:23 +010084 ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::U8 && (output->info()->data_type() != DataType::S16 && output->info()->data_type() != DataType::U16
85 && output->info()->data_type() != DataType::S32),
86 "Only data_types supported [in] U8 -> [out] U16, S16, S32");
87
Georgios Pinitase2229412017-07-12 12:30:40 +010088 ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::QS8 && (output->info()->data_type() != DataType::QS8 && output->info()->data_type() != DataType::F32),
89 "Only data_types supported [in] QS8 -> [out] QS8, F32");
Georgios Pinitas21efeb42017-07-04 12:47:17 +010090
Anthony Barbier6ff3b192017-09-04 18:44:23 +010091 ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::U16 && (output->info()->data_type() != DataType::U8 && output->info()->data_type() != DataType::U32),
92 "Only data_types supported [in] U16 -> [out] U8, U32");
93
94 ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::S16 && (output->info()->data_type() != DataType::U8 && output->info()->data_type() != DataType::S32),
95 "Only data_types supported [in] S16 -> [out] U8, S32");
96
Georgios Pinitase2229412017-07-12 12:30:40 +010097 ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::QS16 && (output->info()->data_type() != DataType::QS16 && output->info()->data_type() != DataType::F32),
98 "Only data_types supported [in] QS16 -> [out] QS16, F32");
Georgios Pinitas21efeb42017-07-04 12:47:17 +010099
100 ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::F32 && (output->info()->data_type() != DataType::QS8 && output->info()->data_type() != DataType::QS16),
101 "Only data_types supported [in] F32 -> [out] QS8, QS16");
102
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100103 constexpr unsigned int num_elems_processed_per_iteration = 16;
Georgios Pinitase2229412017-07-12 12:30:40 +0100104
105 // Configure kernel window
106 Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
107
108 AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
109 if(output != nullptr)
110 {
111 AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
112 update_window_and_padding(win, input_access, output_access);
113 output_access.set_valid_region(win, input->info()->valid_region());
114 }
115 else
116 {
117 // In-place computation
118 update_window_and_padding(win, input_access);
119 }
120 ICPPKernel::configure(win);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100121}
122
123void NEDepthConvertKernel::run(const Window &window)
124{
125 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
Georgios Pinitase2229412017-07-12 12:30:40 +0100126 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100127 ARM_COMPUTE_ERROR_ON(nullptr == _input);
128 ARM_COMPUTE_ERROR_ON(nullptr == _output);
129 ARM_COMPUTE_ERROR_ON(_input == _output);
130
131 Iterator input(_input, window);
132 Iterator output(_output, window);
133
Georgios Pinitase2229412017-07-12 12:30:40 +0100134 bool in_place = (_input == _output);
135
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100136 switch(_input->info()->data_type())
137 {
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100138 case DataType::U8:
139 {
140 const int16x8_t b = vdupq_n_s16(_shift);
141
142 switch(_output->info()->data_type())
143 {
144 case DataType::S16:
145 {
146 /* Up-conversion U8 -> S16 */
147 execute_window_loop(window, [&](const Coordinates & id)
148 {
149 const uint8x16_t texels_u8 = vld1q_u8(input.ptr());
150
151 const int16x8x2_t texels =
152 {
153 {
154 vshlq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(texels_u8))), b),
155 vshlq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(texels_u8))), b)
156 }
157 };
158
159 vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()), texels.val[0]);
160 vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()) + 8, texels.val[1]);
161 },
162 input, output);
163 break;
164 }
165 case DataType::S32:
166 {
167 /* Up-conversion U8 -> S32 */
168 execute_window_loop(window, [&](const Coordinates & id)
169 {
170 const uint8x16_t texels_u8 = vld1q_u8(input.ptr());
171
172 const int16x8x2_t texels =
173 {
174 {
175 vshlq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(texels_u8))), b),
176 vshlq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(texels_u8))), b)
177 }
178 };
179
180 vst1q_s32(reinterpret_cast<int32_t *>(output.ptr()), vmovl_s16(vget_low_s16(texels.val[0])));
181 vst1q_s32(reinterpret_cast<int32_t *>(output.ptr()) + 4, vmovl_s16(vget_high_s16(texels.val[0])));
182 vst1q_s32(reinterpret_cast<int32_t *>(output.ptr()) + 8, vmovl_s16(vget_low_s16(texels.val[1])));
183 vst1q_s32(reinterpret_cast<int32_t *>(output.ptr()) + 12, vmovl_s16(vget_high_s16(texels.val[1])));
184 },
185 input, output);
186 break;
187 }
188 case DataType::U16:
189 {
190 /* Up-conversion U8 -> U16 */
191 execute_window_loop(window, [&](const Coordinates & id)
192 {
193 const uint8x16_t texels_u8 = vld1q_u8(input.ptr());
194
195 const uint16x8x2_t texels =
196 {
197 {
198 vshlq_u16(vmovl_u8(vget_low_u8(texels_u8)), b),
199 vshlq_u16(vmovl_u8(vget_high_u8(texels_u8)), b)
200 }
201 };
202
203 vst1q_u16(reinterpret_cast<uint16_t *>(output.ptr()), texels.val[0]);
204 vst1q_u16(reinterpret_cast<uint16_t *>(output.ptr()) + 8, texels.val[1]);
205 },
206 input, output);
207 break;
208 }
209 default:
210 ARM_COMPUTE_ERROR("Output data type not supported");
211 }
212 break;
213 }
Georgios Pinitase2229412017-07-12 12:30:40 +0100214 case DataType::QS8:
215 {
216 switch(_output->info()->data_type())
217 {
218 case DataType::QS8:
219 {
220 const int relative_shift = _fixed_point_position_output - _fixed_point_position_input;
221 /* Fixed point position conversion QS8 -> QS8 */
222 if(relative_shift != 0 || !in_place)
223 {
224 const auto relative_shift_vec = vdupq_n_qs8(relative_shift);
225 execute_window_loop(window, [&](const Coordinates & id)
226 {
227 const qint8x16_t texels_qs8 = vld1q_qs8(reinterpret_cast<const qint8_t *>(input.ptr()));
228 vst1q_qs8(reinterpret_cast<qint8_t *>(output.ptr()), vqrshlq_s8(texels_qs8, relative_shift_vec));
229 },
230 input, output);
231 }
232 break;
233 }
234 case DataType::F32:
235 {
236 /* Up-conversion QS8 -> F32 */
237 execute_window_loop(window, [&](const Coordinates & id)
238 {
239 const qint8x16_t texels_qs8 = vld1q_qs8(reinterpret_cast<const qint8_t *>(input.ptr()));
240
241 float32x4x2_t texels_low = vcvt_f32_qs8(vget_low_s8(texels_qs8), _fixed_point_position_input);
242 float32x4x2_t texels_high = vcvt_f32_qs8(vget_high_s8(texels_qs8), _fixed_point_position_input);
243
244 vst1q_f32(reinterpret_cast<float *>(output.ptr()), texels_low.val[0]);
245 vst1q_f32(reinterpret_cast<float *>(output.ptr()) + 4, texels_low.val[1]);
246 vst1q_f32(reinterpret_cast<float *>(output.ptr()) + 8, texels_high.val[0]);
247 vst1q_f32(reinterpret_cast<float *>(output.ptr()) + 12, texels_high.val[1]);
248 },
249 input, output);
250 break;
251 }
252 default:
253 ARM_COMPUTE_ERROR("Output data type not supported");
254 }
255 break;
256 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100257 case DataType::S16:
258 {
259 switch(_output->info()->data_type())
260 {
261 case DataType::U8:
262 {
263 const int16x8_t b = vdupq_n_s16(-static_cast<int16_t>(_shift));
264
265 /* Down-conversion S16 -> U8 */
266 if(ConvertPolicy::SATURATE == _policy)
267 {
268 execute_window_loop(window, [&](const Coordinates & id)
269 {
270 const int16x8x2_t texels =
271 {
272 {
273 vqshlq_s16(vld1q_s16(reinterpret_cast<int16_t *>(input.ptr())), b),
274 vqshlq_s16(vld1q_s16(reinterpret_cast<int16_t *>(input.ptr()) + 8), b)
275 }
276 };
277
278 vst1q_u8(output.ptr(), vcombine_u8(vqmovun_s16(texels.val[0]), vqmovun_s16(texels.val[1])));
279 },
280 input, output);
281 }
282 else
283 {
284 execute_window_loop(window, [&](const Coordinates & id)
285 {
286 const int16x8x2_t texels =
287 {
288 {
289 vshlq_s16(vld1q_s16(reinterpret_cast<int16_t *>(input.ptr())), b),
290 vshlq_s16(vld1q_s16(reinterpret_cast<int16_t *>(input.ptr()) + 8), b)
291 }
292 };
293
294 vst1q_u8(output.ptr(), vcombine_u8(vmovn_u16(vreinterpretq_u16_s16(texels.val[0])),
295 vmovn_u16(vreinterpretq_u16_s16(texels.val[1]))));
296 },
297 input, output);
298 }
299 break;
300 }
301 case DataType::S32:
302 {
303 const int32x4_t b = vdupq_n_s32(_shift);
304
305 /* Up-conversion S16 -> S32 */
306 execute_window_loop(window, [&](const Coordinates & id)
307 {
308 const int16x8x2_t texels =
309 {
310 {
311 vld1q_s16(reinterpret_cast<int16_t *>(input.ptr())),
312 vld1q_s16(reinterpret_cast<int16_t *>(input.ptr()) + 8)
313 }
314 };
315
316 const int32x4x4_t texels_s32 =
317 {
318 {
319 vshlq_s32(vmovl_s16(vget_low_s16(texels.val[0])), b),
320 vshlq_s32(vmovl_s16(vget_high_s16(texels.val[0])), b),
321 vshlq_s32(vmovl_s16(vget_low_s16(texels.val[1])), b),
322 vshlq_s32(vmovl_s16(vget_high_s16(texels.val[1])), b)
323 }
324 };
325
326 vst1q_s32(reinterpret_cast<int32_t *>(output.ptr()), texels_s32.val[0]);
327 vst1q_s32(reinterpret_cast<int32_t *>(output.ptr()) + 4, texels_s32.val[1]);
328 vst1q_s32(reinterpret_cast<int32_t *>(output.ptr()) + 8, texels_s32.val[2]);
329 vst1q_s32(reinterpret_cast<int32_t *>(output.ptr()) + 12, texels_s32.val[3]);
330 },
331 input, output);
332 break;
333 }
334 default:
335 ARM_COMPUTE_ERROR("Output data type not supported");
336 }
337 break;
338 }
339 case DataType::U16:
340 {
341 switch(_output->info()->data_type())
342 {
343 case DataType::U8:
344 {
345 const int16x8_t b = vdupq_n_s16(-static_cast<int16_t>(_shift));
346
347 /* Down-conversion U16 -> U8 */
348 if(ConvertPolicy::SATURATE == _policy)
349 {
350 execute_window_loop(window, [&](const Coordinates & id)
351 {
352 const uint16x8x2_t texels =
353 {
354 {
355 vqshlq_u16(vld1q_u16(reinterpret_cast<uint16_t *>(input.ptr())), b),
356 vqshlq_u16(vld1q_u16(reinterpret_cast<uint16_t *>(input.ptr()) + 8), b)
357 }
358 };
359
360 vst1q_u8(output.ptr(), vcombine_u8(vqmovn_u16(texels.val[0]), vqmovn_u16(texels.val[1])));
361 },
362 input, output);
363 }
364 else
365 {
366 execute_window_loop(window, [&](const Coordinates & id)
367 {
368 const uint16x8x2_t texels =
369 {
370 {
371 vshlq_u16(vld1q_u16(reinterpret_cast<uint16_t *>(input.ptr())), b),
372 vshlq_u16(vld1q_u16(reinterpret_cast<uint16_t *>(input.ptr()) + 8), b)
373 }
374 };
375
376 vst1q_u8(output.ptr(), vcombine_u8(vmovn_u16(texels.val[0]), vmovn_u16(texels.val[1])));
377 },
378 input, output);
379 }
380 break;
381 }
382 case DataType::U32:
383 {
384 const int32x4_t b = vdupq_n_s32(_shift);
385
386 /* Up-conversion U16 -> U32 */
387 execute_window_loop(window, [&](const Coordinates & id)
388 {
389 const uint16x8x2_t texels =
390 {
391 {
392 vld1q_u16(reinterpret_cast<uint16_t *>(input.ptr())),
393 vld1q_u16(reinterpret_cast<uint16_t *>(input.ptr()) + 8)
394 }
395 };
396
397 vst1q_u32(reinterpret_cast<uint32_t *>(output.ptr()), vshlq_u32(vmovl_u16(vget_low_u16(texels.val[0])), b));
398 vst1q_u32(reinterpret_cast<uint32_t *>(output.ptr()) + 4, vshlq_u32(vmovl_u16(vget_high_u16(texels.val[0])), b));
399 vst1q_u32(reinterpret_cast<uint32_t *>(output.ptr()) + 8, vshlq_u32(vmovl_u16(vget_low_u16(texels.val[1])), b));
400 vst1q_u32(reinterpret_cast<uint32_t *>(output.ptr()) + 12, vshlq_u32(vmovl_u16(vget_high_u16(texels.val[1])), b));
401 },
402 input, output);
403 break;
404 }
405 default:
406 ARM_COMPUTE_ERROR("Output data type not supported");
407 }
408 break;
409 }
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100410 case DataType::QS16:
411 {
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100412 switch(_output->info()->data_type())
413 {
Georgios Pinitase2229412017-07-12 12:30:40 +0100414 case DataType::QS16:
415 {
416 const int relative_shift = _fixed_point_position_output - _fixed_point_position_input;
417 /* Fixed point position conversion QS16 -> QS16 */
418 if(relative_shift != 0 || !in_place)
419 {
420 const auto relative_shift_vec = vdupq_n_qs16(relative_shift);
421 execute_window_loop(window, [&](const Coordinates & id)
422 {
423 const qint16x8x2_t texels_qs16 =
424 {
425 {
426 vld1q_qs16(reinterpret_cast<qint16_t *>(input.ptr())),
427 vld1q_qs16(reinterpret_cast<qint16_t *>(input.ptr()) + 8)
428 }
429 };
430 vst1q_qs16(reinterpret_cast<qint16_t *>(output.ptr()), vqrshlq_s16(texels_qs16.val[0], relative_shift_vec));
431 vst1q_qs16(reinterpret_cast<qint16_t *>(output.ptr()) + 8, vqrshlq_s16(texels_qs16.val[1], relative_shift_vec));
432 },
433 input, output);
434 }
435 break;
436 }
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100437 case DataType::F32:
438 {
439 /* Up-conversion QS16 -> F32 */
440 execute_window_loop(window, [&](const Coordinates & id)
441 {
Georgios Pinitase2229412017-07-12 12:30:40 +0100442 const int16x8x2_t texels_qs16 =
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100443 {
444 {
445 vld1q_s16(reinterpret_cast<qint16_t *>(input.ptr())),
446 vld1q_s16(reinterpret_cast<qint16_t *>(input.ptr()) + 8)
447 }
448 };
449
Georgios Pinitase2229412017-07-12 12:30:40 +0100450 vst1q_f32(reinterpret_cast<float *>(output.ptr()), vcvt_f32_qs16(vget_low_s16(texels_qs16.val[0]), _fixed_point_position_input));
451 vst1q_f32(reinterpret_cast<float *>(output.ptr()) + 4, vcvt_f32_qs16(vget_high_s16(texels_qs16.val[0]), _fixed_point_position_input));
452 vst1q_f32(reinterpret_cast<float *>(output.ptr()) + 8, vcvt_f32_qs16(vget_low_s16(texels_qs16.val[1]), _fixed_point_position_input));
453 vst1q_f32(reinterpret_cast<float *>(output.ptr()) + 12, vcvt_f32_qs16(vget_high_s16(texels_qs16.val[1]), _fixed_point_position_input));
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100454 },
455 input, output);
456 break;
457 }
458 default:
459 ARM_COMPUTE_ERROR("Output data type not supported");
460 }
461 break;
462 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100463 case DataType::F32:
464 {
465 switch(_output->info()->data_type())
466 {
467 case DataType::QS8:
468 {
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100469 /* Down-conversion F32 -> QS8 */
470 execute_window_loop(window, [&](const Coordinates & id)
471 {
472 const float32x4x4_t texels_f32 =
473 {
474 {
475 vld1q_f32(reinterpret_cast<const float *>(input.ptr())),
476 vld1q_f32(reinterpret_cast<const float *>(input.ptr()) + 4),
477 vld1q_f32(reinterpret_cast<const float *>(input.ptr()) + 8),
478 vld1q_f32(reinterpret_cast<const float *>(input.ptr()) + 12)
479 }
480 };
481
Georgios Pinitase2229412017-07-12 12:30:40 +0100482 const qint8x16_t texels_s8 = vqcvtq_qs8_f32(texels_f32, _fixed_point_position_output);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100483
484 vst1q_s8(reinterpret_cast<int8_t *>(output.ptr()), texels_s8);
485 },
486 input, output);
487 break;
488 }
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100489 case DataType::QS16:
490 {
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100491 /* Down-conversion F32 -> QS16 */
492 execute_window_loop(window, [&](const Coordinates & id)
493 {
494 const float32x4x2_t texels_f32_1 =
495 {
496 {
497 vld1q_f32(reinterpret_cast<const float *>(input.ptr())),
498 vld1q_f32(reinterpret_cast<const float *>(input.ptr()) + 4),
499 }
500 };
501 const float32x4x2_t texels_f32_2 =
502 {
503 {
504 vld1q_f32(reinterpret_cast<const float *>(input.ptr()) + 8),
505 vld1q_f32(reinterpret_cast<const float *>(input.ptr()) + 12)
506 }
507 };
508
Georgios Pinitase2229412017-07-12 12:30:40 +0100509 vst1q_s16(reinterpret_cast<qint16_t *>(output.ptr()), vqcvtq_qs16_f32(texels_f32_1, _fixed_point_position_output));
510 vst1q_s16(reinterpret_cast<qint16_t *>(output.ptr()) + 8, vqcvtq_qs16_f32(texels_f32_2, _fixed_point_position_output));
Georgios Pinitas21efeb42017-07-04 12:47:17 +0100511 },
512 input, output);
513 break;
514 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100515 default:
516 ARM_COMPUTE_ERROR("Output data type not supported");
517 }
518 break;
519 }
520 default:
521 ARM_COMPUTE_ERROR("Not supported");
522 }
523}