blob: 5abd6a122dfdd777791d4552a148391b10e24901 [file] [log] [blame]
Michele Di Giorgio32982d82017-07-07 14:44:43 +01001/*
Georgios Pinitas574775c2019-02-18 20:08:02 +00002 * Copyright (c) 2017-2019 ARM Limited.
Michele Di Giorgio32982d82017-07-07 14:44:43 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h"
25
Gian Marco Iodice06b184a2017-08-29 16:05:25 +010026#include "arm_compute/core/AccessWindowStatic.h"
Georgios Pinitas574775c2019-02-18 20:08:02 +000027#include "arm_compute/core/CPP/Validate.h"
Michele Di Giorgio32982d82017-07-07 14:44:43 +010028#include "arm_compute/core/Error.h"
29#include "arm_compute/core/Helpers.h"
Georgios Pinitas574775c2019-02-18 20:08:02 +000030#include "arm_compute/core/NEON/NEAsymm.h"
Michalis Spyrouba27e442019-05-28 10:04:57 +010031#include "arm_compute/core/NEON/NESymm.h"
Georgios Pinitas574775c2019-02-18 20:08:02 +000032#include "arm_compute/core/NEON/wrapper/wrapper.h"
Michele Di Giorgio32982d82017-07-07 14:44:43 +010033#include "arm_compute/core/Utils.h"
34#include "arm_compute/core/Validate.h"
35#include "arm_compute/core/Window.h"
36
37#include <arm_neon.h>
38
Georgios Pinitas574775c2019-02-18 20:08:02 +000039namespace arm_compute
40{
Alex Gilday60954c62018-03-05 16:22:48 +000041namespace
42{
Georgios Pinitas574775c2019-02-18 20:08:02 +000043Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
Alex Gilday60954c62018-03-05 16:22:48 +000044{
Georgios Pinitas574775c2019-02-18 20:08:02 +000045 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
Michalis Spyrou29a01c92019-08-22 11:44:04 +010046 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_PER_CHANNEL, DataType::QSYMM8, DataType::QSYMM16);
Alex Gilday60954c62018-03-05 16:22:48 +000047
48 if(output->tensor_shape().total_size() > 0)
49 {
Georgios Pinitas574775c2019-02-18 20:08:02 +000050 ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(output);
51 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F16, DataType::F32);
Alex Gilday60954c62018-03-05 16:22:48 +000052 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
53 }
54
55 return Status{};
56}
57
Georgios Pinitas574775c2019-02-18 20:08:02 +000058std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output)
Alex Gilday60954c62018-03-05 16:22:48 +000059{
Georgios Pinitas574775c2019-02-18 20:08:02 +000060 // Configure kernel window
61 Window win = calculate_max_window(*input, Steps());
62
Alex Gilday60954c62018-03-05 16:22:48 +000063 // Output tensor auto initialization if not yet initialized
Vidhya Sudhan Loganathan7485d5a2018-07-04 09:34:00 +010064 auto_init_if_empty(*output, input->tensor_shape(), 1, DataType::F32);
Alex Gilday60954c62018-03-05 16:22:48 +000065
Georgios Pinitas574775c2019-02-18 20:08:02 +000066 // NEDequantizationLayerKernel doesn't need padding so update_window_and_padding() can be skipped
67 Coordinates coord;
68 coord.set_num_dimensions(output->num_dimensions());
69 output->set_valid_region(ValidRegion(coord, output->tensor_shape()));
Alex Gilday60954c62018-03-05 16:22:48 +000070
Georgios Pinitas574775c2019-02-18 20:08:02 +000071 return std::make_tuple(Status{}, win);
72}
Alex Gilday60954c62018-03-05 16:22:48 +000073
Georgios Pinitas574775c2019-02-18 20:08:02 +000074template <typename T>
75inline void store_result(T *ptr, const float32x4x4_t &v)
76{
77 ARM_COMPUTE_UNUSED(ptr, v);
78}
Alex Gilday60954c62018-03-05 16:22:48 +000079
Georgios Pinitas574775c2019-02-18 20:08:02 +000080template <>
81inline void store_result<float>(float *ptr, const float32x4x4_t &v)
82{
83 wrapper::vstore(ptr, v.val[0]);
84 wrapper::vstore(ptr + 4, v.val[1]);
85 wrapper::vstore(ptr + 8, v.val[2]);
86 wrapper::vstore(ptr + 12, v.val[3]);
87}
Alex Gilday60954c62018-03-05 16:22:48 +000088
Georgios Pinitas574775c2019-02-18 20:08:02 +000089#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
90template <>
91inline void store_result<float16_t>(float16_t *ptr, const float32x4x4_t &v)
92{
93 wrapper::vstore(ptr, vcombine_f16(vcvt_f16_f32(v.val[0]), vcvt_f16_f32(v.val[1])));
94 wrapper::vstore(ptr + 8, vcombine_f16(vcvt_f16_f32(v.val[2]), vcvt_f16_f32(v.val[3])));
95}
96#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
97
98template <typename T>
Michalis Spyrouba27e442019-05-28 10:04:57 +010099inline void store_result(T *ptr, const float32x4x2_t &v)
100{
101 ARM_COMPUTE_UNUSED(ptr, v);
102}
103
104template <>
105inline void store_result<float>(float *ptr, const float32x4x2_t &v)
106{
107 wrapper::vstore(ptr, v.val[0]);
108 wrapper::vstore(ptr + 4, v.val[1]);
109}
110
111#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
112template <>
113inline void store_result<float16_t>(float16_t *ptr, const float32x4x2_t &v)
114{
115 wrapper::vstore(ptr, vcombine_f16(vcvt_f16_f32(v.val[0]), vcvt_f16_f32(v.val[1])));
116}
117#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
118
119template <typename T>
Georgios Pinitas3d13af82019-06-04 13:04:16 +0100120void run_dequantization_qasymm8(const ITensor *input, ITensor *output, const Window &window)
Georgios Pinitas574775c2019-02-18 20:08:02 +0000121{
Georgios Pinitas3d13af82019-06-04 13:04:16 +0100122 const UniformQuantizationInfo &qinfo = input->info()->quantization_info().uniform();
123 const float scale = qinfo.scale;
124 const int32_t offset = qinfo.offset;
Georgios Pinitas574775c2019-02-18 20:08:02 +0000125
126 const int window_step_x = 16;
127 const auto window_start_x = static_cast<int>(window.x().start());
128 const auto window_end_x = static_cast<int>(window.x().end());
129
130 // Collapse window and reset first dimension to handle tail calculations manually
131 Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
132 win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
133
134 // Create iterators
135 Iterator in(input, win_collapsed);
136 Iterator out(output, win_collapsed);
137
Michalis Spyroua4f378d2019-04-26 14:54:54 +0100138 execute_window_loop(win_collapsed, [&](const Coordinates &)
Georgios Pinitas574775c2019-02-18 20:08:02 +0000139 {
140 const auto in_ptr = reinterpret_cast<const uint8_t *>(in.ptr());
141 const auto out_ptr = reinterpret_cast<T *>(out.ptr());
142
143 int x = window_start_x;
144 for(; x <= (window_end_x - window_step_x); x += window_step_x)
145 {
146 const auto vin = wrapper::vloadq(in_ptr + x);
Georgios Pinitas3d13af82019-06-04 13:04:16 +0100147 const auto vdeq = vdequantize(vin, scale, offset);
Georgios Pinitas574775c2019-02-18 20:08:02 +0000148
149 store_result<T>(reinterpret_cast<T *>(out_ptr + x), vdeq);
150 }
151
152 // Compute left-over elements
153 for(; x < window_end_x; ++x)
154 {
155 uint8_t val = *(in_ptr + x);
Georgios Pinitas3d13af82019-06-04 13:04:16 +0100156 *(out_ptr + x) = static_cast<T>(dequantize(val, scale, offset));
Georgios Pinitas574775c2019-02-18 20:08:02 +0000157 }
158 },
159 in, out);
Alex Gilday60954c62018-03-05 16:22:48 +0000160}
Georgios Pinitas3d13af82019-06-04 13:04:16 +0100161
162template <typename T>
Michalis Spyrou3f632f32019-08-22 16:52:00 +0100163void run_dequantization_qasymm8_per_channel_nchw(const ITensor *input, ITensor *output, const Window &window)
Michalis Spyrou29a01c92019-08-22 11:44:04 +0100164{
165 const std::vector<float> scale = input->info()->quantization_info().scale();
166 const std::vector<int32_t> offset = input->info()->quantization_info().offset();
167
168 const int window_step_x = 16;
169 const auto window_start_x = static_cast<int>(window.x().start());
170 const auto window_end_x = static_cast<int>(window.x().end());
171
172 // Reset first dimension to handle tail calculations manually
173 Window win(window);
174 win.set(Window::DimX, Window::Dimension(0, 1, 1));
175
176 // Create iterators
177 Iterator in(input, win);
178 Iterator out(output, win);
179
180 execute_window_loop(win, [&](const Coordinates & id)
181 {
182 const auto in_ptr = reinterpret_cast<const uint8_t *>(in.ptr());
183 const auto out_ptr = reinterpret_cast<T *>(out.ptr());
184
185 int x = window_start_x;
186 for(; x <= (window_end_x - window_step_x); x += window_step_x)
187 {
188 const auto vin = wrapper::vloadq(in_ptr + x);
189 const auto vdeq = vdequantize(vin, scale[id.z()], offset[id.z()]);
190
191 store_result<T>(reinterpret_cast<T *>(out_ptr + x), vdeq);
192 }
193
194 // Compute left-over elements
195 for(; x < window_end_x; ++x)
196 {
197 uint8_t val = *(in_ptr + x);
198 *(out_ptr + x) = static_cast<T>(dequantize(val, scale[id.z()], offset[id.z()]));
199 }
200 },
201 in, out);
202}
203
204template <typename T>
Michalis Spyrou3f632f32019-08-22 16:52:00 +0100205void run_dequantization_qasymm8_per_channel_nhwc(const ITensor *input, ITensor *output, const Window &window)
206{
207 const std::vector<float> scale = input->info()->quantization_info().scale();
208 const std::vector<int32_t> offset = input->info()->quantization_info().offset();
209
210 const int window_step_x = 16;
211 const auto window_start_x = static_cast<int>(window.x().start());
212 const auto window_end_x = static_cast<int>(window.x().end());
213
214 // Reset first dimension to handle tail calculations manually
215 Window win(window);
216 win.set(Window::DimX, Window::Dimension(0, 1, 1));
217
218 // Create iterators
219 Iterator in(input, win);
220 Iterator out(output, win);
221
Michalis Spyrou6bff1952019-10-02 17:22:11 +0100222 execute_window_loop(win, [&](const Coordinates &)
Michalis Spyrou3f632f32019-08-22 16:52:00 +0100223 {
224 const auto in_ptr = reinterpret_cast<const uint8_t *>(in.ptr());
225 const auto out_ptr = reinterpret_cast<T *>(out.ptr());
226
227 int x = window_start_x;
228 for(; x <= (window_end_x - window_step_x); x += window_step_x)
229 {
230 const float32x4x4_t vscale =
231 {
232 {
233 scale[x + 0], scale[x + 1], scale[x + 2], scale[x + 3],
234 scale[x + 4], scale[x + 5], scale[x + 6], scale[x + 7],
235 scale[x + 8], scale[x + 9], scale[x + 10], scale[x + 11],
236 scale[x + 12], scale[x + 13], scale[x + 14], scale[x + 15]
237 }
238 };
239 const int32x4x4_t voffset =
240 {
241 {
242 offset[x + 0], offset[x + 1], offset[x + 2], offset[x + 3],
243 offset[x + 4], offset[x + 5], offset[x + 6], offset[x + 7],
244 offset[x + 8], offset[x + 9], offset[x + 10], offset[x + 11],
245 offset[x + 12], offset[x + 13], offset[x + 14], offset[x + 15]
246 }
247 };
248 const auto vin = wrapper::vloadq(in_ptr + x);
249 const auto vdeq = vdequantize(vin, vscale, voffset);
250
251 store_result<T>(reinterpret_cast<T *>(out_ptr + x), vdeq);
252 }
253
254 // Compute left-over elements
255 for(; x < window_end_x; ++x)
256 {
257 uint8_t val = *(in_ptr + x);
258 *(out_ptr + x) = static_cast<T>(dequantize(val, scale[x], offset[x]));
259 }
260 },
261 in, out);
262}
263
264template <typename T>
Georgios Pinitas3d13af82019-06-04 13:04:16 +0100265void run_dequantization_qsymm8(const ITensor *input, ITensor *output, const Window &window)
266{
267 const UniformQuantizationInfo &qinfo = input->info()->quantization_info().uniform();
268 const float scale = qinfo.scale;
269
270 const int window_step_x = 16;
271 const auto window_start_x = static_cast<int>(window.x().start());
272 const auto window_end_x = static_cast<int>(window.x().end());
273
274 // Collapse window and reset first dimension to handle tail calculations manually
275 Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
276 win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
277
278 // Create iterators
279 Iterator in(input, win_collapsed);
280 Iterator out(output, win_collapsed);
281
282 execute_window_loop(win_collapsed, [&](const Coordinates &)
283 {
284 const auto in_ptr = reinterpret_cast<const int8_t *>(in.ptr());
285 const auto out_ptr = reinterpret_cast<T *>(out.ptr());
286
287 int x = window_start_x;
288 for(; x <= (window_end_x - window_step_x); x += window_step_x)
289 {
290 const auto vin = wrapper::vloadq(in_ptr + x);
291 const auto vdeq = vdequantize(vin, scale);
292
293 store_result<T>(reinterpret_cast<T *>(out_ptr + x), vdeq);
294 }
295
296 // Compute left-over elements
297 for(; x < window_end_x; ++x)
298 {
Manuel Bottini10c53f12019-07-17 16:11:53 +0100299 int8_t val = *(in_ptr + x);
Georgios Pinitas3d13af82019-06-04 13:04:16 +0100300 *(out_ptr + x) = static_cast<T>(dequantize(val, scale));
301 }
302 },
303 in, out);
304}
305
306template <typename T>
Michalis Spyrouba27e442019-05-28 10:04:57 +0100307void run_dequantization_qsymm16(const ITensor *input, ITensor *output, const Window &window)
308{
309 const UniformQuantizationInfo &qinfo = input->info()->quantization_info().uniform();
310 const float scale = qinfo.scale;
311
312 const int window_step_x = 8;
313 const auto window_start_x = static_cast<int>(window.x().start());
314 const auto window_end_x = static_cast<int>(window.x().end());
315
316 // Collapse window and reset first dimension to handle tail calculations manually
317 Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
318 win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
319
320 // Create iterators
321 Iterator in(input, win_collapsed);
322 Iterator out(output, win_collapsed);
323
324 execute_window_loop(win_collapsed, [&](const Coordinates &)
325 {
326 const auto in_ptr = reinterpret_cast<const int16_t *>(in.ptr());
327 const auto out_ptr = reinterpret_cast<T *>(out.ptr());
328
329 int x = window_start_x;
330 for(; x <= (window_end_x - window_step_x); x += window_step_x)
331 {
332 const auto vin = wrapper::vloadq(in_ptr + x);
333 const auto vdeq = vdequantize_int16(vin, scale);
334
335 store_result<T>(reinterpret_cast<T *>(out_ptr + x), vdeq);
336 }
337
338 // Compute left-over elements
339 for(; x < window_end_x; ++x)
340 {
341 int16_t val = *(in_ptr + x);
342 *(out_ptr + x) = static_cast<T>(dequantize_qsymm16(val, scale));
343 }
344 },
345 in, out);
346}
347
348template <typename T>
Georgios Pinitas3d13af82019-06-04 13:04:16 +0100349void run_dequantization_core(const ITensor *input, ITensor *output, const Window &window)
350{
351 switch(input->info()->data_type())
352 {
353 case DataType::QASYMM8:
354 run_dequantization_qasymm8<T>(input, output, window);
355 break;
Michalis Spyrou29a01c92019-08-22 11:44:04 +0100356 case DataType::QASYMM8_PER_CHANNEL:
Michalis Spyrou3f632f32019-08-22 16:52:00 +0100357 input->info()->data_layout() == DataLayout::NHWC ? run_dequantization_qasymm8_per_channel_nhwc<T>(input, output, window) : run_dequantization_qasymm8_per_channel_nchw<T>(input, output, window);
Michalis Spyrou29a01c92019-08-22 11:44:04 +0100358 break;
Georgios Pinitas3d13af82019-06-04 13:04:16 +0100359 case DataType::QSYMM8:
360 run_dequantization_qsymm8<T>(input, output, window);
361 break;
Michalis Spyrouba27e442019-05-28 10:04:57 +0100362 case DataType::QSYMM16:
363 run_dequantization_qsymm16<T>(input, output, window);
364 break;
Georgios Pinitas3d13af82019-06-04 13:04:16 +0100365 default:
366 ARM_COMPUTE_ERROR("Unsupported data type.");
367 }
368}
Alex Gilday60954c62018-03-05 16:22:48 +0000369} // namespace
370
Michele Di Giorgio32982d82017-07-07 14:44:43 +0100371NEDequantizationLayerKernel::NEDequantizationLayerKernel()
Georgios Pinitas574775c2019-02-18 20:08:02 +0000372 : _input(nullptr), _output(nullptr)
Michele Di Giorgio32982d82017-07-07 14:44:43 +0100373{
374}
375
Georgios Pinitas574775c2019-02-18 20:08:02 +0000376void NEDequantizationLayerKernel::configure(const ITensor *input, ITensor *output)
Michele Di Giorgio32982d82017-07-07 14:44:43 +0100377{
Georgios Pinitas574775c2019-02-18 20:08:02 +0000378 ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
379 ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info()));
Michele Di Giorgio32982d82017-07-07 14:44:43 +0100380
Georgios Pinitas574775c2019-02-18 20:08:02 +0000381 _input = input;
382 _output = output;
Michele Di Giorgio32982d82017-07-07 14:44:43 +0100383
Alex Gilday60954c62018-03-05 16:22:48 +0000384 // Configure kernel window
Georgios Pinitas574775c2019-02-18 20:08:02 +0000385 auto win_config = validate_and_configure_window(input->info(), output->info());
Michele Di Giorgio32982d82017-07-07 14:44:43 +0100386
Alex Gilday60954c62018-03-05 16:22:48 +0000387 ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
Gian Marco Iodice06b184a2017-08-29 16:05:25 +0100388
Alex Gilday60954c62018-03-05 16:22:48 +0000389 INEKernel::configure(std::get<1>(win_config));
390}
Michele Di Giorgio32982d82017-07-07 14:44:43 +0100391
Georgios Pinitas574775c2019-02-18 20:08:02 +0000392Status NEDequantizationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output)
Alex Gilday60954c62018-03-05 16:22:48 +0000393{
Georgios Pinitas574775c2019-02-18 20:08:02 +0000394 ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output));
395 ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get())));
Alex Gilday60954c62018-03-05 16:22:48 +0000396 return Status{};
Michele Di Giorgio32982d82017-07-07 14:44:43 +0100397}
398
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100399void NEDequantizationLayerKernel::run(const Window &window, const ThreadInfo &info)
Michele Di Giorgio32982d82017-07-07 14:44:43 +0100400{
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100401 ARM_COMPUTE_UNUSED(info);
Michele Di Giorgio32982d82017-07-07 14:44:43 +0100402 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
403 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
404
Georgios Pinitas574775c2019-02-18 20:08:02 +0000405 switch(_output->info()->data_type())
Michele Di Giorgio32982d82017-07-07 14:44:43 +0100406 {
Georgios Pinitas574775c2019-02-18 20:08:02 +0000407 case DataType::F32:
Georgios Pinitas3d13af82019-06-04 13:04:16 +0100408 run_dequantization_core<float>(_input, _output, window);
Georgios Pinitas574775c2019-02-18 20:08:02 +0000409 break;
410#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
411 case DataType::F16:
Georgios Pinitas3d13af82019-06-04 13:04:16 +0100412 run_dequantization_core<float16_t>(_input, _output, window);
Georgios Pinitas574775c2019-02-18 20:08:02 +0000413 break;
414#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
415 default:
416 ARM_COMPUTE_ERROR("Unsupported data type.");
417 }
418}
Michalis Spyrouba27e442019-05-28 10:04:57 +0100419} // namespace arm_compute