blob: f555df3828bfc947af738cc20ebfda910d20a65e [file] [log] [blame]
/*
 * Copyright (c) 2017-2019 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#include "arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h"
25
Gian Marco Iodice06b184a2017-08-29 16:05:25 +010026#include "arm_compute/core/AccessWindowStatic.h"
Georgios Pinitas574775c2019-02-18 20:08:02 +000027#include "arm_compute/core/CPP/Validate.h"
Michele Di Giorgio32982d82017-07-07 14:44:43 +010028#include "arm_compute/core/Error.h"
29#include "arm_compute/core/Helpers.h"
Georgios Pinitas574775c2019-02-18 20:08:02 +000030#include "arm_compute/core/NEON/NEAsymm.h"
Michalis Spyrouba27e442019-05-28 10:04:57 +010031#include "arm_compute/core/NEON/NESymm.h"
Georgios Pinitas574775c2019-02-18 20:08:02 +000032#include "arm_compute/core/NEON/wrapper/wrapper.h"
Michele Di Giorgio32982d82017-07-07 14:44:43 +010033#include "arm_compute/core/Utils.h"
34#include "arm_compute/core/Validate.h"
35#include "arm_compute/core/Window.h"
36
37#include <arm_neon.h>
38
Georgios Pinitas574775c2019-02-18 20:08:02 +000039namespace arm_compute
40{
Alex Gilday60954c62018-03-05 16:22:48 +000041namespace
42{
Georgios Pinitas574775c2019-02-18 20:08:02 +000043Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
Alex Gilday60954c62018-03-05 16:22:48 +000044{
Georgios Pinitas574775c2019-02-18 20:08:02 +000045 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
Georgios Pinitas8217c8e2019-11-11 18:24:22 +000046 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8, DataType::QSYMM16);
Alex Gilday60954c62018-03-05 16:22:48 +000047
48 if(output->tensor_shape().total_size() > 0)
49 {
Georgios Pinitas574775c2019-02-18 20:08:02 +000050 ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(output);
51 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F16, DataType::F32);
Alex Gilday60954c62018-03-05 16:22:48 +000052 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
53 }
54
55 return Status{};
56}
57
Georgios Pinitas574775c2019-02-18 20:08:02 +000058std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output)
Alex Gilday60954c62018-03-05 16:22:48 +000059{
Georgios Pinitas574775c2019-02-18 20:08:02 +000060 // Configure kernel window
61 Window win = calculate_max_window(*input, Steps());
62
Alex Gilday60954c62018-03-05 16:22:48 +000063 // Output tensor auto initialization if not yet initialized
Vidhya Sudhan Loganathan7485d5a2018-07-04 09:34:00 +010064 auto_init_if_empty(*output, input->tensor_shape(), 1, DataType::F32);
Alex Gilday60954c62018-03-05 16:22:48 +000065
Georgios Pinitas574775c2019-02-18 20:08:02 +000066 // NEDequantizationLayerKernel doesn't need padding so update_window_and_padding() can be skipped
67 Coordinates coord;
68 coord.set_num_dimensions(output->num_dimensions());
69 output->set_valid_region(ValidRegion(coord, output->tensor_shape()));
Alex Gilday60954c62018-03-05 16:22:48 +000070
Georgios Pinitas574775c2019-02-18 20:08:02 +000071 return std::make_tuple(Status{}, win);
72}
Alex Gilday60954c62018-03-05 16:22:48 +000073
Georgios Pinitas574775c2019-02-18 20:08:02 +000074template <typename T>
75inline void store_result(T *ptr, const float32x4x4_t &v)
76{
77 ARM_COMPUTE_UNUSED(ptr, v);
78}
Alex Gilday60954c62018-03-05 16:22:48 +000079
Georgios Pinitas574775c2019-02-18 20:08:02 +000080template <>
81inline void store_result<float>(float *ptr, const float32x4x4_t &v)
82{
83 wrapper::vstore(ptr, v.val[0]);
84 wrapper::vstore(ptr + 4, v.val[1]);
85 wrapper::vstore(ptr + 8, v.val[2]);
86 wrapper::vstore(ptr + 12, v.val[3]);
87}
Alex Gilday60954c62018-03-05 16:22:48 +000088
Georgios Pinitas574775c2019-02-18 20:08:02 +000089#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
90template <>
91inline void store_result<float16_t>(float16_t *ptr, const float32x4x4_t &v)
92{
93 wrapper::vstore(ptr, vcombine_f16(vcvt_f16_f32(v.val[0]), vcvt_f16_f32(v.val[1])));
94 wrapper::vstore(ptr + 8, vcombine_f16(vcvt_f16_f32(v.val[2]), vcvt_f16_f32(v.val[3])));
95}
96#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
97
98template <typename T>
Michalis Spyrouba27e442019-05-28 10:04:57 +010099inline void store_result(T *ptr, const float32x4x2_t &v)
100{
101 ARM_COMPUTE_UNUSED(ptr, v);
102}
103
104template <>
105inline void store_result<float>(float *ptr, const float32x4x2_t &v)
106{
107 wrapper::vstore(ptr, v.val[0]);
108 wrapper::vstore(ptr + 4, v.val[1]);
109}
110
111#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
112template <>
113inline void store_result<float16_t>(float16_t *ptr, const float32x4x2_t &v)
114{
115 wrapper::vstore(ptr, vcombine_f16(vcvt_f16_f32(v.val[0]), vcvt_f16_f32(v.val[1])));
116}
117#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
118
119template <typename T>
Georgios Pinitas3d13af82019-06-04 13:04:16 +0100120void run_dequantization_qasymm8(const ITensor *input, ITensor *output, const Window &window)
Georgios Pinitas574775c2019-02-18 20:08:02 +0000121{
Georgios Pinitas3d13af82019-06-04 13:04:16 +0100122 const UniformQuantizationInfo &qinfo = input->info()->quantization_info().uniform();
123 const float scale = qinfo.scale;
124 const int32_t offset = qinfo.offset;
Georgios Pinitas574775c2019-02-18 20:08:02 +0000125
126 const int window_step_x = 16;
127 const auto window_start_x = static_cast<int>(window.x().start());
128 const auto window_end_x = static_cast<int>(window.x().end());
129
130 // Collapse window and reset first dimension to handle tail calculations manually
131 Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
132 win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
133
134 // Create iterators
135 Iterator in(input, win_collapsed);
136 Iterator out(output, win_collapsed);
137
Michalis Spyroua4f378d2019-04-26 14:54:54 +0100138 execute_window_loop(win_collapsed, [&](const Coordinates &)
Georgios Pinitas574775c2019-02-18 20:08:02 +0000139 {
140 const auto in_ptr = reinterpret_cast<const uint8_t *>(in.ptr());
141 const auto out_ptr = reinterpret_cast<T *>(out.ptr());
142
143 int x = window_start_x;
144 for(; x <= (window_end_x - window_step_x); x += window_step_x)
145 {
146 const auto vin = wrapper::vloadq(in_ptr + x);
Georgios Pinitas3d13af82019-06-04 13:04:16 +0100147 const auto vdeq = vdequantize(vin, scale, offset);
Georgios Pinitas574775c2019-02-18 20:08:02 +0000148
149 store_result<T>(reinterpret_cast<T *>(out_ptr + x), vdeq);
150 }
151
152 // Compute left-over elements
153 for(; x < window_end_x; ++x)
154 {
155 uint8_t val = *(in_ptr + x);
Georgios Pinitas3d13af82019-06-04 13:04:16 +0100156 *(out_ptr + x) = static_cast<T>(dequantize(val, scale, offset));
Georgios Pinitas574775c2019-02-18 20:08:02 +0000157 }
158 },
159 in, out);
Alex Gilday60954c62018-03-05 16:22:48 +0000160}
Georgios Pinitas3d13af82019-06-04 13:04:16 +0100161
162template <typename T>
Georgios Pinitas8217c8e2019-11-11 18:24:22 +0000163void run_dequantization_qsymm8_per_channel_nchw(const ITensor *input, ITensor *output, const Window &window)
Michalis Spyrou29a01c92019-08-22 11:44:04 +0100164{
Georgios Pinitas8217c8e2019-11-11 18:24:22 +0000165 const auto scale = input->info()->quantization_info().scale();
Michalis Spyrou29a01c92019-08-22 11:44:04 +0100166
167 const int window_step_x = 16;
168 const auto window_start_x = static_cast<int>(window.x().start());
169 const auto window_end_x = static_cast<int>(window.x().end());
170
171 // Reset first dimension to handle tail calculations manually
172 Window win(window);
173 win.set(Window::DimX, Window::Dimension(0, 1, 1));
174
175 // Create iterators
176 Iterator in(input, win);
177 Iterator out(output, win);
178
179 execute_window_loop(win, [&](const Coordinates & id)
180 {
Georgios Pinitas8217c8e2019-11-11 18:24:22 +0000181 const auto in_ptr = reinterpret_cast<const int8_t *>(in.ptr());
Michalis Spyrou29a01c92019-08-22 11:44:04 +0100182 const auto out_ptr = reinterpret_cast<T *>(out.ptr());
183
184 int x = window_start_x;
185 for(; x <= (window_end_x - window_step_x); x += window_step_x)
186 {
187 const auto vin = wrapper::vloadq(in_ptr + x);
Georgios Pinitas8217c8e2019-11-11 18:24:22 +0000188 const auto vdeq = vdequantize(vin, scale[id.z()]);
Michalis Spyrou29a01c92019-08-22 11:44:04 +0100189
190 store_result<T>(reinterpret_cast<T *>(out_ptr + x), vdeq);
191 }
192
193 // Compute left-over elements
194 for(; x < window_end_x; ++x)
195 {
Georgios Pinitas8217c8e2019-11-11 18:24:22 +0000196 int8_t val = *(in_ptr + x);
197 *(out_ptr + x) = static_cast<T>(dequantize(val, scale[id.z()]));
Michalis Spyrou29a01c92019-08-22 11:44:04 +0100198 }
199 },
200 in, out);
201}
202
203template <typename T>
Georgios Pinitas8217c8e2019-11-11 18:24:22 +0000204void run_dequantization_qsymm8_per_channel_nhwc(const ITensor *input, ITensor *output, const Window &window)
Michalis Spyrou3f632f32019-08-22 16:52:00 +0100205{
Georgios Pinitas8217c8e2019-11-11 18:24:22 +0000206 const auto scale = input->info()->quantization_info().scale();
Michalis Spyrou3f632f32019-08-22 16:52:00 +0100207
208 const int window_step_x = 16;
209 const auto window_start_x = static_cast<int>(window.x().start());
210 const auto window_end_x = static_cast<int>(window.x().end());
211
212 // Reset first dimension to handle tail calculations manually
213 Window win(window);
214 win.set(Window::DimX, Window::Dimension(0, 1, 1));
215
216 // Create iterators
217 Iterator in(input, win);
218 Iterator out(output, win);
219
Michalis Spyrou6bff1952019-10-02 17:22:11 +0100220 execute_window_loop(win, [&](const Coordinates &)
Michalis Spyrou3f632f32019-08-22 16:52:00 +0100221 {
Georgios Pinitas8217c8e2019-11-11 18:24:22 +0000222 const auto in_ptr = reinterpret_cast<const int8_t *>(in.ptr());
Michalis Spyrou3f632f32019-08-22 16:52:00 +0100223 const auto out_ptr = reinterpret_cast<T *>(out.ptr());
224
225 int x = window_start_x;
226 for(; x <= (window_end_x - window_step_x); x += window_step_x)
227 {
228 const float32x4x4_t vscale =
229 {
230 {
231 scale[x + 0], scale[x + 1], scale[x + 2], scale[x + 3],
232 scale[x + 4], scale[x + 5], scale[x + 6], scale[x + 7],
233 scale[x + 8], scale[x + 9], scale[x + 10], scale[x + 11],
234 scale[x + 12], scale[x + 13], scale[x + 14], scale[x + 15]
235 }
236 };
Michalis Spyrou3f632f32019-08-22 16:52:00 +0100237 const auto vin = wrapper::vloadq(in_ptr + x);
Georgios Pinitas8217c8e2019-11-11 18:24:22 +0000238 const auto vdeq = vdequantize(vin, vscale);
Michalis Spyrou3f632f32019-08-22 16:52:00 +0100239
240 store_result<T>(reinterpret_cast<T *>(out_ptr + x), vdeq);
241 }
242
243 // Compute left-over elements
244 for(; x < window_end_x; ++x)
245 {
Georgios Pinitas8217c8e2019-11-11 18:24:22 +0000246 int8_t val = *(in_ptr + x);
247 *(out_ptr + x) = static_cast<T>(dequantize(val, scale[x]));
Michalis Spyrou3f632f32019-08-22 16:52:00 +0100248 }
249 },
250 in, out);
251}
252
253template <typename T>
Georgios Pinitas3d13af82019-06-04 13:04:16 +0100254void run_dequantization_qsymm8(const ITensor *input, ITensor *output, const Window &window)
255{
256 const UniformQuantizationInfo &qinfo = input->info()->quantization_info().uniform();
257 const float scale = qinfo.scale;
258
259 const int window_step_x = 16;
260 const auto window_start_x = static_cast<int>(window.x().start());
261 const auto window_end_x = static_cast<int>(window.x().end());
262
263 // Collapse window and reset first dimension to handle tail calculations manually
264 Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
265 win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
266
267 // Create iterators
268 Iterator in(input, win_collapsed);
269 Iterator out(output, win_collapsed);
270
271 execute_window_loop(win_collapsed, [&](const Coordinates &)
272 {
273 const auto in_ptr = reinterpret_cast<const int8_t *>(in.ptr());
274 const auto out_ptr = reinterpret_cast<T *>(out.ptr());
275
276 int x = window_start_x;
277 for(; x <= (window_end_x - window_step_x); x += window_step_x)
278 {
279 const auto vin = wrapper::vloadq(in_ptr + x);
280 const auto vdeq = vdequantize(vin, scale);
281
282 store_result<T>(reinterpret_cast<T *>(out_ptr + x), vdeq);
283 }
284
285 // Compute left-over elements
286 for(; x < window_end_x; ++x)
287 {
Manuel Bottini10c53f12019-07-17 16:11:53 +0100288 int8_t val = *(in_ptr + x);
Georgios Pinitas3d13af82019-06-04 13:04:16 +0100289 *(out_ptr + x) = static_cast<T>(dequantize(val, scale));
290 }
291 },
292 in, out);
293}
294
295template <typename T>
Michalis Spyrouba27e442019-05-28 10:04:57 +0100296void run_dequantization_qsymm16(const ITensor *input, ITensor *output, const Window &window)
297{
298 const UniformQuantizationInfo &qinfo = input->info()->quantization_info().uniform();
299 const float scale = qinfo.scale;
300
301 const int window_step_x = 8;
302 const auto window_start_x = static_cast<int>(window.x().start());
303 const auto window_end_x = static_cast<int>(window.x().end());
304
305 // Collapse window and reset first dimension to handle tail calculations manually
306 Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
307 win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
308
309 // Create iterators
310 Iterator in(input, win_collapsed);
311 Iterator out(output, win_collapsed);
312
313 execute_window_loop(win_collapsed, [&](const Coordinates &)
314 {
315 const auto in_ptr = reinterpret_cast<const int16_t *>(in.ptr());
316 const auto out_ptr = reinterpret_cast<T *>(out.ptr());
317
318 int x = window_start_x;
319 for(; x <= (window_end_x - window_step_x); x += window_step_x)
320 {
321 const auto vin = wrapper::vloadq(in_ptr + x);
322 const auto vdeq = vdequantize_int16(vin, scale);
323
324 store_result<T>(reinterpret_cast<T *>(out_ptr + x), vdeq);
325 }
326
327 // Compute left-over elements
328 for(; x < window_end_x; ++x)
329 {
330 int16_t val = *(in_ptr + x);
331 *(out_ptr + x) = static_cast<T>(dequantize_qsymm16(val, scale));
332 }
333 },
334 in, out);
335}
336
337template <typename T>
Georgios Pinitas3d13af82019-06-04 13:04:16 +0100338void run_dequantization_core(const ITensor *input, ITensor *output, const Window &window)
339{
340 switch(input->info()->data_type())
341 {
342 case DataType::QASYMM8:
343 run_dequantization_qasymm8<T>(input, output, window);
344 break;
Georgios Pinitas8217c8e2019-11-11 18:24:22 +0000345 case DataType::QSYMM8_PER_CHANNEL:
346 input->info()->data_layout() == DataLayout::NHWC ? run_dequantization_qsymm8_per_channel_nhwc<T>(input, output, window) : run_dequantization_qsymm8_per_channel_nchw<T>(input, output, window);
Michalis Spyrou29a01c92019-08-22 11:44:04 +0100347 break;
Georgios Pinitas3d13af82019-06-04 13:04:16 +0100348 case DataType::QSYMM8:
349 run_dequantization_qsymm8<T>(input, output, window);
350 break;
Michalis Spyrouba27e442019-05-28 10:04:57 +0100351 case DataType::QSYMM16:
352 run_dequantization_qsymm16<T>(input, output, window);
353 break;
Georgios Pinitas3d13af82019-06-04 13:04:16 +0100354 default:
355 ARM_COMPUTE_ERROR("Unsupported data type.");
356 }
357}
Alex Gilday60954c62018-03-05 16:22:48 +0000358} // namespace
359
Michele Di Giorgio32982d82017-07-07 14:44:43 +0100360NEDequantizationLayerKernel::NEDequantizationLayerKernel()
Georgios Pinitas574775c2019-02-18 20:08:02 +0000361 : _input(nullptr), _output(nullptr)
Michele Di Giorgio32982d82017-07-07 14:44:43 +0100362{
363}
364
Georgios Pinitas574775c2019-02-18 20:08:02 +0000365void NEDequantizationLayerKernel::configure(const ITensor *input, ITensor *output)
Michele Di Giorgio32982d82017-07-07 14:44:43 +0100366{
Georgios Pinitas574775c2019-02-18 20:08:02 +0000367 ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
368 ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info()));
Michele Di Giorgio32982d82017-07-07 14:44:43 +0100369
Georgios Pinitas574775c2019-02-18 20:08:02 +0000370 _input = input;
371 _output = output;
Michele Di Giorgio32982d82017-07-07 14:44:43 +0100372
Alex Gilday60954c62018-03-05 16:22:48 +0000373 // Configure kernel window
Georgios Pinitas574775c2019-02-18 20:08:02 +0000374 auto win_config = validate_and_configure_window(input->info(), output->info());
Michele Di Giorgio32982d82017-07-07 14:44:43 +0100375
Alex Gilday60954c62018-03-05 16:22:48 +0000376 ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
Gian Marco Iodice06b184a2017-08-29 16:05:25 +0100377
Alex Gilday60954c62018-03-05 16:22:48 +0000378 INEKernel::configure(std::get<1>(win_config));
379}
Michele Di Giorgio32982d82017-07-07 14:44:43 +0100380
Georgios Pinitas574775c2019-02-18 20:08:02 +0000381Status NEDequantizationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output)
Alex Gilday60954c62018-03-05 16:22:48 +0000382{
Georgios Pinitas574775c2019-02-18 20:08:02 +0000383 ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output));
384 ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get())));
Alex Gilday60954c62018-03-05 16:22:48 +0000385 return Status{};
Michele Di Giorgio32982d82017-07-07 14:44:43 +0100386}
387
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100388void NEDequantizationLayerKernel::run(const Window &window, const ThreadInfo &info)
Michele Di Giorgio32982d82017-07-07 14:44:43 +0100389{
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100390 ARM_COMPUTE_UNUSED(info);
Michele Di Giorgio32982d82017-07-07 14:44:43 +0100391 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
392 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
393
Georgios Pinitas574775c2019-02-18 20:08:02 +0000394 switch(_output->info()->data_type())
Michele Di Giorgio32982d82017-07-07 14:44:43 +0100395 {
Georgios Pinitas574775c2019-02-18 20:08:02 +0000396 case DataType::F32:
Georgios Pinitas3d13af82019-06-04 13:04:16 +0100397 run_dequantization_core<float>(_input, _output, window);
Georgios Pinitas574775c2019-02-18 20:08:02 +0000398 break;
399#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
400 case DataType::F16:
Georgios Pinitas3d13af82019-06-04 13:04:16 +0100401 run_dequantization_core<float16_t>(_input, _output, window);
Georgios Pinitas574775c2019-02-18 20:08:02 +0000402 break;
403#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
404 default:
405 ARM_COMPUTE_ERROR("Unsupported data type.");
406 }
407}
Michalis Spyrouba27e442019-05-28 10:04:57 +0100408} // namespace arm_compute