blob: e52f53ea04283a5eb0625cf0d6bfc29c48dc71f3 [file] [log] [blame]
/*
 * Copyright (c) 2017-2019 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#include "arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h"
25
Gian Marco Iodice06b184a2017-08-29 16:05:25 +010026#include "arm_compute/core/AccessWindowStatic.h"
Georgios Pinitas574775c2019-02-18 20:08:02 +000027#include "arm_compute/core/CPP/Validate.h"
Michele Di Giorgio32982d82017-07-07 14:44:43 +010028#include "arm_compute/core/Error.h"
29#include "arm_compute/core/Helpers.h"
Georgios Pinitas574775c2019-02-18 20:08:02 +000030#include "arm_compute/core/NEON/NEAsymm.h"
Michalis Spyrouba27e442019-05-28 10:04:57 +010031#include "arm_compute/core/NEON/NESymm.h"
Georgios Pinitas574775c2019-02-18 20:08:02 +000032#include "arm_compute/core/NEON/wrapper/wrapper.h"
Michele Di Giorgio32982d82017-07-07 14:44:43 +010033#include "arm_compute/core/Utils.h"
34#include "arm_compute/core/Validate.h"
35#include "arm_compute/core/Window.h"
36
37#include <arm_neon.h>
38
Georgios Pinitas574775c2019-02-18 20:08:02 +000039namespace arm_compute
40{
Alex Gilday60954c62018-03-05 16:22:48 +000041namespace
42{
Georgios Pinitas574775c2019-02-18 20:08:02 +000043Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
Alex Gilday60954c62018-03-05 16:22:48 +000044{
Georgios Pinitas574775c2019-02-18 20:08:02 +000045 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
Michalis Spyrouba27e442019-05-28 10:04:57 +010046 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QSYMM8, DataType::QSYMM16);
Alex Gilday60954c62018-03-05 16:22:48 +000047
48 if(output->tensor_shape().total_size() > 0)
49 {
Georgios Pinitas574775c2019-02-18 20:08:02 +000050 ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(output);
51 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F16, DataType::F32);
Alex Gilday60954c62018-03-05 16:22:48 +000052 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
53 }
54
55 return Status{};
56}
57
Georgios Pinitas574775c2019-02-18 20:08:02 +000058std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output)
Alex Gilday60954c62018-03-05 16:22:48 +000059{
Georgios Pinitas574775c2019-02-18 20:08:02 +000060 // Configure kernel window
61 Window win = calculate_max_window(*input, Steps());
62
Alex Gilday60954c62018-03-05 16:22:48 +000063 // Output tensor auto initialization if not yet initialized
Vidhya Sudhan Loganathan7485d5a2018-07-04 09:34:00 +010064 auto_init_if_empty(*output, input->tensor_shape(), 1, DataType::F32);
Alex Gilday60954c62018-03-05 16:22:48 +000065
Georgios Pinitas574775c2019-02-18 20:08:02 +000066 // NEDequantizationLayerKernel doesn't need padding so update_window_and_padding() can be skipped
67 Coordinates coord;
68 coord.set_num_dimensions(output->num_dimensions());
69 output->set_valid_region(ValidRegion(coord, output->tensor_shape()));
Alex Gilday60954c62018-03-05 16:22:48 +000070
Georgios Pinitas574775c2019-02-18 20:08:02 +000071 return std::make_tuple(Status{}, win);
72}
Alex Gilday60954c62018-03-05 16:22:48 +000073
Georgios Pinitas574775c2019-02-18 20:08:02 +000074template <typename T>
75inline void store_result(T *ptr, const float32x4x4_t &v)
76{
77 ARM_COMPUTE_UNUSED(ptr, v);
78}
Alex Gilday60954c62018-03-05 16:22:48 +000079
Georgios Pinitas574775c2019-02-18 20:08:02 +000080template <>
81inline void store_result<float>(float *ptr, const float32x4x4_t &v)
82{
83 wrapper::vstore(ptr, v.val[0]);
84 wrapper::vstore(ptr + 4, v.val[1]);
85 wrapper::vstore(ptr + 8, v.val[2]);
86 wrapper::vstore(ptr + 12, v.val[3]);
87}
Alex Gilday60954c62018-03-05 16:22:48 +000088
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
/** Convert four float32x4 vectors to half precision and store them.
 *
 * @param[out] ptr Destination; elements ptr[0] .. ptr[15] are written.
 * @param[in]  v   The four float32 vectors, converted and stored in order val[0] .. val[3].
 */
template <>
inline void store_result<float16_t>(float16_t *ptr, const float32x4x4_t &v)
{
    // Narrow each pair of float32x4 vectors into one float16x8 vector
    const float16x8_t first_half  = vcombine_f16(vcvt_f16_f32(v.val[0]), vcvt_f16_f32(v.val[1]));
    const float16x8_t second_half = vcombine_f16(vcvt_f16_f32(v.val[2]), vcvt_f16_f32(v.val[3]));

    wrapper::vstore(ptr, first_half);
    wrapper::vstore(ptr + 8, second_half);
}
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
97
98template <typename T>
Michalis Spyrouba27e442019-05-28 10:04:57 +010099inline void store_result(T *ptr, const float32x4x2_t &v)
100{
101 ARM_COMPUTE_UNUSED(ptr, v);
102}
103
104template <>
105inline void store_result<float>(float *ptr, const float32x4x2_t &v)
106{
107 wrapper::vstore(ptr, v.val[0]);
108 wrapper::vstore(ptr + 4, v.val[1]);
109}
110
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
/** Convert two float32x4 vectors to half precision and store them.
 *
 * @param[out] ptr Destination; elements ptr[0] .. ptr[7] are written.
 * @param[in]  v   The two float32 vectors, converted and stored in order val[0], val[1].
 */
template <>
inline void store_result<float16_t>(float16_t *ptr, const float32x4x2_t &v)
{
    // Narrow both float32x4 vectors and join them into a single float16x8 vector
    const float16x4_t low_part  = vcvt_f16_f32(v.val[0]);
    const float16x4_t high_part = vcvt_f16_f32(v.val[1]);

    wrapper::vstore(ptr, vcombine_f16(low_part, high_part));
}
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
118
119template <typename T>
Georgios Pinitas3d13af82019-06-04 13:04:16 +0100120void run_dequantization_qasymm8(const ITensor *input, ITensor *output, const Window &window)
Georgios Pinitas574775c2019-02-18 20:08:02 +0000121{
Georgios Pinitas3d13af82019-06-04 13:04:16 +0100122 const UniformQuantizationInfo &qinfo = input->info()->quantization_info().uniform();
123 const float scale = qinfo.scale;
124 const int32_t offset = qinfo.offset;
Georgios Pinitas574775c2019-02-18 20:08:02 +0000125
126 const int window_step_x = 16;
127 const auto window_start_x = static_cast<int>(window.x().start());
128 const auto window_end_x = static_cast<int>(window.x().end());
129
130 // Collapse window and reset first dimension to handle tail calculations manually
131 Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
132 win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
133
134 // Create iterators
135 Iterator in(input, win_collapsed);
136 Iterator out(output, win_collapsed);
137
Michalis Spyroua4f378d2019-04-26 14:54:54 +0100138 execute_window_loop(win_collapsed, [&](const Coordinates &)
Georgios Pinitas574775c2019-02-18 20:08:02 +0000139 {
140 const auto in_ptr = reinterpret_cast<const uint8_t *>(in.ptr());
141 const auto out_ptr = reinterpret_cast<T *>(out.ptr());
142
143 int x = window_start_x;
144 for(; x <= (window_end_x - window_step_x); x += window_step_x)
145 {
146 const auto vin = wrapper::vloadq(in_ptr + x);
Georgios Pinitas3d13af82019-06-04 13:04:16 +0100147 const auto vdeq = vdequantize(vin, scale, offset);
Georgios Pinitas574775c2019-02-18 20:08:02 +0000148
149 store_result<T>(reinterpret_cast<T *>(out_ptr + x), vdeq);
150 }
151
152 // Compute left-over elements
153 for(; x < window_end_x; ++x)
154 {
155 uint8_t val = *(in_ptr + x);
Georgios Pinitas3d13af82019-06-04 13:04:16 +0100156 *(out_ptr + x) = static_cast<T>(dequantize(val, scale, offset));
Georgios Pinitas574775c2019-02-18 20:08:02 +0000157 }
158 },
159 in, out);
Alex Gilday60954c62018-03-05 16:22:48 +0000160}
Georgios Pinitas3d13af82019-06-04 13:04:16 +0100161
162template <typename T>
163void run_dequantization_qsymm8(const ITensor *input, ITensor *output, const Window &window)
164{
165 const UniformQuantizationInfo &qinfo = input->info()->quantization_info().uniform();
166 const float scale = qinfo.scale;
167
168 const int window_step_x = 16;
169 const auto window_start_x = static_cast<int>(window.x().start());
170 const auto window_end_x = static_cast<int>(window.x().end());
171
172 // Collapse window and reset first dimension to handle tail calculations manually
173 Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
174 win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
175
176 // Create iterators
177 Iterator in(input, win_collapsed);
178 Iterator out(output, win_collapsed);
179
180 execute_window_loop(win_collapsed, [&](const Coordinates &)
181 {
182 const auto in_ptr = reinterpret_cast<const int8_t *>(in.ptr());
183 const auto out_ptr = reinterpret_cast<T *>(out.ptr());
184
185 int x = window_start_x;
186 for(; x <= (window_end_x - window_step_x); x += window_step_x)
187 {
188 const auto vin = wrapper::vloadq(in_ptr + x);
189 const auto vdeq = vdequantize(vin, scale);
190
191 store_result<T>(reinterpret_cast<T *>(out_ptr + x), vdeq);
192 }
193
194 // Compute left-over elements
195 for(; x < window_end_x; ++x)
196 {
Manuel Bottini10c53f12019-07-17 16:11:53 +0100197 int8_t val = *(in_ptr + x);
Georgios Pinitas3d13af82019-06-04 13:04:16 +0100198 *(out_ptr + x) = static_cast<T>(dequantize(val, scale));
199 }
200 },
201 in, out);
202}
203
204template <typename T>
Michalis Spyrouba27e442019-05-28 10:04:57 +0100205void run_dequantization_qsymm16(const ITensor *input, ITensor *output, const Window &window)
206{
207 const UniformQuantizationInfo &qinfo = input->info()->quantization_info().uniform();
208 const float scale = qinfo.scale;
209
210 const int window_step_x = 8;
211 const auto window_start_x = static_cast<int>(window.x().start());
212 const auto window_end_x = static_cast<int>(window.x().end());
213
214 // Collapse window and reset first dimension to handle tail calculations manually
215 Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
216 win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
217
218 // Create iterators
219 Iterator in(input, win_collapsed);
220 Iterator out(output, win_collapsed);
221
222 execute_window_loop(win_collapsed, [&](const Coordinates &)
223 {
224 const auto in_ptr = reinterpret_cast<const int16_t *>(in.ptr());
225 const auto out_ptr = reinterpret_cast<T *>(out.ptr());
226
227 int x = window_start_x;
228 for(; x <= (window_end_x - window_step_x); x += window_step_x)
229 {
230 const auto vin = wrapper::vloadq(in_ptr + x);
231 const auto vdeq = vdequantize_int16(vin, scale);
232
233 store_result<T>(reinterpret_cast<T *>(out_ptr + x), vdeq);
234 }
235
236 // Compute left-over elements
237 for(; x < window_end_x; ++x)
238 {
239 int16_t val = *(in_ptr + x);
240 *(out_ptr + x) = static_cast<T>(dequantize_qsymm16(val, scale));
241 }
242 },
243 in, out);
244}
245
246template <typename T>
Georgios Pinitas3d13af82019-06-04 13:04:16 +0100247void run_dequantization_core(const ITensor *input, ITensor *output, const Window &window)
248{
249 switch(input->info()->data_type())
250 {
251 case DataType::QASYMM8:
252 run_dequantization_qasymm8<T>(input, output, window);
253 break;
254 case DataType::QSYMM8:
255 run_dequantization_qsymm8<T>(input, output, window);
256 break;
Michalis Spyrouba27e442019-05-28 10:04:57 +0100257 case DataType::QSYMM16:
258 run_dequantization_qsymm16<T>(input, output, window);
259 break;
Georgios Pinitas3d13af82019-06-04 13:04:16 +0100260 default:
261 ARM_COMPUTE_ERROR("Unsupported data type.");
262 }
263}
Alex Gilday60954c62018-03-05 16:22:48 +0000264} // namespace
265
Michele Di Giorgio32982d82017-07-07 14:44:43 +0100266NEDequantizationLayerKernel::NEDequantizationLayerKernel()
Georgios Pinitas574775c2019-02-18 20:08:02 +0000267 : _input(nullptr), _output(nullptr)
Michele Di Giorgio32982d82017-07-07 14:44:43 +0100268{
269}
270
Georgios Pinitas574775c2019-02-18 20:08:02 +0000271void NEDequantizationLayerKernel::configure(const ITensor *input, ITensor *output)
Michele Di Giorgio32982d82017-07-07 14:44:43 +0100272{
Georgios Pinitas574775c2019-02-18 20:08:02 +0000273 ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
274 ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info()));
Michele Di Giorgio32982d82017-07-07 14:44:43 +0100275
Georgios Pinitas574775c2019-02-18 20:08:02 +0000276 _input = input;
277 _output = output;
Michele Di Giorgio32982d82017-07-07 14:44:43 +0100278
Alex Gilday60954c62018-03-05 16:22:48 +0000279 // Configure kernel window
Georgios Pinitas574775c2019-02-18 20:08:02 +0000280 auto win_config = validate_and_configure_window(input->info(), output->info());
Michele Di Giorgio32982d82017-07-07 14:44:43 +0100281
Alex Gilday60954c62018-03-05 16:22:48 +0000282 ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
Gian Marco Iodice06b184a2017-08-29 16:05:25 +0100283
Alex Gilday60954c62018-03-05 16:22:48 +0000284 INEKernel::configure(std::get<1>(win_config));
285}
Michele Di Giorgio32982d82017-07-07 14:44:43 +0100286
Georgios Pinitas574775c2019-02-18 20:08:02 +0000287Status NEDequantizationLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output)
Alex Gilday60954c62018-03-05 16:22:48 +0000288{
Georgios Pinitas574775c2019-02-18 20:08:02 +0000289 ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output));
290 ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get())));
Alex Gilday60954c62018-03-05 16:22:48 +0000291 return Status{};
Michele Di Giorgio32982d82017-07-07 14:44:43 +0100292}
293
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100294void NEDequantizationLayerKernel::run(const Window &window, const ThreadInfo &info)
Michele Di Giorgio32982d82017-07-07 14:44:43 +0100295{
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100296 ARM_COMPUTE_UNUSED(info);
Michele Di Giorgio32982d82017-07-07 14:44:43 +0100297 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
298 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
299
Georgios Pinitas574775c2019-02-18 20:08:02 +0000300 switch(_output->info()->data_type())
Michele Di Giorgio32982d82017-07-07 14:44:43 +0100301 {
Georgios Pinitas574775c2019-02-18 20:08:02 +0000302 case DataType::F32:
Georgios Pinitas3d13af82019-06-04 13:04:16 +0100303 run_dequantization_core<float>(_input, _output, window);
Georgios Pinitas574775c2019-02-18 20:08:02 +0000304 break;
305#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
306 case DataType::F16:
Georgios Pinitas3d13af82019-06-04 13:04:16 +0100307 run_dequantization_core<float16_t>(_input, _output, window);
Georgios Pinitas574775c2019-02-18 20:08:02 +0000308 break;
309#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
310 default:
311 ARM_COMPUTE_ERROR("Unsupported data type.");
312 }
313}
Michalis Spyrouba27e442019-05-28 10:04:57 +0100314} // namespace arm_compute