/*
 * Copyright (c) 2017-2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
Michalis Spyrouebcebf12020-10-21 00:04:14 +010024#include "src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010025
Anthony Barbier6ff3b192017-09-04 18:44:23 +010026#include "arm_compute/core/Error.h"
27#include "arm_compute/core/Helpers.h"
28#include "arm_compute/core/ITensor.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010029#include "arm_compute/core/Types.h"
30#include "arm_compute/core/Validate.h"
31#include "arm_compute/core/Window.h"
Michele Di Giorgio45361932019-12-19 13:53:44 +000032#include "arm_compute/core/utils/misc/Traits.h"
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +010033#include "src/core/AccessWindowStatic.h"
34#include "src/core/CPP/Validate.h"
Georgios Pinitasddb93bb2020-10-02 16:38:59 +010035#include "src/core/NEON/NEAsymm.h"
36#include "src/core/NEON/NEFixedPoint.h"
37#include "src/core/NEON/wrapper/wrapper.h"
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +010038#include "src/core/helpers/AutoConfiguration.h"
39#include "src/core/helpers/WindowHelpers.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010040
41#include <arm_neon.h>
42#include <cstddef>
43#include <cstdint>
44
Michele Di Giorgiof29d1b72019-10-29 10:58:13 +000045namespace arm_compute
46{
Anthony Barbier6ff3b192017-09-04 18:44:23 +010047namespace
48{
Michele Di Giorgioff271922019-07-17 15:59:32 +010049Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output,
Michele Di Giorgio45361932019-12-19 13:53:44 +000050 const DirectConvolutionLayerOutputStageKernelInfo &info)
Michalis Spyrouafa5d812017-11-30 14:25:57 +000051{
Michele Di Giorgio45361932019-12-19 13:53:44 +000052 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
Anthony Barbiereaefd002018-07-20 17:49:35 +010053 ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
Giorgio Arena1ed1fc62018-03-26 16:20:05 +010054 ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN);
Michele Di Giorgio45361932019-12-19 13:53:44 +000055 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::S32, DataType::F32);
Michalis Spyroub91e34c2017-12-20 15:50:55 +000056
57 if(bias != nullptr)
Michalis Spyrouafa5d812017-11-30 14:25:57 +000058 {
Michele Di Giorgio45361932019-12-19 13:53:44 +000059 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias);
Giorgio Arena1ed1fc62018-03-26 16:20:05 +010060 ARM_COMPUTE_RETURN_ERROR_ON(bias->dimension(0) != input->dimension(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL)));
Michalis Spyroub91e34c2017-12-20 15:50:55 +000061 ARM_COMPUTE_RETURN_ERROR_ON(bias->num_dimensions() > 1);
Michalis Spyrouafa5d812017-11-30 14:25:57 +000062 }
Michalis Spyrouafa5d812017-11-30 14:25:57 +000063
Michele Di Giorgio45361932019-12-19 13:53:44 +000064 if(input->data_type() == DataType::S32)
65 {
66 ARM_COMPUTE_RETURN_ERROR_ON_MSG(output == nullptr, "In-place computation not allowed for quantized output");
67 }
68
Michalis Spyrouafa5d812017-11-30 14:25:57 +000069 // Checks performed when output is configured
70 if((output != nullptr) && (output->total_size() != 0))
71 {
Michele Di Giorgio45361932019-12-19 13:53:44 +000072 if(is_data_type_float(input->data_type()))
Michalis Spyroub91e34c2017-12-20 15:50:55 +000073 {
74 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
75 }
Michele Di Giorgio45361932019-12-19 13:53:44 +000076 else
77 {
78 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED);
79 }
80 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
81 }
82 else if(input->data_type() == DataType::S32)
83 {
84 // In case of quantized computation and unconfigured output, the output data type must be provided through DirectConvolutionLayerOutputStageKernelInfo
85 ARM_COMPUTE_RETURN_ERROR_ON((info.output_data_type != DataType::QASYMM8) && (info.output_data_type != DataType::QASYMM8_SIGNED));
Michalis Spyrouafa5d812017-11-30 14:25:57 +000086 }
87
Michalis Spyrouafa5d812017-11-30 14:25:57 +000088 return Status{};
89}
90
Michalis Spyrou14e868e2020-09-30 00:33:05 +010091template <typename T>
Michele Di Giorgio45361932019-12-19 13:53:44 +000092typename std::enable_if<arm_compute::utils::traits::is_floating_point<T>::value, void>::type
93output_stage_nchw(ITensor *input, const ITensor *bias, const Window &window, ITensor *output,
Michalis Spyrou14e868e2020-09-30 00:33:05 +010094 int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, bool has_bias)
Anthony Barbier6ff3b192017-09-04 18:44:23 +010095{
Sheri Zhangac6499a2021-02-10 15:32:38 +000096 /** Neon vector tag type. */
Michele Di Giorgio45361932019-12-19 13:53:44 +000097 using ExactTagType = typename wrapper::traits::neon_bitvector_tag_t<T, wrapper::traits::BitWidth::W128>;
Pablo Tellof87cc7f2017-07-26 10:28:40 +010098
Giorgio Arena1ed1fc62018-03-26 16:20:05 +010099 ARM_COMPUTE_ERROR_ON(input->info()->data_layout() == DataLayout::UNKNOWN);
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000100 ARM_COMPUTE_UNUSED(result_fixedpoint_multiplier);
101 ARM_COMPUTE_UNUSED(result_shift);
102 ARM_COMPUTE_UNUSED(result_offset_after_shift);
103
Michalis Spyrou017ead22020-09-28 23:28:08 +0100104 const int window_start_x = window.x().start();
105 const int window_end_x = window.x().end();
106 const int window_step_x = 16 / input->info()->element_size();
107 Window win = window;
108 win.set(Window::DimX, Window::Dimension(0, 1, 1));
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100109
Michalis Spyrou017ead22020-09-28 23:28:08 +0100110 Iterator in(input, win);
111 Iterator out(output, win);
112 execute_window_loop(win, [&](const Coordinates & id)
113 {
114 int x = window_start_x;
Michalis Spyrouc2268532020-10-09 11:52:10 +0100115 for(; x <= (window_end_x - window_step_x); x += window_step_x)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100116 {
Michalis Spyrou017ead22020-09-28 23:28:08 +0100117 // Get bias and pointer to input
118 const auto in_ptr = reinterpret_cast<const T *>(in.ptr()) + x;
119 auto v_in = wrapper::vloadq(in_ptr);
120
121 // Accumulate bias
122 if(has_bias)
123 {
124 const auto vb = wrapper::vdup_n(*reinterpret_cast<const T *>(bias->ptr_to_element(Coordinates(id.z()))), ExactTagType{});
125 v_in = wrapper::vadd(v_in, vb);
126 }
127
128 const auto out_ptr = reinterpret_cast<T *>(out.ptr()) + x;
129 wrapper::vstore(out_ptr, v_in);
Michele Di Giorgio45361932019-12-19 13:53:44 +0000130 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100131
Michalis Spyrou017ead22020-09-28 23:28:08 +0100132 // Left-overs loop
133 for(; x < window_end_x; ++x)
134 {
135 // Get bias and pointer to input
136 auto s_in = *(reinterpret_cast<const T *>(in.ptr()) + x);
137
138 // Accumulate bias
139 if(has_bias)
140 {
141 const auto b = *reinterpret_cast<const T *>(bias->ptr_to_element(Coordinates(id.z())));
142 s_in += b;
143 }
144
145 *(reinterpret_cast<T *>(out.ptr()) + x) = s_in;
146 }
147
Michele Di Giorgio45361932019-12-19 13:53:44 +0000148 },
149 in, out);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100150}
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000151
Michalis Spyrou14e868e2020-09-30 00:33:05 +0100152template <typename T>
Michele Di Giorgio45361932019-12-19 13:53:44 +0000153typename std::enable_if<arm_compute::utils::traits::is_floating_point<T>::value, void>::type
154output_stage_nhwc(ITensor *input, const ITensor *bias, const Window &window, ITensor *output,
Michalis Spyrou14e868e2020-09-30 00:33:05 +0100155 int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, bool has_bias)
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100156{
157 ARM_COMPUTE_UNUSED(result_fixedpoint_multiplier);
158 ARM_COMPUTE_UNUSED(result_shift);
159 ARM_COMPUTE_UNUSED(result_offset_after_shift);
160
161 Window window_bias = window;
Michalis Spyrou017ead22020-09-28 23:28:08 +0100162 window_bias.set(Window::DimX, Window::Dimension(0, 1, 1));
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100163 window_bias.set(Window::DimY, Window::Dimension(0, 0, 0));
164 window_bias.set(Window::DimZ, Window::Dimension(0, 0, 0));
165 window_bias.set(3, Window::Dimension(0, 0, 0));
166
Michalis Spyrou017ead22020-09-28 23:28:08 +0100167 const int window_start_x = window.x().start();
168 const int window_end_x = window.x().end();
169 const int window_step_x = 16 / input->info()->element_size();
170 Window win = window;
171 win.set(Window::DimX, Window::Dimension(0, 1, 1));
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100172
Michalis Spyrou017ead22020-09-28 23:28:08 +0100173 Iterator in(input, win);
174 Iterator bi(bias, window_bias);
175 Iterator out(output, win);
176
177 execute_window_loop(win, [&](const Coordinates &)
178 {
179 int x = window_start_x;
Michalis Spyrouc2268532020-10-09 11:52:10 +0100180 for(; x <= (window_end_x - window_step_x); x += window_step_x)
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100181 {
Michalis Spyrou017ead22020-09-28 23:28:08 +0100182 // Get bias and pointer to input
183 const auto in_ptr = reinterpret_cast<const T *>(in.ptr());
184 auto v_in = wrapper::vloadq(in_ptr + x);
185
186 // Accumulate bias
187 if(has_bias)
188 {
189 const auto bias_ptr = reinterpret_cast<T *>(bi.ptr()) + x;
190 v_in = wrapper::vadd(v_in, wrapper::vloadq(bias_ptr));
191 }
192
193 const auto out_ptr = reinterpret_cast<T *>(out.ptr());
194 wrapper::vstore(out_ptr + x, v_in);
Michele Di Giorgio45361932019-12-19 13:53:44 +0000195 }
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100196
Michalis Spyrou017ead22020-09-28 23:28:08 +0100197 // Left-overs loop
198 for(; x < window_end_x; ++x)
199 {
200 // Get bias and pointer to input
201 auto s_in = *(reinterpret_cast<const T *>(in.ptr()) + x);
Michele Di Giorgio45361932019-12-19 13:53:44 +0000202
Michalis Spyrou017ead22020-09-28 23:28:08 +0100203 // Accumulate bias
204 if(has_bias)
205 {
206 const auto bias_ptr = reinterpret_cast<T *>(bi.ptr()) + x;
207 s_in += *bias_ptr;
208 }
209
210 const auto out_ptr = reinterpret_cast<T *>(out.ptr());
211 *(out_ptr + x) = s_in;
212 }
Michele Di Giorgio45361932019-12-19 13:53:44 +0000213 },
214 in, bi, out);
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100215}
216
Michele Di Giorgio45361932019-12-19 13:53:44 +0000217// Quantized case
Michalis Spyrou14e868e2020-09-30 00:33:05 +0100218template < typename TOut, typename std::enable_if < std::is_same<TOut, uint8_t>::value || std::is_same<TOut, int8_t>::value, int >::type = 0 >
Michele Di Giorgio45361932019-12-19 13:53:44 +0000219void output_stage_nchw(ITensor *input, const ITensor *bias, const Window &window, ITensor *output,
Michalis Spyrou14e868e2020-09-30 00:33:05 +0100220 int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, bool has_bias)
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000221{
Michele Di Giorgio45361932019-12-19 13:53:44 +0000222 using VectorType = typename wrapper::traits::neon_bitvector_t<TOut, wrapper::traits::BitWidth::W128>;
223 using TagType = typename wrapper::traits::neon_bitvector_tag_t<TOut, wrapper::traits::BitWidth::W128>;
224
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000225 const int32x4_t result_offset_after_shift_s32 = vdupq_n_s32(result_offset_after_shift);
Michele Di Giorgio45361932019-12-19 13:53:44 +0000226
227 const VectorType min = wrapper::vdup_n(std::numeric_limits<TOut>::lowest(), TagType{});
228 const VectorType max = wrapper::vdup_n(std::numeric_limits<TOut>::max(), TagType{});
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000229
Michalis Spyrou017ead22020-09-28 23:28:08 +0100230 const int window_start_x = window.x().start();
231 const int window_end_x = window.x().end();
232 const int window_step_x = 16 / input->info()->element_size();
233 Window win = window;
234 win.set(Window::DimX, Window::Dimension(0, 1, 1));
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000235
Michalis Spyrou017ead22020-09-28 23:28:08 +0100236 Iterator in(input, win);
237 Iterator out(output, win);
238
239 execute_window_loop(win, [&](const Coordinates & id)
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000240 {
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000241
Michalis Spyrou017ead22020-09-28 23:28:08 +0100242 int x = window_start_x;
Michalis Spyrouc2268532020-10-09 11:52:10 +0100243 for(; x <= (window_end_x - window_step_x); x += window_step_x)
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000244 {
Michalis Spyrou017ead22020-09-28 23:28:08 +0100245 // Get bias and pointer to input
246 const auto in_ptr = reinterpret_cast<int32_t *>(in.ptr()) + x;
247 int32x4x4_t v_in =
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000248 {
Michele Di Giorgio45361932019-12-19 13:53:44 +0000249 {
Michalis Spyrou017ead22020-09-28 23:28:08 +0100250 wrapper::vloadq(in_ptr),
251 wrapper::vloadq(in_ptr + 4),
252 wrapper::vloadq(in_ptr + 8),
253 wrapper::vloadq(in_ptr + 12)
Michele Di Giorgio45361932019-12-19 13:53:44 +0000254 }
255 };
Michalis Spyrou017ead22020-09-28 23:28:08 +0100256
257 // Accumulate bias
258 if(has_bias)
259 {
260 const auto vb = wrapper::vdup_n(*reinterpret_cast<const int32_t *>(bias->ptr_to_element(Coordinates(id.z()))), TagType{});
261 v_in =
262 {
263 {
264 wrapper::vadd(v_in.val[0], vb),
265 wrapper::vadd(v_in.val[1], vb),
266 wrapper::vadd(v_in.val[2], vb),
267 wrapper::vadd(v_in.val[3], vb)
268 }
269 };
270 }
271
272 const auto out_ptr = reinterpret_cast<TOut *>(out.ptr()) + x;
273 wrapper::vstore(out_ptr, finalize_quantization(v_in, result_fixedpoint_multiplier, result_shift, result_offset_after_shift_s32,
274 min, max, false));
Michele Di Giorgio45361932019-12-19 13:53:44 +0000275 }
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000276
Michalis Spyrou017ead22020-09-28 23:28:08 +0100277 // Left-overs loop
278 for(; x < window_end_x; ++x)
279 {
280 // Get bias and pointer to input
281 int32_t s_in = *(reinterpret_cast<const int32_t *>(in.ptr()) + x);
282
283 // Accumulate bias
284 if(has_bias)
285 {
286 const auto b = *reinterpret_cast<const int32_t *>(bias->ptr_to_element(Coordinates(id.z())));
287 s_in += b;
288 }
289
290 const auto out_ptr = reinterpret_cast<TOut *>(out.ptr()) + x;
291 *out_ptr = finalize_quantization(s_in, result_fixedpoint_multiplier, result_shift, result_offset_after_shift,
292 std::numeric_limits<TOut>::lowest(), std::numeric_limits<TOut>::max(), false);
293 }
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000294 },
295 in, out);
296}
Michalis Spyrou14e868e2020-09-30 00:33:05 +0100297template < typename TOut, typename std::enable_if < std::is_same<TOut, uint8_t>::value || std::is_same<TOut, int8_t>::value, int >::type = 0 >
Michele Di Giorgio45361932019-12-19 13:53:44 +0000298void output_stage_nhwc(ITensor *input, const ITensor *bias, const Window &window, ITensor *output,
Michalis Spyrou14e868e2020-09-30 00:33:05 +0100299 int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, bool has_bias)
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000300{
Michele Di Giorgio45361932019-12-19 13:53:44 +0000301 using VectorType = typename wrapper::traits::neon_bitvector_t<TOut, wrapper::traits::BitWidth::W128>;
302 using TagType = typename wrapper::traits::neon_bitvector_tag_t<TOut, wrapper::traits::BitWidth::W128>;
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000303
304 const int32x4_t result_offset_after_shift_s32 = vdupq_n_s32(result_offset_after_shift);
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000305
Michele Di Giorgio45361932019-12-19 13:53:44 +0000306 const VectorType min = wrapper::vdup_n(std::numeric_limits<TOut>::lowest(), TagType{});
307 const VectorType max = wrapper::vdup_n(std::numeric_limits<TOut>::max(), TagType{});
Georgios Pinitasa799ce02018-09-12 20:11:34 +0100308
309 Window window_bias = window;
Michalis Spyrou017ead22020-09-28 23:28:08 +0100310 window_bias.set(Window::DimX, Window::Dimension(0, 1, 1));
Georgios Pinitasa799ce02018-09-12 20:11:34 +0100311 window_bias.set(Window::DimY, Window::Dimension(0, 0, 0));
312 window_bias.set(Window::DimZ, Window::Dimension(0, 0, 0));
313 window_bias.set(3, Window::Dimension(0, 0, 0));
314
Michalis Spyrou017ead22020-09-28 23:28:08 +0100315 const int window_start_x = window.x().start();
316 const int window_end_x = window.x().end();
317 const int window_step_x = 16 / input->info()->element_size();
318 Window win = window;
319 win.set(Window::DimX, Window::Dimension(0, 1, 1));
320
321 Iterator in(input, win);
Georgios Pinitasa799ce02018-09-12 20:11:34 +0100322 Iterator bi(bias, window_bias);
Michalis Spyrou017ead22020-09-28 23:28:08 +0100323 Iterator out(output, win);
Georgios Pinitasa799ce02018-09-12 20:11:34 +0100324
Michalis Spyrou017ead22020-09-28 23:28:08 +0100325 execute_window_loop(win, [&](const Coordinates &)
Georgios Pinitasa799ce02018-09-12 20:11:34 +0100326 {
Michalis Spyrou017ead22020-09-28 23:28:08 +0100327 int x = window_start_x;
Michalis Spyrouc2268532020-10-09 11:52:10 +0100328 for(; x <= (window_end_x - window_step_x); x += window_step_x)
Michele Di Giorgio45361932019-12-19 13:53:44 +0000329 {
Michalis Spyrou017ead22020-09-28 23:28:08 +0100330 // Get bias and pointer to input
331 const auto in_ptr = reinterpret_cast<int32_t *>(in.ptr()) + x;
332 int32x4x4_t v_in =
Michele Di Giorgio45361932019-12-19 13:53:44 +0000333 {
Michalis Spyrou017ead22020-09-28 23:28:08 +0100334 {
335 wrapper::vloadq(in_ptr),
336 wrapper::vloadq(in_ptr + 4),
337 wrapper::vloadq(in_ptr + 8),
338 wrapper::vloadq(in_ptr + 12),
339 }
340 };
341
342 // Accumulate bias
343 if(has_bias)
344 {
345 const auto bias_ptr = reinterpret_cast<int32_t *>(bi.ptr()) + x;
346
347 wrapper::vadd(v_in.val[0], wrapper::vloadq(bias_ptr));
348 wrapper::vadd(v_in.val[1], wrapper::vloadq(bias_ptr + 4));
349 wrapper::vadd(v_in.val[2], wrapper::vloadq(bias_ptr + 8));
350 wrapper::vadd(v_in.val[3], wrapper::vloadq(bias_ptr + 12));
Michele Di Giorgio45361932019-12-19 13:53:44 +0000351 }
Georgios Pinitasa799ce02018-09-12 20:11:34 +0100352
Michalis Spyrou017ead22020-09-28 23:28:08 +0100353 const auto out_ptr = reinterpret_cast<TOut *>(out.ptr()) + x;
354 wrapper::vstore(out_ptr, finalize_quantization(v_in, result_fixedpoint_multiplier, result_shift, result_offset_after_shift_s32, min, max, false));
Michele Di Giorgio45361932019-12-19 13:53:44 +0000355 }
356
Michalis Spyrou017ead22020-09-28 23:28:08 +0100357 // Left-overs loop
358 for(; x < window_end_x; ++x)
359 {
360 // Get bias and pointer to input
361 const auto in_ptr = reinterpret_cast<int32_t *>(in.ptr()) + x;
362 int32_t s_in = *in_ptr;
363
364 // Accumulate bias
365 if(has_bias)
366 {
367 const auto bias_ptr = reinterpret_cast<int32_t *>(bi.ptr()) + x;
368 s_in += *bias_ptr;
369 }
370
371 const auto out_ptr = reinterpret_cast<TOut *>(out.ptr()) + x;
372 *out_ptr = finalize_quantization(s_in, result_fixedpoint_multiplier, result_shift, result_offset_after_shift,
373 std::numeric_limits<TOut>::lowest(), std::numeric_limits<TOut>::max(), false);
374 }
Georgios Pinitasa799ce02018-09-12 20:11:34 +0100375 },
376 in, bi, out);
377}
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100378} // namespace
379
Michalis Spyroub91e34c2017-12-20 15:50:55 +0000380NEDirectConvolutionLayerOutputStageKernel::NEDirectConvolutionLayerOutputStageKernel()
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000381 : _func(nullptr), _input(nullptr), _bias(nullptr), _output(nullptr), _result_fixedpoint_multiplier(0), _result_shift(0), _result_offset_after_shift(0)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100382{
383}
384
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000385void NEDirectConvolutionLayerOutputStageKernel::configure(ITensor *input, const ITensor *bias, ITensor *output,
Michele Di Giorgio45361932019-12-19 13:53:44 +0000386 const DirectConvolutionLayerOutputStageKernelInfo &info)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100387{
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000388 // Perform validation step
Michele Di Giorgio45361932019-12-19 13:53:44 +0000389 ARM_COMPUTE_ERROR_ON_NULLPTR(input);
390 ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), (bias == nullptr) ? nullptr : bias->info(), (output == nullptr) ? nullptr : output->info(), info));
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100391
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000392 _func = nullptr;
393 _bias = bias;
394 _input = input;
Michele Di Giorgio45361932019-12-19 13:53:44 +0000395 _output = (output != nullptr) ? output : input;
396 _result_fixedpoint_multiplier = info.result_fixedpoint_multiplier;
397 _result_shift = info.result_shift;
398 _result_offset_after_shift = info.result_offset_after_shift;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100399
Michalis Spyrou017ead22020-09-28 23:28:08 +0100400 // Auto-initialize output output if required
401 if(output != nullptr && output->info() != nullptr)
402 {
403 // Work out expected output data type
404 const DataType output_dt = (input->info()->data_type() == DataType::S32) ? info.output_data_type : DataType::S32;
405 // Output tensor auto initialization if not yet initialized
406 auto_init_if_empty(*output->info(), input->info()->clone()->set_data_type(output_dt));
407 }
408
SiCongLib88272e2021-02-24 15:40:57 +0000409 Window win = calculate_max_window(*input->info(), Steps());
Michalis Spyrou017ead22020-09-28 23:28:08 +0100410
411 INEKernel::configure(win);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100412
Michele Di Giorgio45361932019-12-19 13:53:44 +0000413 const bool is_qasymm8_signed = (output != nullptr) ? is_data_type_quantized_asymmetric_signed(output->info()->data_type()) : false;
Gian Marco Iodice618493d2018-11-27 16:38:33 +0000414
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100415 // Set appropriate function
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100416 if(input->info()->data_layout() == DataLayout::NCHW)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100417 {
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100418 switch(input->info()->data_type())
Pablo Tellof87cc7f2017-07-26 10:28:40 +0100419 {
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100420 case DataType::S32:
Michalis Spyroub91e34c2017-12-20 15:50:55 +0000421 {
Michele Di Giorgio45361932019-12-19 13:53:44 +0000422 if(is_qasymm8_signed)
423 {
Michalis Spyrou14e868e2020-09-30 00:33:05 +0100424 _func = &output_stage_nchw<int8_t>;
Michele Di Giorgio45361932019-12-19 13:53:44 +0000425 }
426 else
427 {
Michalis Spyrou14e868e2020-09-30 00:33:05 +0100428 _func = &output_stage_nchw<uint8_t>;
Michele Di Giorgio45361932019-12-19 13:53:44 +0000429 }
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100430 break;
Pablo Tellof87cc7f2017-07-26 10:28:40 +0100431 }
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +0000432#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100433 case DataType::F16:
434 {
Michalis Spyrou14e868e2020-09-30 00:33:05 +0100435 _func = &output_stage_nchw<float16_t>;
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100436 break;
437 }
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +0000438#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100439 case DataType::F32:
440 {
Michalis Spyrou14e868e2020-09-30 00:33:05 +0100441 _func = &output_stage_nchw<float>;
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100442 break;
443 }
444 default:
445 {
446 ARM_COMPUTE_ERROR("Unsupported combination of types among the inputs.");
447 }
Pablo Tellof87cc7f2017-07-26 10:28:40 +0100448 }
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100449 }
450 else
451 {
452 switch(input->info()->data_type())
Pablo Tellof87cc7f2017-07-26 10:28:40 +0100453 {
Georgios Pinitasa799ce02018-09-12 20:11:34 +0100454 case DataType::S32:
455 {
Michele Di Giorgio45361932019-12-19 13:53:44 +0000456 if(is_qasymm8_signed)
457 {
Michalis Spyrou14e868e2020-09-30 00:33:05 +0100458 _func = &output_stage_nhwc<int8_t>;
Michele Di Giorgio45361932019-12-19 13:53:44 +0000459 }
460 else
461 {
Michalis Spyrou14e868e2020-09-30 00:33:05 +0100462 _func = &output_stage_nhwc<uint8_t>;
Michele Di Giorgio45361932019-12-19 13:53:44 +0000463 }
Georgios Pinitasa799ce02018-09-12 20:11:34 +0100464 break;
465 }
Georgios Pinitas20c246a2018-09-12 16:45:53 +0100466#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
467 case DataType::F16:
468 {
Michalis Spyrou14e868e2020-09-30 00:33:05 +0100469 _func = &output_stage_nhwc<float16_t>;
Georgios Pinitas20c246a2018-09-12 16:45:53 +0100470 break;
471 }
472#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100473 case DataType::F32:
474 {
Michalis Spyrou14e868e2020-09-30 00:33:05 +0100475 _func = &output_stage_nhwc<float>;
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100476 break;
477 }
478 default:
479 {
480 ARM_COMPUTE_ERROR("Unsupported combination of types among the inputs.");
481 }
Pablo Tellof87cc7f2017-07-26 10:28:40 +0100482 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100483 }
484}
485
Michele Di Giorgioff271922019-07-17 15:59:32 +0100486Status NEDirectConvolutionLayerOutputStageKernel::validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output,
Michele Di Giorgio45361932019-12-19 13:53:44 +0000487 const DirectConvolutionLayerOutputStageKernelInfo &info)
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000488{
Michele Di Giorgio45361932019-12-19 13:53:44 +0000489 ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, bias, output, info));
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000490
491 return Status{};
492}
493
Michalis Spyroub91e34c2017-12-20 15:50:55 +0000494void NEDirectConvolutionLayerOutputStageKernel::run(const Window &window, const ThreadInfo &info)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100495{
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100496 ARM_COMPUTE_UNUSED(info);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100497 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
498 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
499 ARM_COMPUTE_ERROR_ON(_func == nullptr);
500
Michalis Spyrou14e868e2020-09-30 00:33:05 +0100501 const bool has_bias = _bias != nullptr;
502 (*_func)(_input, _bias, window, _output, _result_fixedpoint_multiplier, _result_shift, _result_offset_after_shift, has_bias);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100503}
Michele Di Giorgiof29d1b72019-10-29 10:58:13 +0000504} // namespace arm_compute