/*
 * Copyright (c) 2017-2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
Michalis Spyroub91e34c2017-12-20 15:50:55 +000024#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010025
26#include "arm_compute/core/AccessWindowStatic.h"
Anthony Barbiereaefd002018-07-20 17:49:35 +010027#include "arm_compute/core/CPP/Validate.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010028#include "arm_compute/core/Error.h"
29#include "arm_compute/core/Helpers.h"
30#include "arm_compute/core/ITensor.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010031#include "arm_compute/core/Types.h"
32#include "arm_compute/core/Validate.h"
33#include "arm_compute/core/Window.h"
Michele Di Giorgio45361932019-12-19 13:53:44 +000034#include "arm_compute/core/utils/misc/Traits.h"
Georgios Pinitasddb93bb2020-10-02 16:38:59 +010035#include "src/core/NEON/NEAsymm.h"
36#include "src/core/NEON/NEFixedPoint.h"
37#include "src/core/NEON/wrapper/wrapper.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010038
39#include <arm_neon.h>
40#include <cstddef>
41#include <cstdint>
42
Michele Di Giorgiof29d1b72019-10-29 10:58:13 +000043namespace arm_compute
44{
Anthony Barbier6ff3b192017-09-04 18:44:23 +010045namespace
46{
Michele Di Giorgioff271922019-07-17 15:59:32 +010047Status validate_arguments(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output,
Michele Di Giorgio45361932019-12-19 13:53:44 +000048 const DirectConvolutionLayerOutputStageKernelInfo &info)
Michalis Spyrouafa5d812017-11-30 14:25:57 +000049{
Michele Di Giorgio45361932019-12-19 13:53:44 +000050 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
Anthony Barbiereaefd002018-07-20 17:49:35 +010051 ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
Giorgio Arena1ed1fc62018-03-26 16:20:05 +010052 ARM_COMPUTE_RETURN_ERROR_ON(input->data_layout() == DataLayout::UNKNOWN);
Michele Di Giorgio45361932019-12-19 13:53:44 +000053 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::S32, DataType::F32);
Michalis Spyroub91e34c2017-12-20 15:50:55 +000054
55 if(bias != nullptr)
Michalis Spyrouafa5d812017-11-30 14:25:57 +000056 {
Michele Di Giorgio45361932019-12-19 13:53:44 +000057 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias);
Giorgio Arena1ed1fc62018-03-26 16:20:05 +010058 ARM_COMPUTE_RETURN_ERROR_ON(bias->dimension(0) != input->dimension(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL)));
Michalis Spyroub91e34c2017-12-20 15:50:55 +000059 ARM_COMPUTE_RETURN_ERROR_ON(bias->num_dimensions() > 1);
Michalis Spyrouafa5d812017-11-30 14:25:57 +000060 }
Michalis Spyrouafa5d812017-11-30 14:25:57 +000061
Michele Di Giorgio45361932019-12-19 13:53:44 +000062 if(input->data_type() == DataType::S32)
63 {
64 ARM_COMPUTE_RETURN_ERROR_ON_MSG(output == nullptr, "In-place computation not allowed for quantized output");
65 }
66
Michalis Spyrouafa5d812017-11-30 14:25:57 +000067 // Checks performed when output is configured
68 if((output != nullptr) && (output->total_size() != 0))
69 {
Michele Di Giorgio45361932019-12-19 13:53:44 +000070 if(is_data_type_float(input->data_type()))
Michalis Spyroub91e34c2017-12-20 15:50:55 +000071 {
72 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
73 }
Michele Di Giorgio45361932019-12-19 13:53:44 +000074 else
75 {
76 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED);
77 }
78 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
79 }
80 else if(input->data_type() == DataType::S32)
81 {
82 // In case of quantized computation and unconfigured output, the output data type must be provided through DirectConvolutionLayerOutputStageKernelInfo
83 ARM_COMPUTE_RETURN_ERROR_ON((info.output_data_type != DataType::QASYMM8) && (info.output_data_type != DataType::QASYMM8_SIGNED));
Michalis Spyrouafa5d812017-11-30 14:25:57 +000084 }
85
Michalis Spyrouafa5d812017-11-30 14:25:57 +000086 return Status{};
87}
88
Michalis Spyrou14e868e2020-09-30 00:33:05 +010089template <typename T>
Michele Di Giorgio45361932019-12-19 13:53:44 +000090typename std::enable_if<arm_compute::utils::traits::is_floating_point<T>::value, void>::type
91output_stage_nchw(ITensor *input, const ITensor *bias, const Window &window, ITensor *output,
Michalis Spyrou14e868e2020-09-30 00:33:05 +010092 int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, bool has_bias)
Anthony Barbier6ff3b192017-09-04 18:44:23 +010093{
Michele Di Giorgio45361932019-12-19 13:53:44 +000094 /** NEON vector tag type. */
95 using ExactTagType = typename wrapper::traits::neon_bitvector_tag_t<T, wrapper::traits::BitWidth::W128>;
Pablo Tellof87cc7f2017-07-26 10:28:40 +010096
Giorgio Arena1ed1fc62018-03-26 16:20:05 +010097 ARM_COMPUTE_ERROR_ON(input->info()->data_layout() == DataLayout::UNKNOWN);
Georgios Pinitasf72f9362018-01-12 16:29:45 +000098 ARM_COMPUTE_UNUSED(result_fixedpoint_multiplier);
99 ARM_COMPUTE_UNUSED(result_shift);
100 ARM_COMPUTE_UNUSED(result_offset_after_shift);
101
Michalis Spyrou017ead22020-09-28 23:28:08 +0100102 const int window_start_x = window.x().start();
103 const int window_end_x = window.x().end();
104 const int window_step_x = 16 / input->info()->element_size();
105 Window win = window;
106 win.set(Window::DimX, Window::Dimension(0, 1, 1));
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100107
Michalis Spyrou017ead22020-09-28 23:28:08 +0100108 Iterator in(input, win);
109 Iterator out(output, win);
110 execute_window_loop(win, [&](const Coordinates & id)
111 {
112 int x = window_start_x;
113 for(; x < (window_end_x - window_step_x); x += window_step_x)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100114 {
Michalis Spyrou017ead22020-09-28 23:28:08 +0100115 // Get bias and pointer to input
116 const auto in_ptr = reinterpret_cast<const T *>(in.ptr()) + x;
117 auto v_in = wrapper::vloadq(in_ptr);
118
119 // Accumulate bias
120 if(has_bias)
121 {
122 const auto vb = wrapper::vdup_n(*reinterpret_cast<const T *>(bias->ptr_to_element(Coordinates(id.z()))), ExactTagType{});
123 v_in = wrapper::vadd(v_in, vb);
124 }
125
126 const auto out_ptr = reinterpret_cast<T *>(out.ptr()) + x;
127 wrapper::vstore(out_ptr, v_in);
Michele Di Giorgio45361932019-12-19 13:53:44 +0000128 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100129
Michalis Spyrou017ead22020-09-28 23:28:08 +0100130 // Left-overs loop
131 for(; x < window_end_x; ++x)
132 {
133 // Get bias and pointer to input
134 auto s_in = *(reinterpret_cast<const T *>(in.ptr()) + x);
135
136 // Accumulate bias
137 if(has_bias)
138 {
139 const auto b = *reinterpret_cast<const T *>(bias->ptr_to_element(Coordinates(id.z())));
140 s_in += b;
141 }
142
143 *(reinterpret_cast<T *>(out.ptr()) + x) = s_in;
144 }
145
Michele Di Giorgio45361932019-12-19 13:53:44 +0000146 },
147 in, out);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100148}
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000149
Michalis Spyrou14e868e2020-09-30 00:33:05 +0100150template <typename T>
Michele Di Giorgio45361932019-12-19 13:53:44 +0000151typename std::enable_if<arm_compute::utils::traits::is_floating_point<T>::value, void>::type
152output_stage_nhwc(ITensor *input, const ITensor *bias, const Window &window, ITensor *output,
Michalis Spyrou14e868e2020-09-30 00:33:05 +0100153 int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, bool has_bias)
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100154{
155 ARM_COMPUTE_UNUSED(result_fixedpoint_multiplier);
156 ARM_COMPUTE_UNUSED(result_shift);
157 ARM_COMPUTE_UNUSED(result_offset_after_shift);
158
159 Window window_bias = window;
Michalis Spyrou017ead22020-09-28 23:28:08 +0100160 window_bias.set(Window::DimX, Window::Dimension(0, 1, 1));
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100161 window_bias.set(Window::DimY, Window::Dimension(0, 0, 0));
162 window_bias.set(Window::DimZ, Window::Dimension(0, 0, 0));
163 window_bias.set(3, Window::Dimension(0, 0, 0));
164
Michalis Spyrou017ead22020-09-28 23:28:08 +0100165 const int window_start_x = window.x().start();
166 const int window_end_x = window.x().end();
167 const int window_step_x = 16 / input->info()->element_size();
168 Window win = window;
169 win.set(Window::DimX, Window::Dimension(0, 1, 1));
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100170
Michalis Spyrou017ead22020-09-28 23:28:08 +0100171 Iterator in(input, win);
172 Iterator bi(bias, window_bias);
173 Iterator out(output, win);
174
175 execute_window_loop(win, [&](const Coordinates &)
176 {
177 int x = window_start_x;
178 for(; x < (window_end_x - window_step_x); x += window_step_x)
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100179 {
Michalis Spyrou017ead22020-09-28 23:28:08 +0100180 // Get bias and pointer to input
181 const auto in_ptr = reinterpret_cast<const T *>(in.ptr());
182 auto v_in = wrapper::vloadq(in_ptr + x);
183
184 // Accumulate bias
185 if(has_bias)
186 {
187 const auto bias_ptr = reinterpret_cast<T *>(bi.ptr()) + x;
188 v_in = wrapper::vadd(v_in, wrapper::vloadq(bias_ptr));
189 }
190
191 const auto out_ptr = reinterpret_cast<T *>(out.ptr());
192 wrapper::vstore(out_ptr + x, v_in);
Michele Di Giorgio45361932019-12-19 13:53:44 +0000193 }
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100194
Michalis Spyrou017ead22020-09-28 23:28:08 +0100195 // Left-overs loop
196 for(; x < window_end_x; ++x)
197 {
198 // Get bias and pointer to input
199 auto s_in = *(reinterpret_cast<const T *>(in.ptr()) + x);
Michele Di Giorgio45361932019-12-19 13:53:44 +0000200
Michalis Spyrou017ead22020-09-28 23:28:08 +0100201 // Accumulate bias
202 if(has_bias)
203 {
204 const auto bias_ptr = reinterpret_cast<T *>(bi.ptr()) + x;
205 s_in += *bias_ptr;
206 }
207
208 const auto out_ptr = reinterpret_cast<T *>(out.ptr());
209 *(out_ptr + x) = s_in;
210 }
Michele Di Giorgio45361932019-12-19 13:53:44 +0000211 },
212 in, bi, out);
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100213}
214
Michele Di Giorgio45361932019-12-19 13:53:44 +0000215// Quantized case
Michalis Spyrou14e868e2020-09-30 00:33:05 +0100216template < typename TOut, typename std::enable_if < std::is_same<TOut, uint8_t>::value || std::is_same<TOut, int8_t>::value, int >::type = 0 >
Michele Di Giorgio45361932019-12-19 13:53:44 +0000217void output_stage_nchw(ITensor *input, const ITensor *bias, const Window &window, ITensor *output,
Michalis Spyrou14e868e2020-09-30 00:33:05 +0100218 int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, bool has_bias)
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000219{
Michele Di Giorgio45361932019-12-19 13:53:44 +0000220 using VectorType = typename wrapper::traits::neon_bitvector_t<TOut, wrapper::traits::BitWidth::W128>;
221 using TagType = typename wrapper::traits::neon_bitvector_tag_t<TOut, wrapper::traits::BitWidth::W128>;
222
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000223 const int32x4_t result_offset_after_shift_s32 = vdupq_n_s32(result_offset_after_shift);
Michele Di Giorgio45361932019-12-19 13:53:44 +0000224
225 const VectorType min = wrapper::vdup_n(std::numeric_limits<TOut>::lowest(), TagType{});
226 const VectorType max = wrapper::vdup_n(std::numeric_limits<TOut>::max(), TagType{});
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000227
Michalis Spyrou017ead22020-09-28 23:28:08 +0100228 const int window_start_x = window.x().start();
229 const int window_end_x = window.x().end();
230 const int window_step_x = 16 / input->info()->element_size();
231 Window win = window;
232 win.set(Window::DimX, Window::Dimension(0, 1, 1));
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000233
Michalis Spyrou017ead22020-09-28 23:28:08 +0100234 Iterator in(input, win);
235 Iterator out(output, win);
236
237 execute_window_loop(win, [&](const Coordinates & id)
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000238 {
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000239
Michalis Spyrou017ead22020-09-28 23:28:08 +0100240 int x = window_start_x;
241 for(; x < (window_end_x - window_step_x); x += window_step_x)
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000242 {
Michalis Spyrou017ead22020-09-28 23:28:08 +0100243 // Get bias and pointer to input
244 const auto in_ptr = reinterpret_cast<int32_t *>(in.ptr()) + x;
245 int32x4x4_t v_in =
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000246 {
Michele Di Giorgio45361932019-12-19 13:53:44 +0000247 {
Michalis Spyrou017ead22020-09-28 23:28:08 +0100248 wrapper::vloadq(in_ptr),
249 wrapper::vloadq(in_ptr + 4),
250 wrapper::vloadq(in_ptr + 8),
251 wrapper::vloadq(in_ptr + 12)
Michele Di Giorgio45361932019-12-19 13:53:44 +0000252 }
253 };
Michalis Spyrou017ead22020-09-28 23:28:08 +0100254
255 // Accumulate bias
256 if(has_bias)
257 {
258 const auto vb = wrapper::vdup_n(*reinterpret_cast<const int32_t *>(bias->ptr_to_element(Coordinates(id.z()))), TagType{});
259 v_in =
260 {
261 {
262 wrapper::vadd(v_in.val[0], vb),
263 wrapper::vadd(v_in.val[1], vb),
264 wrapper::vadd(v_in.val[2], vb),
265 wrapper::vadd(v_in.val[3], vb)
266 }
267 };
268 }
269
270 const auto out_ptr = reinterpret_cast<TOut *>(out.ptr()) + x;
271 wrapper::vstore(out_ptr, finalize_quantization(v_in, result_fixedpoint_multiplier, result_shift, result_offset_after_shift_s32,
272 min, max, false));
Michele Di Giorgio45361932019-12-19 13:53:44 +0000273 }
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000274
Michalis Spyrou017ead22020-09-28 23:28:08 +0100275 // Left-overs loop
276 for(; x < window_end_x; ++x)
277 {
278 // Get bias and pointer to input
279 int32_t s_in = *(reinterpret_cast<const int32_t *>(in.ptr()) + x);
280
281 // Accumulate bias
282 if(has_bias)
283 {
284 const auto b = *reinterpret_cast<const int32_t *>(bias->ptr_to_element(Coordinates(id.z())));
285 s_in += b;
286 }
287
288 const auto out_ptr = reinterpret_cast<TOut *>(out.ptr()) + x;
289 *out_ptr = finalize_quantization(s_in, result_fixedpoint_multiplier, result_shift, result_offset_after_shift,
290 std::numeric_limits<TOut>::lowest(), std::numeric_limits<TOut>::max(), false);
291 }
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000292 },
293 in, out);
294}
Michalis Spyrou14e868e2020-09-30 00:33:05 +0100295template < typename TOut, typename std::enable_if < std::is_same<TOut, uint8_t>::value || std::is_same<TOut, int8_t>::value, int >::type = 0 >
Michele Di Giorgio45361932019-12-19 13:53:44 +0000296void output_stage_nhwc(ITensor *input, const ITensor *bias, const Window &window, ITensor *output,
Michalis Spyrou14e868e2020-09-30 00:33:05 +0100297 int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, bool has_bias)
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000298{
Michele Di Giorgio45361932019-12-19 13:53:44 +0000299 using VectorType = typename wrapper::traits::neon_bitvector_t<TOut, wrapper::traits::BitWidth::W128>;
300 using TagType = typename wrapper::traits::neon_bitvector_tag_t<TOut, wrapper::traits::BitWidth::W128>;
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000301
302 const int32x4_t result_offset_after_shift_s32 = vdupq_n_s32(result_offset_after_shift);
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000303
Michele Di Giorgio45361932019-12-19 13:53:44 +0000304 const VectorType min = wrapper::vdup_n(std::numeric_limits<TOut>::lowest(), TagType{});
305 const VectorType max = wrapper::vdup_n(std::numeric_limits<TOut>::max(), TagType{});
Georgios Pinitasa799ce02018-09-12 20:11:34 +0100306
307 Window window_bias = window;
Michalis Spyrou017ead22020-09-28 23:28:08 +0100308 window_bias.set(Window::DimX, Window::Dimension(0, 1, 1));
Georgios Pinitasa799ce02018-09-12 20:11:34 +0100309 window_bias.set(Window::DimY, Window::Dimension(0, 0, 0));
310 window_bias.set(Window::DimZ, Window::Dimension(0, 0, 0));
311 window_bias.set(3, Window::Dimension(0, 0, 0));
312
Michalis Spyrou017ead22020-09-28 23:28:08 +0100313 const int window_start_x = window.x().start();
314 const int window_end_x = window.x().end();
315 const int window_step_x = 16 / input->info()->element_size();
316 Window win = window;
317 win.set(Window::DimX, Window::Dimension(0, 1, 1));
318
319 Iterator in(input, win);
Georgios Pinitasa799ce02018-09-12 20:11:34 +0100320 Iterator bi(bias, window_bias);
Michalis Spyrou017ead22020-09-28 23:28:08 +0100321 Iterator out(output, win);
Georgios Pinitasa799ce02018-09-12 20:11:34 +0100322
Michalis Spyrou017ead22020-09-28 23:28:08 +0100323 execute_window_loop(win, [&](const Coordinates &)
Georgios Pinitasa799ce02018-09-12 20:11:34 +0100324 {
Michalis Spyrou017ead22020-09-28 23:28:08 +0100325 int x = window_start_x;
326 for(; x < (window_end_x - window_step_x); x += window_step_x)
Michele Di Giorgio45361932019-12-19 13:53:44 +0000327 {
Michalis Spyrou017ead22020-09-28 23:28:08 +0100328 // Get bias and pointer to input
329 const auto in_ptr = reinterpret_cast<int32_t *>(in.ptr()) + x;
330 int32x4x4_t v_in =
Michele Di Giorgio45361932019-12-19 13:53:44 +0000331 {
Michalis Spyrou017ead22020-09-28 23:28:08 +0100332 {
333 wrapper::vloadq(in_ptr),
334 wrapper::vloadq(in_ptr + 4),
335 wrapper::vloadq(in_ptr + 8),
336 wrapper::vloadq(in_ptr + 12),
337 }
338 };
339
340 // Accumulate bias
341 if(has_bias)
342 {
343 const auto bias_ptr = reinterpret_cast<int32_t *>(bi.ptr()) + x;
344
345 wrapper::vadd(v_in.val[0], wrapper::vloadq(bias_ptr));
346 wrapper::vadd(v_in.val[1], wrapper::vloadq(bias_ptr + 4));
347 wrapper::vadd(v_in.val[2], wrapper::vloadq(bias_ptr + 8));
348 wrapper::vadd(v_in.val[3], wrapper::vloadq(bias_ptr + 12));
Michele Di Giorgio45361932019-12-19 13:53:44 +0000349 }
Georgios Pinitasa799ce02018-09-12 20:11:34 +0100350
Michalis Spyrou017ead22020-09-28 23:28:08 +0100351 const auto out_ptr = reinterpret_cast<TOut *>(out.ptr()) + x;
352 wrapper::vstore(out_ptr, finalize_quantization(v_in, result_fixedpoint_multiplier, result_shift, result_offset_after_shift_s32, min, max, false));
Michele Di Giorgio45361932019-12-19 13:53:44 +0000353 }
354
Michalis Spyrou017ead22020-09-28 23:28:08 +0100355 // Left-overs loop
356 for(; x < window_end_x; ++x)
357 {
358 // Get bias and pointer to input
359 const auto in_ptr = reinterpret_cast<int32_t *>(in.ptr()) + x;
360 int32_t s_in = *in_ptr;
361
362 // Accumulate bias
363 if(has_bias)
364 {
365 const auto bias_ptr = reinterpret_cast<int32_t *>(bi.ptr()) + x;
366 s_in += *bias_ptr;
367 }
368
369 const auto out_ptr = reinterpret_cast<TOut *>(out.ptr()) + x;
370 *out_ptr = finalize_quantization(s_in, result_fixedpoint_multiplier, result_shift, result_offset_after_shift,
371 std::numeric_limits<TOut>::lowest(), std::numeric_limits<TOut>::max(), false);
372 }
Georgios Pinitasa799ce02018-09-12 20:11:34 +0100373 },
374 in, bi, out);
375}
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100376} // namespace
377
Michalis Spyroub91e34c2017-12-20 15:50:55 +0000378NEDirectConvolutionLayerOutputStageKernel::NEDirectConvolutionLayerOutputStageKernel()
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000379 : _func(nullptr), _input(nullptr), _bias(nullptr), _output(nullptr), _result_fixedpoint_multiplier(0), _result_shift(0), _result_offset_after_shift(0)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100380{
381}
382
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000383void NEDirectConvolutionLayerOutputStageKernel::configure(ITensor *input, const ITensor *bias, ITensor *output,
Michele Di Giorgio45361932019-12-19 13:53:44 +0000384 const DirectConvolutionLayerOutputStageKernelInfo &info)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100385{
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000386 // Perform validation step
Michele Di Giorgio45361932019-12-19 13:53:44 +0000387 ARM_COMPUTE_ERROR_ON_NULLPTR(input);
388 ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), (bias == nullptr) ? nullptr : bias->info(), (output == nullptr) ? nullptr : output->info(), info));
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100389
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000390 _func = nullptr;
391 _bias = bias;
392 _input = input;
Michele Di Giorgio45361932019-12-19 13:53:44 +0000393 _output = (output != nullptr) ? output : input;
394 _result_fixedpoint_multiplier = info.result_fixedpoint_multiplier;
395 _result_shift = info.result_shift;
396 _result_offset_after_shift = info.result_offset_after_shift;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100397
Michalis Spyrou017ead22020-09-28 23:28:08 +0100398 // Auto-initialize output output if required
399 if(output != nullptr && output->info() != nullptr)
400 {
401 // Work out expected output data type
402 const DataType output_dt = (input->info()->data_type() == DataType::S32) ? info.output_data_type : DataType::S32;
403 // Output tensor auto initialization if not yet initialized
404 auto_init_if_empty(*output->info(), input->info()->clone()->set_data_type(output_dt));
405 }
406
407 Window win = calculate_max_window(*input->info(), Steps());
408 Coordinates coord;
409 coord.set_num_dimensions(input->info()->num_dimensions());
410
411 if(output != nullptr && (output->info()->total_size() != 0))
412 {
413 output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape()));
414 }
415 else
416 {
417 input->info()->set_valid_region(ValidRegion(coord, input->info()->tensor_shape()));
418 }
419
420 INEKernel::configure(win);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100421
Michele Di Giorgio45361932019-12-19 13:53:44 +0000422 const bool is_qasymm8_signed = (output != nullptr) ? is_data_type_quantized_asymmetric_signed(output->info()->data_type()) : false;
Gian Marco Iodice618493d2018-11-27 16:38:33 +0000423
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100424 // Set appropriate function
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100425 if(input->info()->data_layout() == DataLayout::NCHW)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100426 {
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100427 switch(input->info()->data_type())
Pablo Tellof87cc7f2017-07-26 10:28:40 +0100428 {
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100429 case DataType::S32:
Michalis Spyroub91e34c2017-12-20 15:50:55 +0000430 {
Michele Di Giorgio45361932019-12-19 13:53:44 +0000431 if(is_qasymm8_signed)
432 {
Michalis Spyrou14e868e2020-09-30 00:33:05 +0100433 _func = &output_stage_nchw<int8_t>;
Michele Di Giorgio45361932019-12-19 13:53:44 +0000434 }
435 else
436 {
Michalis Spyrou14e868e2020-09-30 00:33:05 +0100437 _func = &output_stage_nchw<uint8_t>;
Michele Di Giorgio45361932019-12-19 13:53:44 +0000438 }
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100439 break;
Pablo Tellof87cc7f2017-07-26 10:28:40 +0100440 }
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +0000441#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100442 case DataType::F16:
443 {
Michalis Spyrou14e868e2020-09-30 00:33:05 +0100444 _func = &output_stage_nchw<float16_t>;
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100445 break;
446 }
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +0000447#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100448 case DataType::F32:
449 {
Michalis Spyrou14e868e2020-09-30 00:33:05 +0100450 _func = &output_stage_nchw<float>;
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100451 break;
452 }
453 default:
454 {
455 ARM_COMPUTE_ERROR("Unsupported combination of types among the inputs.");
456 }
Pablo Tellof87cc7f2017-07-26 10:28:40 +0100457 }
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100458 }
459 else
460 {
461 switch(input->info()->data_type())
Pablo Tellof87cc7f2017-07-26 10:28:40 +0100462 {
Georgios Pinitasa799ce02018-09-12 20:11:34 +0100463 case DataType::S32:
464 {
Michele Di Giorgio45361932019-12-19 13:53:44 +0000465 if(is_qasymm8_signed)
466 {
Michalis Spyrou14e868e2020-09-30 00:33:05 +0100467 _func = &output_stage_nhwc<int8_t>;
Michele Di Giorgio45361932019-12-19 13:53:44 +0000468 }
469 else
470 {
Michalis Spyrou14e868e2020-09-30 00:33:05 +0100471 _func = &output_stage_nhwc<uint8_t>;
Michele Di Giorgio45361932019-12-19 13:53:44 +0000472 }
Georgios Pinitasa799ce02018-09-12 20:11:34 +0100473 break;
474 }
Georgios Pinitas20c246a2018-09-12 16:45:53 +0100475#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
476 case DataType::F16:
477 {
Michalis Spyrou14e868e2020-09-30 00:33:05 +0100478 _func = &output_stage_nhwc<float16_t>;
Georgios Pinitas20c246a2018-09-12 16:45:53 +0100479 break;
480 }
481#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100482 case DataType::F32:
483 {
Michalis Spyrou14e868e2020-09-30 00:33:05 +0100484 _func = &output_stage_nhwc<float>;
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100485 break;
486 }
487 default:
488 {
489 ARM_COMPUTE_ERROR("Unsupported combination of types among the inputs.");
490 }
Pablo Tellof87cc7f2017-07-26 10:28:40 +0100491 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100492 }
493}
494
Michele Di Giorgioff271922019-07-17 15:59:32 +0100495Status NEDirectConvolutionLayerOutputStageKernel::validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output,
Michele Di Giorgio45361932019-12-19 13:53:44 +0000496 const DirectConvolutionLayerOutputStageKernelInfo &info)
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000497{
Michele Di Giorgio45361932019-12-19 13:53:44 +0000498 ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, bias, output, info));
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000499
500 return Status{};
501}
502
Michalis Spyroub91e34c2017-12-20 15:50:55 +0000503void NEDirectConvolutionLayerOutputStageKernel::run(const Window &window, const ThreadInfo &info)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100504{
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100505 ARM_COMPUTE_UNUSED(info);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100506 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
507 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
508 ARM_COMPUTE_ERROR_ON(_func == nullptr);
509
Michalis Spyrou14e868e2020-09-30 00:33:05 +0100510 const bool has_bias = _bias != nullptr;
511 (*_func)(_input, _bias, window, _output, _result_fixedpoint_multiplier, _result_shift, _result_offset_after_shift, has_bias);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100512}
Michele Di Giorgiof29d1b72019-10-29 10:58:13 +0000513} // namespace arm_compute