blob: 93ad5e5ebacd3bb64de6ad0ea31bad5d61c67f31 [file] [log] [blame]
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001/*
Sheri Zhangac6499a2021-02-10 15:32:38 +00002 * Copyright (c) 2017-2021 Arm Limited.
Anthony Barbier6ff3b192017-09-04 18:44:23 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
Georgios Pinitas7891a732021-08-20 21:39:25 +010024#include "src/cpu/kernels/CpuDirectConv2dOutputStageKernel.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010025
Anthony Barbier6ff3b192017-09-04 18:44:23 +010026#include "arm_compute/core/Error.h"
27#include "arm_compute/core/Helpers.h"
28#include "arm_compute/core/ITensor.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010029#include "arm_compute/core/Types.h"
30#include "arm_compute/core/Validate.h"
31#include "arm_compute/core/Window.h"
Michele Di Giorgio45361932019-12-19 13:53:44 +000032#include "arm_compute/core/utils/misc/Traits.h"
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +010033#include "src/core/CPP/Validate.h"
Georgios Pinitasddb93bb2020-10-02 16:38:59 +010034#include "src/core/NEON/NEAsymm.h"
35#include "src/core/NEON/NEFixedPoint.h"
36#include "src/core/NEON/wrapper/wrapper.h"
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +010037#include "src/core/helpers/AutoConfiguration.h"
38#include "src/core/helpers/WindowHelpers.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010039
40#include <arm_neon.h>
41#include <cstddef>
42#include <cstdint>
43
Michele Di Giorgiof29d1b72019-10-29 10:58:13 +000044namespace arm_compute
45{
Manuel Bottini327225d2021-04-13 13:09:30 +010046namespace cpu
47{
48namespace kernels
49{
Anthony Barbier6ff3b192017-09-04 18:44:23 +010050namespace
51{
Manuel Bottini327225d2021-04-13 13:09:30 +010052Status validate_arguments(const ITensorInfo *src, const ITensorInfo *bias, const ITensorInfo *dst,
Michele Di Giorgio45361932019-12-19 13:53:44 +000053 const DirectConvolutionLayerOutputStageKernelInfo &info)
Michalis Spyrouafa5d812017-11-30 14:25:57 +000054{
Manuel Bottini327225d2021-04-13 13:09:30 +010055 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src);
56 ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src);
57 ARM_COMPUTE_RETURN_ERROR_ON(src->data_layout() == DataLayout::UNKNOWN);
58 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::S32, DataType::F32);
Michalis Spyroub91e34c2017-12-20 15:50:55 +000059
60 if(bias != nullptr)
Michalis Spyrouafa5d812017-11-30 14:25:57 +000061 {
Manuel Bottini327225d2021-04-13 13:09:30 +010062 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, bias);
63 ARM_COMPUTE_RETURN_ERROR_ON(bias->dimension(0) != src->dimension(get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::CHANNEL)));
Michalis Spyroub91e34c2017-12-20 15:50:55 +000064 ARM_COMPUTE_RETURN_ERROR_ON(bias->num_dimensions() > 1);
Michalis Spyrouafa5d812017-11-30 14:25:57 +000065 }
Michalis Spyrouafa5d812017-11-30 14:25:57 +000066
Manuel Bottini327225d2021-04-13 13:09:30 +010067 if(src->data_type() == DataType::S32)
Michele Di Giorgio45361932019-12-19 13:53:44 +000068 {
Manuel Bottini327225d2021-04-13 13:09:30 +010069 ARM_COMPUTE_RETURN_ERROR_ON_MSG(dst == nullptr, "In-place computation not allowed for quantized output");
Michele Di Giorgio45361932019-12-19 13:53:44 +000070 }
71
Michalis Spyrouafa5d812017-11-30 14:25:57 +000072 // Checks performed when output is configured
Manuel Bottini327225d2021-04-13 13:09:30 +010073 if((dst != nullptr) && (dst->total_size() != 0))
Michalis Spyrouafa5d812017-11-30 14:25:57 +000074 {
Manuel Bottini327225d2021-04-13 13:09:30 +010075 if(is_data_type_float(src->data_type()))
Michalis Spyroub91e34c2017-12-20 15:50:55 +000076 {
Manuel Bottini327225d2021-04-13 13:09:30 +010077 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
Michalis Spyroub91e34c2017-12-20 15:50:55 +000078 }
Michele Di Giorgio45361932019-12-19 13:53:44 +000079 else
80 {
Manuel Bottini327225d2021-04-13 13:09:30 +010081 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(dst, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED);
Michele Di Giorgio45361932019-12-19 13:53:44 +000082 }
Manuel Bottini327225d2021-04-13 13:09:30 +010083 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(src, dst);
Michele Di Giorgio45361932019-12-19 13:53:44 +000084 }
Manuel Bottini327225d2021-04-13 13:09:30 +010085 else if(src->data_type() == DataType::S32)
Michele Di Giorgio45361932019-12-19 13:53:44 +000086 {
87 // In case of quantized computation and unconfigured output, the output data type must be provided through DirectConvolutionLayerOutputStageKernelInfo
88 ARM_COMPUTE_RETURN_ERROR_ON((info.output_data_type != DataType::QASYMM8) && (info.output_data_type != DataType::QASYMM8_SIGNED));
Michalis Spyrouafa5d812017-11-30 14:25:57 +000089 }
90
Michalis Spyrouafa5d812017-11-30 14:25:57 +000091 return Status{};
92}
93
Michalis Spyrou14e868e2020-09-30 00:33:05 +010094template <typename T>
Michele Di Giorgio45361932019-12-19 13:53:44 +000095typename std::enable_if<arm_compute::utils::traits::is_floating_point<T>::value, void>::type
Manuel Bottini327225d2021-04-13 13:09:30 +010096output_stage_nchw(ITensor *src, const ITensor *bias, const Window &window, ITensor *dst,
97 int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift)
Anthony Barbier6ff3b192017-09-04 18:44:23 +010098{
Manuel Bottini327225d2021-04-13 13:09:30 +010099 const bool has_bias = bias != nullptr;
Michele Di Giorgio33f41fa2021-03-09 14:09:08 +0000100 /** SIMD vector tag type. */
Michele Di Giorgio45361932019-12-19 13:53:44 +0000101 using ExactTagType = typename wrapper::traits::neon_bitvector_tag_t<T, wrapper::traits::BitWidth::W128>;
Pablo Tellof87cc7f2017-07-26 10:28:40 +0100102
Manuel Bottini327225d2021-04-13 13:09:30 +0100103 ARM_COMPUTE_ERROR_ON(src->info()->data_layout() == DataLayout::UNKNOWN);
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000104 ARM_COMPUTE_UNUSED(result_fixedpoint_multiplier);
105 ARM_COMPUTE_UNUSED(result_shift);
106 ARM_COMPUTE_UNUSED(result_offset_after_shift);
107
Michalis Spyrou017ead22020-09-28 23:28:08 +0100108 const int window_start_x = window.x().start();
109 const int window_end_x = window.x().end();
Manuel Bottini327225d2021-04-13 13:09:30 +0100110 const int window_step_x = 16 / src->info()->element_size();
Michalis Spyrou017ead22020-09-28 23:28:08 +0100111 Window win = window;
112 win.set(Window::DimX, Window::Dimension(0, 1, 1));
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100113
Manuel Bottini327225d2021-04-13 13:09:30 +0100114 Iterator in(src, win);
115 Iterator out(dst, win);
Michalis Spyrou017ead22020-09-28 23:28:08 +0100116 execute_window_loop(win, [&](const Coordinates & id)
117 {
118 int x = window_start_x;
Michalis Spyrouc2268532020-10-09 11:52:10 +0100119 for(; x <= (window_end_x - window_step_x); x += window_step_x)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100120 {
Michalis Spyrou017ead22020-09-28 23:28:08 +0100121 // Get bias and pointer to input
122 const auto in_ptr = reinterpret_cast<const T *>(in.ptr()) + x;
123 auto v_in = wrapper::vloadq(in_ptr);
124
125 // Accumulate bias
126 if(has_bias)
127 {
128 const auto vb = wrapper::vdup_n(*reinterpret_cast<const T *>(bias->ptr_to_element(Coordinates(id.z()))), ExactTagType{});
129 v_in = wrapper::vadd(v_in, vb);
130 }
131
132 const auto out_ptr = reinterpret_cast<T *>(out.ptr()) + x;
133 wrapper::vstore(out_ptr, v_in);
Michele Di Giorgio45361932019-12-19 13:53:44 +0000134 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100135
Michalis Spyrou017ead22020-09-28 23:28:08 +0100136 // Left-overs loop
137 for(; x < window_end_x; ++x)
138 {
139 // Get bias and pointer to input
140 auto s_in = *(reinterpret_cast<const T *>(in.ptr()) + x);
141
142 // Accumulate bias
143 if(has_bias)
144 {
145 const auto b = *reinterpret_cast<const T *>(bias->ptr_to_element(Coordinates(id.z())));
146 s_in += b;
147 }
148
149 *(reinterpret_cast<T *>(out.ptr()) + x) = s_in;
150 }
151
Michele Di Giorgio45361932019-12-19 13:53:44 +0000152 },
153 in, out);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100154}
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000155
Michalis Spyrou14e868e2020-09-30 00:33:05 +0100156template <typename T>
Michele Di Giorgio45361932019-12-19 13:53:44 +0000157typename std::enable_if<arm_compute::utils::traits::is_floating_point<T>::value, void>::type
Manuel Bottini327225d2021-04-13 13:09:30 +0100158output_stage_nhwc(ITensor *src, const ITensor *bias, const Window &window, ITensor *dst,
159 int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift)
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100160{
Manuel Bottini327225d2021-04-13 13:09:30 +0100161 const bool has_bias = bias != nullptr;
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100162 ARM_COMPUTE_UNUSED(result_fixedpoint_multiplier);
163 ARM_COMPUTE_UNUSED(result_shift);
164 ARM_COMPUTE_UNUSED(result_offset_after_shift);
165
166 Window window_bias = window;
Michalis Spyrou017ead22020-09-28 23:28:08 +0100167 window_bias.set(Window::DimX, Window::Dimension(0, 1, 1));
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100168 window_bias.set(Window::DimY, Window::Dimension(0, 0, 0));
169 window_bias.set(Window::DimZ, Window::Dimension(0, 0, 0));
170 window_bias.set(3, Window::Dimension(0, 0, 0));
171
Michalis Spyrou017ead22020-09-28 23:28:08 +0100172 const int window_start_x = window.x().start();
173 const int window_end_x = window.x().end();
Manuel Bottini327225d2021-04-13 13:09:30 +0100174 const int window_step_x = 16 / src->info()->element_size();
Michalis Spyrou017ead22020-09-28 23:28:08 +0100175 Window win = window;
176 win.set(Window::DimX, Window::Dimension(0, 1, 1));
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100177
Manuel Bottini327225d2021-04-13 13:09:30 +0100178 Iterator in(src, win);
Michalis Spyrou017ead22020-09-28 23:28:08 +0100179 Iterator bi(bias, window_bias);
Manuel Bottini327225d2021-04-13 13:09:30 +0100180 Iterator out(dst, win);
Michalis Spyrou017ead22020-09-28 23:28:08 +0100181
182 execute_window_loop(win, [&](const Coordinates &)
183 {
184 int x = window_start_x;
Michalis Spyrouc2268532020-10-09 11:52:10 +0100185 for(; x <= (window_end_x - window_step_x); x += window_step_x)
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100186 {
Michalis Spyrou017ead22020-09-28 23:28:08 +0100187 // Get bias and pointer to input
188 const auto in_ptr = reinterpret_cast<const T *>(in.ptr());
189 auto v_in = wrapper::vloadq(in_ptr + x);
190
191 // Accumulate bias
192 if(has_bias)
193 {
194 const auto bias_ptr = reinterpret_cast<T *>(bi.ptr()) + x;
195 v_in = wrapper::vadd(v_in, wrapper::vloadq(bias_ptr));
196 }
197
198 const auto out_ptr = reinterpret_cast<T *>(out.ptr());
199 wrapper::vstore(out_ptr + x, v_in);
Michele Di Giorgio45361932019-12-19 13:53:44 +0000200 }
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100201
Michalis Spyrou017ead22020-09-28 23:28:08 +0100202 // Left-overs loop
203 for(; x < window_end_x; ++x)
204 {
205 // Get bias and pointer to input
206 auto s_in = *(reinterpret_cast<const T *>(in.ptr()) + x);
Michele Di Giorgio45361932019-12-19 13:53:44 +0000207
Michalis Spyrou017ead22020-09-28 23:28:08 +0100208 // Accumulate bias
209 if(has_bias)
210 {
211 const auto bias_ptr = reinterpret_cast<T *>(bi.ptr()) + x;
212 s_in += *bias_ptr;
213 }
214
215 const auto out_ptr = reinterpret_cast<T *>(out.ptr());
216 *(out_ptr + x) = s_in;
217 }
Michele Di Giorgio45361932019-12-19 13:53:44 +0000218 },
219 in, bi, out);
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100220}
221
Michele Di Giorgio45361932019-12-19 13:53:44 +0000222// Quantized case
Michalis Spyrou14e868e2020-09-30 00:33:05 +0100223template < typename TOut, typename std::enable_if < std::is_same<TOut, uint8_t>::value || std::is_same<TOut, int8_t>::value, int >::type = 0 >
Manuel Bottini327225d2021-04-13 13:09:30 +0100224void output_stage_nchw(ITensor *src, const ITensor *bias, const Window &window, ITensor *dst,
225 int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift)
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000226{
Manuel Bottini327225d2021-04-13 13:09:30 +0100227 const bool has_bias = bias != nullptr;
228 using VectorType = typename wrapper::traits::neon_bitvector_t<TOut, wrapper::traits::BitWidth::W128>;
229 using TagType = typename wrapper::traits::neon_bitvector_tag_t<TOut, wrapper::traits::BitWidth::W128>;
Michele Di Giorgio45361932019-12-19 13:53:44 +0000230
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000231 const int32x4_t result_offset_after_shift_s32 = vdupq_n_s32(result_offset_after_shift);
Michele Di Giorgio45361932019-12-19 13:53:44 +0000232
233 const VectorType min = wrapper::vdup_n(std::numeric_limits<TOut>::lowest(), TagType{});
234 const VectorType max = wrapper::vdup_n(std::numeric_limits<TOut>::max(), TagType{});
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000235
Michalis Spyrou017ead22020-09-28 23:28:08 +0100236 const int window_start_x = window.x().start();
237 const int window_end_x = window.x().end();
Manuel Bottini327225d2021-04-13 13:09:30 +0100238 const int window_step_x = 16 / src->info()->element_size();
Michalis Spyrou017ead22020-09-28 23:28:08 +0100239 Window win = window;
240 win.set(Window::DimX, Window::Dimension(0, 1, 1));
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000241
Manuel Bottini327225d2021-04-13 13:09:30 +0100242 Iterator in(src, win);
243 Iterator out(dst, win);
Michalis Spyrou017ead22020-09-28 23:28:08 +0100244
245 execute_window_loop(win, [&](const Coordinates & id)
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000246 {
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000247
Michalis Spyrou017ead22020-09-28 23:28:08 +0100248 int x = window_start_x;
Michalis Spyrouc2268532020-10-09 11:52:10 +0100249 for(; x <= (window_end_x - window_step_x); x += window_step_x)
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000250 {
Michalis Spyrou017ead22020-09-28 23:28:08 +0100251 // Get bias and pointer to input
252 const auto in_ptr = reinterpret_cast<int32_t *>(in.ptr()) + x;
253 int32x4x4_t v_in =
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000254 {
Michele Di Giorgio45361932019-12-19 13:53:44 +0000255 {
Michalis Spyrou017ead22020-09-28 23:28:08 +0100256 wrapper::vloadq(in_ptr),
257 wrapper::vloadq(in_ptr + 4),
258 wrapper::vloadq(in_ptr + 8),
259 wrapper::vloadq(in_ptr + 12)
Michele Di Giorgio45361932019-12-19 13:53:44 +0000260 }
261 };
Michalis Spyrou017ead22020-09-28 23:28:08 +0100262
263 // Accumulate bias
264 if(has_bias)
265 {
266 const auto vb = wrapper::vdup_n(*reinterpret_cast<const int32_t *>(bias->ptr_to_element(Coordinates(id.z()))), TagType{});
267 v_in =
268 {
269 {
270 wrapper::vadd(v_in.val[0], vb),
271 wrapper::vadd(v_in.val[1], vb),
272 wrapper::vadd(v_in.val[2], vb),
273 wrapper::vadd(v_in.val[3], vb)
274 }
275 };
276 }
277
278 const auto out_ptr = reinterpret_cast<TOut *>(out.ptr()) + x;
279 wrapper::vstore(out_ptr, finalize_quantization(v_in, result_fixedpoint_multiplier, result_shift, result_offset_after_shift_s32,
280 min, max, false));
Michele Di Giorgio45361932019-12-19 13:53:44 +0000281 }
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000282
Michalis Spyrou017ead22020-09-28 23:28:08 +0100283 // Left-overs loop
284 for(; x < window_end_x; ++x)
285 {
286 // Get bias and pointer to input
287 int32_t s_in = *(reinterpret_cast<const int32_t *>(in.ptr()) + x);
288
289 // Accumulate bias
290 if(has_bias)
291 {
292 const auto b = *reinterpret_cast<const int32_t *>(bias->ptr_to_element(Coordinates(id.z())));
293 s_in += b;
294 }
295
296 const auto out_ptr = reinterpret_cast<TOut *>(out.ptr()) + x;
297 *out_ptr = finalize_quantization(s_in, result_fixedpoint_multiplier, result_shift, result_offset_after_shift,
298 std::numeric_limits<TOut>::lowest(), std::numeric_limits<TOut>::max(), false);
299 }
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000300 },
301 in, out);
302}
Michalis Spyrou14e868e2020-09-30 00:33:05 +0100303template < typename TOut, typename std::enable_if < std::is_same<TOut, uint8_t>::value || std::is_same<TOut, int8_t>::value, int >::type = 0 >
Manuel Bottini327225d2021-04-13 13:09:30 +0100304void output_stage_nhwc(ITensor *src, const ITensor *bias, const Window &window, ITensor *dst,
305 int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift)
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000306{
Manuel Bottini327225d2021-04-13 13:09:30 +0100307 const bool has_bias = bias != nullptr;
308 using VectorType = typename wrapper::traits::neon_bitvector_t<TOut, wrapper::traits::BitWidth::W128>;
309 using TagType = typename wrapper::traits::neon_bitvector_tag_t<TOut, wrapper::traits::BitWidth::W128>;
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000310
311 const int32x4_t result_offset_after_shift_s32 = vdupq_n_s32(result_offset_after_shift);
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000312
Michele Di Giorgio45361932019-12-19 13:53:44 +0000313 const VectorType min = wrapper::vdup_n(std::numeric_limits<TOut>::lowest(), TagType{});
314 const VectorType max = wrapper::vdup_n(std::numeric_limits<TOut>::max(), TagType{});
Georgios Pinitasa799ce02018-09-12 20:11:34 +0100315
316 Window window_bias = window;
Michalis Spyrou017ead22020-09-28 23:28:08 +0100317 window_bias.set(Window::DimX, Window::Dimension(0, 1, 1));
Georgios Pinitasa799ce02018-09-12 20:11:34 +0100318 window_bias.set(Window::DimY, Window::Dimension(0, 0, 0));
319 window_bias.set(Window::DimZ, Window::Dimension(0, 0, 0));
320 window_bias.set(3, Window::Dimension(0, 0, 0));
321
Michalis Spyrou017ead22020-09-28 23:28:08 +0100322 const int window_start_x = window.x().start();
323 const int window_end_x = window.x().end();
Manuel Bottini327225d2021-04-13 13:09:30 +0100324 const int window_step_x = 16 / src->info()->element_size();
Michalis Spyrou017ead22020-09-28 23:28:08 +0100325 Window win = window;
326 win.set(Window::DimX, Window::Dimension(0, 1, 1));
327
Manuel Bottini327225d2021-04-13 13:09:30 +0100328 Iterator in(src, win);
Georgios Pinitasa799ce02018-09-12 20:11:34 +0100329 Iterator bi(bias, window_bias);
Manuel Bottini327225d2021-04-13 13:09:30 +0100330 Iterator out(dst, win);
Georgios Pinitasa799ce02018-09-12 20:11:34 +0100331
Michalis Spyrou017ead22020-09-28 23:28:08 +0100332 execute_window_loop(win, [&](const Coordinates &)
Georgios Pinitasa799ce02018-09-12 20:11:34 +0100333 {
Michalis Spyrou017ead22020-09-28 23:28:08 +0100334 int x = window_start_x;
Michalis Spyrouc2268532020-10-09 11:52:10 +0100335 for(; x <= (window_end_x - window_step_x); x += window_step_x)
Michele Di Giorgio45361932019-12-19 13:53:44 +0000336 {
Michalis Spyrou017ead22020-09-28 23:28:08 +0100337 // Get bias and pointer to input
338 const auto in_ptr = reinterpret_cast<int32_t *>(in.ptr()) + x;
339 int32x4x4_t v_in =
Michele Di Giorgio45361932019-12-19 13:53:44 +0000340 {
Michalis Spyrou017ead22020-09-28 23:28:08 +0100341 {
342 wrapper::vloadq(in_ptr),
343 wrapper::vloadq(in_ptr + 4),
344 wrapper::vloadq(in_ptr + 8),
345 wrapper::vloadq(in_ptr + 12),
346 }
347 };
348
349 // Accumulate bias
350 if(has_bias)
351 {
352 const auto bias_ptr = reinterpret_cast<int32_t *>(bi.ptr()) + x;
353
354 wrapper::vadd(v_in.val[0], wrapper::vloadq(bias_ptr));
355 wrapper::vadd(v_in.val[1], wrapper::vloadq(bias_ptr + 4));
356 wrapper::vadd(v_in.val[2], wrapper::vloadq(bias_ptr + 8));
357 wrapper::vadd(v_in.val[3], wrapper::vloadq(bias_ptr + 12));
Michele Di Giorgio45361932019-12-19 13:53:44 +0000358 }
Georgios Pinitasa799ce02018-09-12 20:11:34 +0100359
Michalis Spyrou017ead22020-09-28 23:28:08 +0100360 const auto out_ptr = reinterpret_cast<TOut *>(out.ptr()) + x;
361 wrapper::vstore(out_ptr, finalize_quantization(v_in, result_fixedpoint_multiplier, result_shift, result_offset_after_shift_s32, min, max, false));
Michele Di Giorgio45361932019-12-19 13:53:44 +0000362 }
363
Michalis Spyrou017ead22020-09-28 23:28:08 +0100364 // Left-overs loop
365 for(; x < window_end_x; ++x)
366 {
367 // Get bias and pointer to input
368 const auto in_ptr = reinterpret_cast<int32_t *>(in.ptr()) + x;
369 int32_t s_in = *in_ptr;
370
371 // Accumulate bias
372 if(has_bias)
373 {
374 const auto bias_ptr = reinterpret_cast<int32_t *>(bi.ptr()) + x;
375 s_in += *bias_ptr;
376 }
377
378 const auto out_ptr = reinterpret_cast<TOut *>(out.ptr()) + x;
379 *out_ptr = finalize_quantization(s_in, result_fixedpoint_multiplier, result_shift, result_offset_after_shift,
380 std::numeric_limits<TOut>::lowest(), std::numeric_limits<TOut>::max(), false);
381 }
Georgios Pinitasa799ce02018-09-12 20:11:34 +0100382 },
383 in, bi, out);
384}
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100385} // namespace
386
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100387void CpuDirectConv2dOutputStageKernel::configure(ITensorInfo *src, const ITensorInfo *bias, ITensorInfo *dst,
388 const DirectConvolutionLayerOutputStageKernelInfo &info)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100389{
Manuel Bottini327225d2021-04-13 13:09:30 +0100390 ARM_COMPUTE_UNUSED(bias);
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000391 // Perform validation step
Manuel Bottini327225d2021-04-13 13:09:30 +0100392 ARM_COMPUTE_ERROR_ON_NULLPTR(src);
393 ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, bias, dst, info));
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100394
Georgios Pinitasf72f9362018-01-12 16:29:45 +0000395 _func = nullptr;
Michele Di Giorgio45361932019-12-19 13:53:44 +0000396 _result_fixedpoint_multiplier = info.result_fixedpoint_multiplier;
397 _result_shift = info.result_shift;
398 _result_offset_after_shift = info.result_offset_after_shift;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100399
Michalis Spyrou017ead22020-09-28 23:28:08 +0100400 // Auto-initialize output output if required
Manuel Bottini327225d2021-04-13 13:09:30 +0100401 if(dst != nullptr)
Michalis Spyrou017ead22020-09-28 23:28:08 +0100402 {
403 // Work out expected output data type
Manuel Bottini327225d2021-04-13 13:09:30 +0100404 const DataType output_dt = (src->data_type() == DataType::S32) ? info.output_data_type : DataType::S32;
Michalis Spyrou017ead22020-09-28 23:28:08 +0100405 // Output tensor auto initialization if not yet initialized
Manuel Bottini327225d2021-04-13 13:09:30 +0100406 auto_init_if_empty(*dst, src->clone()->set_data_type(output_dt));
Michalis Spyrou017ead22020-09-28 23:28:08 +0100407 }
408
Manuel Bottini327225d2021-04-13 13:09:30 +0100409 Window win = calculate_max_window(*src, Steps());
Michalis Spyrou017ead22020-09-28 23:28:08 +0100410
Manuel Bottini327225d2021-04-13 13:09:30 +0100411 ICpuKernel::configure(win);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100412
Manuel Bottini327225d2021-04-13 13:09:30 +0100413 const bool is_qasymm8_signed = (dst != nullptr) ? is_data_type_quantized_asymmetric_signed(dst->data_type()) : false;
Gian Marco Iodice618493d2018-11-27 16:38:33 +0000414
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100415 // Set appropriate function
Manuel Bottini327225d2021-04-13 13:09:30 +0100416 if(src->data_layout() == DataLayout::NCHW)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100417 {
Manuel Bottini327225d2021-04-13 13:09:30 +0100418 switch(src->data_type())
Pablo Tellof87cc7f2017-07-26 10:28:40 +0100419 {
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100420 case DataType::S32:
Michalis Spyroub91e34c2017-12-20 15:50:55 +0000421 {
Michele Di Giorgio45361932019-12-19 13:53:44 +0000422 if(is_qasymm8_signed)
423 {
Michalis Spyrou14e868e2020-09-30 00:33:05 +0100424 _func = &output_stage_nchw<int8_t>;
Michele Di Giorgio45361932019-12-19 13:53:44 +0000425 }
426 else
427 {
Michalis Spyrou14e868e2020-09-30 00:33:05 +0100428 _func = &output_stage_nchw<uint8_t>;
Michele Di Giorgio45361932019-12-19 13:53:44 +0000429 }
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100430 break;
Pablo Tellof87cc7f2017-07-26 10:28:40 +0100431 }
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +0000432#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100433 case DataType::F16:
434 {
Michalis Spyrou14e868e2020-09-30 00:33:05 +0100435 _func = &output_stage_nchw<float16_t>;
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100436 break;
437 }
Ioan-Cristian Szabo5edbd1c2017-11-13 13:34:08 +0000438#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100439 case DataType::F32:
440 {
Michalis Spyrou14e868e2020-09-30 00:33:05 +0100441 _func = &output_stage_nchw<float>;
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100442 break;
443 }
444 default:
445 {
446 ARM_COMPUTE_ERROR("Unsupported combination of types among the inputs.");
447 }
Pablo Tellof87cc7f2017-07-26 10:28:40 +0100448 }
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100449 }
450 else
451 {
Manuel Bottini327225d2021-04-13 13:09:30 +0100452 switch(src->data_type())
Pablo Tellof87cc7f2017-07-26 10:28:40 +0100453 {
Georgios Pinitasa799ce02018-09-12 20:11:34 +0100454 case DataType::S32:
455 {
Michele Di Giorgio45361932019-12-19 13:53:44 +0000456 if(is_qasymm8_signed)
457 {
Michalis Spyrou14e868e2020-09-30 00:33:05 +0100458 _func = &output_stage_nhwc<int8_t>;
Michele Di Giorgio45361932019-12-19 13:53:44 +0000459 }
460 else
461 {
Michalis Spyrou14e868e2020-09-30 00:33:05 +0100462 _func = &output_stage_nhwc<uint8_t>;
Michele Di Giorgio45361932019-12-19 13:53:44 +0000463 }
Georgios Pinitasa799ce02018-09-12 20:11:34 +0100464 break;
465 }
Georgios Pinitas20c246a2018-09-12 16:45:53 +0100466#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
467 case DataType::F16:
468 {
Michalis Spyrou14e868e2020-09-30 00:33:05 +0100469 _func = &output_stage_nhwc<float16_t>;
Georgios Pinitas20c246a2018-09-12 16:45:53 +0100470 break;
471 }
472#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100473 case DataType::F32:
474 {
Michalis Spyrou14e868e2020-09-30 00:33:05 +0100475 _func = &output_stage_nhwc<float>;
Giorgio Arena1ed1fc62018-03-26 16:20:05 +0100476 break;
477 }
478 default:
479 {
480 ARM_COMPUTE_ERROR("Unsupported combination of types among the inputs.");
481 }
Pablo Tellof87cc7f2017-07-26 10:28:40 +0100482 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100483 }
484}
485
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100486Status CpuDirectConv2dOutputStageKernel::validate(const ITensorInfo *src, const ITensorInfo *bias, const ITensorInfo *dst,
487 const DirectConvolutionLayerOutputStageKernelInfo &info)
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000488{
Manuel Bottini327225d2021-04-13 13:09:30 +0100489 ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, bias, dst, info));
Michalis Spyrouafa5d812017-11-30 14:25:57 +0000490 return Status{};
491}
492
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100493void CpuDirectConv2dOutputStageKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100494{
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100495 ARM_COMPUTE_UNUSED(info);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100496 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
Manuel Bottini327225d2021-04-13 13:09:30 +0100497 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100498 ARM_COMPUTE_ERROR_ON(_func == nullptr);
499
Manuel Bottini327225d2021-04-13 13:09:30 +0100500 auto src = tensors.get_tensor(TensorType::ACL_SRC_0);
501 auto bias = tensors.get_const_tensor(TensorType::ACL_SRC_1);
502 auto dst = tensors.get_tensor(TensorType::ACL_DST);
503
504 (*_func)(src, bias, window, dst, _result_fixedpoint_multiplier, _result_shift, _result_offset_after_shift);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100505}
Manuel Bottini327225d2021-04-13 13:09:30 +0100506
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100507const char *CpuDirectConv2dOutputStageKernel::name() const
Manuel Bottini327225d2021-04-13 13:09:30 +0100508{
Manuel Bottinib4bb6a02021-05-24 16:01:32 +0100509 return "CpuDirectConv2dOutputStageKernel";
Manuel Bottini327225d2021-04-13 13:09:30 +0100510}
511} // namespace kernels
512} // namespace cpu
Michele Di Giorgiof29d1b72019-10-29 10:58:13 +0000513} // namespace arm_compute