blob: dee0283a2ca70a750e0aa1e5c822ac7195729a79 [file] [log] [blame]
/*
 * Copyright (c) 2017-2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
Georgios Pinitas61ba0692021-01-10 04:07:39 +000024#include "src/core/cpu/kernels/CpuConcatenateDepthKernel.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010025
26#include "arm_compute/core/Error.h"
27#include "arm_compute/core/Helpers.h"
28#include "arm_compute/core/IAccessWindow.h"
29#include "arm_compute/core/ITensor.h"
30#include "arm_compute/core/TensorInfo.h"
31#include "arm_compute/core/Utils.h"
32#include "arm_compute/core/Validate.h"
33#include "arm_compute/core/Window.h"
Georgios Pinitasddb93bb2020-10-02 16:38:59 +010034#include "src/core/NEON/NEAsymm.h"
35#include "src/core/NEON/NEFixedPoint.h"
36#include "src/core/NEON/wrapper/wrapper.h"
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +010037#include "src/core/helpers/AutoConfiguration.h"
38#include "src/core/helpers/WindowHelpers.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010039
Georgios Pinitasac4e8732017-07-05 17:02:25 +010040#include <cstdint>
Anthony Barbier6ff3b192017-09-04 18:44:23 +010041
Michalis Spyrouc28d4282020-03-04 15:30:41 +000042namespace arm_compute
43{
Georgios Pinitas61ba0692021-01-10 04:07:39 +000044namespace cpu
45{
46namespace kernels
47{
Georgios Pinitasac4e8732017-07-05 17:02:25 +010048namespace
49{
Georgios Pinitasac4e8732017-07-05 17:02:25 +010050template <typename T>
Georgios Pinitas61ba0692021-01-10 04:07:39 +000051void depth_concat(const ITensor *src, ITensor *dst, unsigned int depth_offset, const Window &window)
Georgios Pinitasac4e8732017-07-05 17:02:25 +010052{
Georgios Pinitas61ba0692021-01-10 04:07:39 +000053 // Offset source
54 uint8_t *src_ptr = src->buffer() + src->info()->offset_first_element_in_bytes();
Georgios Pinitasac4e8732017-07-05 17:02:25 +010055
Georgios Pinitas61ba0692021-01-10 04:07:39 +000056 // Offset destination
57 uint8_t *dst_ptr = dst->buffer() + dst->info()->offset_first_element_in_bytes() + depth_offset * dst->info()->strides_in_bytes()[2];
Georgios Pinitasac4e8732017-07-05 17:02:25 +010058
Michalis Spyrouc28d4282020-03-04 15:30:41 +000059 const auto window_start_x = static_cast<int>(window.x().start());
60 const auto window_end_x = static_cast<int>(window.x().end());
Georgios Pinitas61ba0692021-01-10 04:07:39 +000061 const int window_step_x = 16 / dst->info()->element_size();
Michalis Spyrouc28d4282020-03-04 15:30:41 +000062
63 Window win{ window };
64 win.set(Window::DimX, Window::Dimension(0, 1, 1));
Georgios Pinitas61ba0692021-01-10 04:07:39 +000065 win.set(Window::DimZ, Window::Dimension(0, src->info()->tensor_shape().z(), 1));
Michalis Spyrouc28d4282020-03-04 15:30:41 +000066
Georgios Pinitas61ba0692021-01-10 04:07:39 +000067 Iterator src_it(src, win);
68 Iterator dst_it(dst, win);
Georgios Pinitasac4e8732017-07-05 17:02:25 +010069
Georgios Pinitas61ba0692021-01-10 04:07:39 +000070 const DataType dt = src->info()->data_type();
71 const UniformQuantizationInfo src_qinfo = src->info()->quantization_info().uniform();
72 const UniformQuantizationInfo dst_qinfo = dst->info()->quantization_info().uniform();
73 if(dt == DataType::QASYMM8 && src_qinfo != dst_qinfo)
Georgios Pinitasac4e8732017-07-05 17:02:25 +010074 {
Michalis Spyrouc28d4282020-03-04 15:30:41 +000075 execute_window_loop(win, [&](const Coordinates &)
Pablo Tello54e98d92019-02-05 16:16:19 +000076 {
Georgios Pinitas61ba0692021-01-10 04:07:39 +000077 const auto in_ptr = reinterpret_cast<const uint8_t *>(src_ptr + src_it.offset());
78 const auto out_ptr = reinterpret_cast<uint8_t *>(dst_ptr + dst_it.offset());
Michalis Spyrouc28d4282020-03-04 15:30:41 +000079 int x = window_start_x;
80 for(; x <= (window_end_x - window_step_x); x += window_step_x)
81 {
Georgios Pinitas61ba0692021-01-10 04:07:39 +000082 wrapper::vstore(out_ptr + x, vquantize(vdequantize(wrapper::vloadq(in_ptr + x), src_qinfo), dst_qinfo));
Michalis Spyrouc28d4282020-03-04 15:30:41 +000083 }
84
85 // Compute left-over elements
86 for(; x < window_end_x; ++x)
87 {
Georgios Pinitas61ba0692021-01-10 04:07:39 +000088 *(out_ptr + x) = quantize_qasymm8(dequantize_qasymm8(*(in_ptr + x), src_qinfo), dst_qinfo);
Michalis Spyrouc28d4282020-03-04 15:30:41 +000089 }
Pablo Tello54e98d92019-02-05 16:16:19 +000090 },
Georgios Pinitas61ba0692021-01-10 04:07:39 +000091 src_it, dst_it);
Pablo Tello54e98d92019-02-05 16:16:19 +000092 }
Georgios Pinitas61ba0692021-01-10 04:07:39 +000093 else if(dt == DataType::QASYMM8_SIGNED && src_qinfo != dst_qinfo)
Georgios Pinitas33843562019-12-10 13:33:18 +000094 {
Michalis Spyrouc28d4282020-03-04 15:30:41 +000095 execute_window_loop(win, [&](const Coordinates &)
Georgios Pinitas33843562019-12-10 13:33:18 +000096 {
Georgios Pinitas61ba0692021-01-10 04:07:39 +000097 const auto in_ptr = reinterpret_cast<const int8_t *>(src_ptr + src_it.offset());
98 const auto out_ptr = reinterpret_cast<int8_t *>(dst_ptr + dst_it.offset());
Michalis Spyrouc28d4282020-03-04 15:30:41 +000099 int x = window_start_x;
100 for(; x <= (window_end_x - window_step_x); x += window_step_x)
101 {
Georgios Pinitas61ba0692021-01-10 04:07:39 +0000102 wrapper::vstore(out_ptr + x, vquantize_signed(vdequantize(wrapper::vloadq(in_ptr + x), src_qinfo), dst_qinfo));
Michalis Spyrouc28d4282020-03-04 15:30:41 +0000103 }
104
105 // Compute left-over elements
106 for(; x < window_end_x; ++x)
107 {
Georgios Pinitas61ba0692021-01-10 04:07:39 +0000108 *(out_ptr + x) = quantize_qasymm8_signed(dequantize_qasymm8_signed(*(in_ptr + x), src_qinfo), dst_qinfo);
Michalis Spyrouc28d4282020-03-04 15:30:41 +0000109 }
Georgios Pinitas33843562019-12-10 13:33:18 +0000110 },
Georgios Pinitas61ba0692021-01-10 04:07:39 +0000111 src_it, dst_it);
Georgios Pinitas33843562019-12-10 13:33:18 +0000112 }
Pablo Tello54e98d92019-02-05 16:16:19 +0000113 else
114 {
Michalis Spyrouc28d4282020-03-04 15:30:41 +0000115 execute_window_loop(win, [&](const Coordinates &)
Pablo Tello54e98d92019-02-05 16:16:19 +0000116 {
Georgios Pinitas61ba0692021-01-10 04:07:39 +0000117 const auto in_ptr = reinterpret_cast<const T *>(src_ptr + src_it.offset());
118 const auto out_ptr = reinterpret_cast<T *>(dst_ptr + dst_it.offset());
Michalis Spyrouc28d4282020-03-04 15:30:41 +0000119 int x = window_start_x;
120 for(; x <= (window_end_x - window_step_x); x += window_step_x)
121 {
122 wrapper::vstore(out_ptr + x, wrapper::vloadq(in_ptr + x));
123 }
124 // Compute left-over elements
125 for(; x < window_end_x; ++x)
126 {
127 *(out_ptr + x) = *(in_ptr + x);
128 }
Pablo Tello54e98d92019-02-05 16:16:19 +0000129 },
Georgios Pinitas61ba0692021-01-10 04:07:39 +0000130 src_it, dst_it);
Pablo Tello54e98d92019-02-05 16:16:19 +0000131 }
Georgios Pinitasac4e8732017-07-05 17:02:25 +0100132}
Georgios Pinitasae54e022018-07-16 15:41:27 +0100133
Georgios Pinitasae54e022018-07-16 15:41:27 +0100134Status validate_arguments(const ITensorInfo *input, unsigned int depth_offset, const ITensorInfo *output)
135{
136 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
Anthony Barbiereaefd002018-07-20 17:49:35 +0100137 //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions.
Georgios Pinitas33843562019-12-10 13:33:18 +0000138 ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
Georgios Pinitasae54e022018-07-16 15:41:27 +0100139 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
140
Michalis Spyroua9c44722019-04-05 17:18:36 +0100141 ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimX) != output->dimension(Window::DimX));
142 ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(Window::DimY) != output->dimension(Window::DimY));
Georgios Pinitasae54e022018-07-16 15:41:27 +0100143 ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(2) + depth_offset > output->dimension(2));
Georgios Pinitasae54e022018-07-16 15:41:27 +0100144 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(3, input, output);
145
Georgios Pinitasae54e022018-07-16 15:41:27 +0100146 return Status{};
147}
Georgios Pinitasac4e8732017-07-05 17:02:25 +0100148} // namespace
149
Georgios Pinitas61ba0692021-01-10 04:07:39 +0000150CpuConcatenateDepthKernel::CpuConcatenateDepthKernel()
Georgios Pinitas4667ddd2020-07-13 21:21:33 +0100151 : _func(nullptr), _depth_offset(0)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100152{
153}
154
Georgios Pinitas61ba0692021-01-10 04:07:39 +0000155void CpuConcatenateDepthKernel::configure(const ITensorInfo *src, unsigned int depth_offset, ITensorInfo *dst)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100156{
Georgios Pinitas61ba0692021-01-10 04:07:39 +0000157 ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
158 ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, depth_offset, dst));
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100159
Georgios Pinitasac4e8732017-07-05 17:02:25 +0100160 _func = nullptr;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100161 _depth_offset = depth_offset;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100162
Georgios Pinitas61ba0692021-01-10 04:07:39 +0000163 switch(src->data_type())
Georgios Pinitasac4e8732017-07-05 17:02:25 +0100164 {
Georgios Pinitasae54e022018-07-16 15:41:27 +0100165 case DataType::QASYMM8:
166 _func = &depth_concat<uint8_t>;
167 break;
Georgios Pinitas33843562019-12-10 13:33:18 +0000168 case DataType::QASYMM8_SIGNED:
169 _func = &depth_concat<int8_t>;
170 break;
Georgios Pinitasac4e8732017-07-05 17:02:25 +0100171 case DataType::F16:
172 _func = &depth_concat<uint16_t>;
173 break;
174 case DataType::F32:
175 _func = &depth_concat<uint32_t>;
176 break;
177 default:
178 ARM_COMPUTE_ERROR("Unsupported data type.");
179 }
180
Georgios Pinitasae54e022018-07-16 15:41:27 +0100181 // Configure kernel window
Georgios Pinitas61ba0692021-01-10 04:07:39 +0000182 Window win = calculate_max_window(*dst, Steps());
Michalis Spyrouc28d4282020-03-04 15:30:41 +0000183 Coordinates coord;
Georgios Pinitas61ba0692021-01-10 04:07:39 +0000184 coord.set_num_dimensions(dst->num_dimensions());
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100185
Georgios Pinitas61ba0692021-01-10 04:07:39 +0000186 dst->set_valid_region(ValidRegion(coord, dst->tensor_shape()));
187 ICpuKernel::configure(win);
Georgios Pinitasae54e022018-07-16 15:41:27 +0100188}
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100189
Georgios Pinitas61ba0692021-01-10 04:07:39 +0000190Status CpuConcatenateDepthKernel::validate(const arm_compute::ITensorInfo *src,
191 unsigned int depth_offset,
192 const arm_compute::ITensorInfo *dst)
Georgios Pinitasae54e022018-07-16 15:41:27 +0100193{
Georgios Pinitas61ba0692021-01-10 04:07:39 +0000194 ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, depth_offset, dst));
Georgios Pinitasae54e022018-07-16 15:41:27 +0100195 return Status{};
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100196}
197
Georgios Pinitas61ba0692021-01-10 04:07:39 +0000198void CpuConcatenateDepthKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100199{
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100200 ARM_COMPUTE_UNUSED(info);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100201 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
Georgios Pinitas61ba0692021-01-10 04:07:39 +0000202 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window);
Georgios Pinitasac4e8732017-07-05 17:02:25 +0100203 ARM_COMPUTE_ERROR_ON(_func == nullptr);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100204
Georgios Pinitas0499dff2020-07-31 22:21:38 +0100205 (*_func)(tensors.get_const_tensor(TensorType::ACL_SRC),
206 tensors.get_tensor(TensorType::ACL_DST),
207 _depth_offset,
208 window);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100209}
Georgios Pinitas61ba0692021-01-10 04:07:39 +0000210
211const char *CpuConcatenateDepthKernel::name() const
212{
213 return "CpuConcatenateDepthKernel";
214}
215} // namespace kernels
216} // namespace cpu
Michalis Spyrouc28d4282020-03-04 15:30:41 +0000217} // namespace arm_compute