blob: c9bcbc912755b0f8d6a62ff61647621f4163b07b [file] [log] [blame]
Manuel Bottini9032ee32019-08-07 17:04:11 +01001/*
Mohammed Suhail Munshi066607f2022-01-19 12:22:50 +00002 * Copyright (c) 2019-2022 Arm Limited.
Manuel Bottini9032ee32019-08-07 17:04:11 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
Michalis Spyrouebcebf12020-10-21 00:04:14 +010024#include "src/core/NEON/kernels/NEPadLayerKernel.h"
Manuel Bottini9032ee32019-08-07 17:04:11 +010025
26#include "arm_compute/core/Error.h"
27#include "arm_compute/core/Helpers.h"
28#include "arm_compute/core/ITensor.h"
Manuel Bottini9032ee32019-08-07 17:04:11 +010029#include "arm_compute/core/TensorInfo.h"
30#include "arm_compute/core/Types.h"
Manuel Bottini9032ee32019-08-07 17:04:11 +010031#include "arm_compute/core/utils/misc/ShapeCalculator.h"
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010032#include "arm_compute/core/Validate.h"
33
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +010034#include "src/core/helpers/AutoConfiguration.h"
35#include "src/core/helpers/WindowHelpers.h"
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010036#include "src/core/NEON/wrapper/wrapper.h"
Manuel Bottini9032ee32019-08-07 17:04:11 +010037
38namespace arm_compute
39{
40namespace
41{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010042Status validate_arguments(const ITensorInfo *input,
43 const ITensorInfo *output,
44 const PaddingList &paddings,
45 const PaddingMode mode)
Manuel Bottini9032ee32019-08-07 17:04:11 +010046{
Georgios Pinitas33843562019-12-10 13:33:18 +000047 ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
48 ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN);
Manuel Bottini9032ee32019-08-07 17:04:11 +010049 ARM_COMPUTE_RETURN_ERROR_ON_MSG(mode != PaddingMode::CONSTANT, "Only constant padding mode is supported");
50 ARM_COMPUTE_RETURN_ERROR_ON_MSG(paddings.size() > 4, "Padding list bigger than 4 dimensions");
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010051 if (output->total_size() != 0)
Manuel Bottini9032ee32019-08-07 17:04:11 +010052 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010053 const TensorShape expected_output_shape =
54 arm_compute::misc::shape_calculator::compute_padded_shape(input->tensor_shape(), paddings);
55 const TensorInfo expected_output_info = input->clone()->set_tensor_shape(expected_output_shape);
Manuel Bottini9032ee32019-08-07 17:04:11 +010056 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &expected_output_info);
57 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
58 }
59 return Status{};
60}
61} // namespace
62
63template <typename T>
64void NEPadLayerKernel::run_pad_constant(const Window &window)
65{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010066 Window output_window{window};
Manuel Bottini9032ee32019-08-07 17:04:11 +010067 output_window.set(Window::DimX, Window::Dimension(0, 1, 1));
68
69 const size_t element_size = _input->info()->element_size();
70 Iterator output_it(_output, output_window);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010071 execute_window_loop(
72 output_window,
73 [&](const Coordinates &id)
Manuel Bottini9032ee32019-08-07 17:04:11 +010074 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010075 Coordinates idin{id};
76 for (size_t dim = _padding.size() - 1; dim > 0; --dim)
Manuel Bottini9032ee32019-08-07 17:04:11 +010077 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010078 idin[dim] -= _padding[dim].first;
79 if (idin[dim] < 0 || static_cast<int>(_input->info()->dimension(dim)) - 1 < idin[dim])
80 {
81 std::fill_n(reinterpret_cast<T *>(output_it.ptr()), _output->info()->dimension(0),
82 _constant_value.get<T>());
83 return;
84 }
Manuel Bottini9032ee32019-08-07 17:04:11 +010085 }
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010086 T *input_it_ptr = reinterpret_cast<T *>(_input->ptr_to_element(idin));
87 T *output_it_ptr = reinterpret_cast<T *>(output_it.ptr());
88 std::fill_n(output_it_ptr, _padding[0].first, _constant_value.get<T>());
89 memcpy(output_it_ptr + _padding[0].first, input_it_ptr, _input->info()->dimension(0) * element_size);
90 std::fill_n(output_it_ptr + _padding[0].first + _input->info()->dimension(0), _padding[0].second,
91 _constant_value.get<T>());
92 },
93 output_it);
Manuel Bottini9032ee32019-08-07 17:04:11 +010094}
95
96void NEPadLayerKernel::run_pad_constant_uint8_3Dinput_3Dpad(const Window &window)
97{
98 ARM_COMPUTE_UNUSED(window);
99
100 const size_t start_plane = window.z().start();
101 const size_t end_plane = window.z().end();
102
Georgios Pinitas64e738f2019-12-18 15:09:00 +0000103 size_t start_plane_input = start_plane;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100104 if (_padding.size() > 2)
Georgios Pinitas64e738f2019-12-18 15:09:00 +0000105 {
106 start_plane_input = (start_plane < _padding[2].first) ? 0 : start_plane - _padding[2].first;
107 }
Manuel Bottini9032ee32019-08-07 17:04:11 +0100108 const int output_plane_size = _output->info()->dimension(0) * _output->info()->dimension(1);
Georgios Pinitas64e738f2019-12-18 15:09:00 +0000109 const int input_plane_size = _input->info()->dimension(0) * _input->info()->dimension(1);
Manuel Bottini9032ee32019-08-07 17:04:11 +0100110
111 const int pad_y_elems_top = (_padding.size() > 1 ? _padding[1].first : 0) * _output->info()->dimension(0);
112 const int pad_y_elems_bot = (_padding.size() > 1 ? _padding[1].second : 0) * _output->info()->dimension(0);
113
Georgios Pinitas64e738f2019-12-18 15:09:00 +0000114 const size_t jump_to_next_row_input = _input->info()->dimension(0);
115 const size_t jump_to_next_row_output = _padding[0].first + _padding[0].second;
Manuel Bottini9032ee32019-08-07 17:04:11 +0100116
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100117 uint8_t *output_row_ptr =
118 _output->buffer() + _output->info()->offset_first_element_in_bytes() + start_plane * output_plane_size;
119 const uint8_t *input_it_ptr =
120 _input->buffer() + _input->info()->offset_first_element_in_bytes() + start_plane_input * input_plane_size;
121 const auto pad_value = _constant_value.get<uint8_t>();
Manuel Bottini9032ee32019-08-07 17:04:11 +0100122
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100123 for (size_t z_i = start_plane; z_i < end_plane; ++z_i)
Manuel Bottini9032ee32019-08-07 17:04:11 +0100124 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100125 if (_padding.size() > 2 && z_i < _padding[2].first)
Manuel Bottini9032ee32019-08-07 17:04:11 +0100126 {
127 memset(output_row_ptr, pad_value, output_plane_size);
128 output_row_ptr += output_plane_size;
129 }
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100130 else if (_padding.size() > 2 && z_i > (_input->info()->dimension(2) + _padding[2].first - 1))
Manuel Bottini9032ee32019-08-07 17:04:11 +0100131 {
132 memset(output_row_ptr, pad_value, output_plane_size);
133 output_row_ptr += output_plane_size;
134 }
135 else
136 {
137 memset(output_row_ptr, pad_value, pad_y_elems_top);
138 output_row_ptr += pad_y_elems_top;
139 size_t y_i = _input->info()->dimension(1);
140 // Basic loop unrolling
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100141 for (; y_i > 3; y_i -= 4)
Manuel Bottini9032ee32019-08-07 17:04:11 +0100142 {
143 memset(output_row_ptr, pad_value, _padding[0].first);
144 output_row_ptr += _padding[0].first;
145
146 memcpy(output_row_ptr, input_it_ptr, _input->info()->dimension(0));
147 output_row_ptr += _input->info()->dimension(0);
148 input_it_ptr += jump_to_next_row_input;
149
150 memset(output_row_ptr, pad_value, _padding[0].second + _padding[0].first);
151 output_row_ptr += jump_to_next_row_output;
152
153 memcpy(output_row_ptr, input_it_ptr, _input->info()->dimension(0));
154 output_row_ptr += _input->info()->dimension(0);
155 input_it_ptr += jump_to_next_row_input;
156
157 memset(output_row_ptr, pad_value, _padding[0].second + _padding[0].first);
158 output_row_ptr += jump_to_next_row_output;
159
160 memcpy(output_row_ptr, input_it_ptr, _input->info()->dimension(0));
161 output_row_ptr += _input->info()->dimension(0);
162 input_it_ptr += jump_to_next_row_input;
163
164 memset(output_row_ptr, pad_value, _padding[0].second + _padding[0].first);
165 output_row_ptr += jump_to_next_row_output;
166
167 memcpy(output_row_ptr, input_it_ptr, _input->info()->dimension(0));
168 output_row_ptr += _input->info()->dimension(0);
169 input_it_ptr += jump_to_next_row_input;
170
171 memset(output_row_ptr, pad_value, _padding[0].second);
172 output_row_ptr += _padding[0].second;
173 }
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100174 for (; y_i > 0; --y_i)
Manuel Bottini9032ee32019-08-07 17:04:11 +0100175 {
176 memset(output_row_ptr, pad_value, _padding[0].first);
177 output_row_ptr += _padding[0].first;
178
179 memcpy(output_row_ptr, input_it_ptr, _input->info()->dimension(0));
180 output_row_ptr += _input->info()->dimension(0);
181 input_it_ptr += _input->info()->dimension(0);
182
183 memset(output_row_ptr, pad_value, _padding[0].second);
184 output_row_ptr += _padding[0].second;
185 }
Manuel Bottini9032ee32019-08-07 17:04:11 +0100186 memset(output_row_ptr, pad_value, pad_y_elems_bot);
187 output_row_ptr += pad_y_elems_bot;
188 }
189 }
190}
191
192NEPadLayerKernel::NEPadLayerKernel()
193 : _func(), _input(nullptr), _output(nullptr), _padding(), _constant_value(), _mode()
194{
195}
196
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100197void NEPadLayerKernel::configure(ITensor *input,
198 ITensor *output,
199 const PaddingList &padding,
200 const PixelValue constant_value,
201 const PaddingMode mode)
Manuel Bottini9032ee32019-08-07 17:04:11 +0100202{
203 ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
204 // Auto-init
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100205 const TensorShape expected_output_shape =
206 arm_compute::misc::shape_calculator::compute_padded_shape(input->info()->tensor_shape(), padding);
207 const TensorInfo expected_output_info = input->info()->clone()->set_tensor_shape(expected_output_shape);
Manuel Bottini9032ee32019-08-07 17:04:11 +0100208 auto_init_if_empty(*output->info(), expected_output_info);
209
210 // Perform validation step
211 ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), padding, mode));
212
213 _input = input;
214 _output = output;
215 _padding = padding;
216 _constant_value = constant_value;
217 _mode = mode;
218
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100219 if (_mode == PaddingMode::CONSTANT)
Manuel Bottini9032ee32019-08-07 17:04:11 +0100220 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100221 switch (_input->info()->element_size())
Manuel Bottini9032ee32019-08-07 17:04:11 +0100222 {
223 case 1:
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100224 if (_input->info()->num_dimensions() == 3 && // Is 3D
225 padding.size() <= 3 && // Has 3D padding
226 !_input->info()->has_padding() && !_output->info()->has_padding()) // Input & Output have no padding
Manuel Bottini9032ee32019-08-07 17:04:11 +0100227 {
228 _func = &NEPadLayerKernel::run_pad_constant_uint8_3Dinput_3Dpad;
229 }
230 else
231 {
232 _func = &NEPadLayerKernel::run_pad_constant<uint8_t>;
233 }
234 break;
235 case 2:
236 _func = &NEPadLayerKernel::run_pad_constant<uint16_t>;
237 break;
238 case 4:
239 _func = &NEPadLayerKernel::run_pad_constant<uint32_t>;
240 break;
241 default:
242 ARM_COMPUTE_ERROR("Element size not supported");
243 break;
244 }
245 }
246 else
247 {
248 ARM_COMPUTE_ERROR("Padding mode not supported");
249 }
250
251 // Configure kernel window
252 Window win = calculate_max_window(*output->info(), Steps());
253
254 // The NEPad doesn't need padding so update_window_and_padding() can be skipped
Manuel Bottini9032ee32019-08-07 17:04:11 +0100255
256 ICPPKernel::configure(win);
257}
258
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100259Status NEPadLayerKernel::validate(const ITensorInfo *input,
260 const ITensorInfo *output,
261 const PaddingList &padding,
262 const PixelValue constant_value,
263 const PaddingMode mode)
Manuel Bottini9032ee32019-08-07 17:04:11 +0100264{
265 ARM_COMPUTE_UNUSED(constant_value);
266 ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, padding, mode));
267 return Status{};
268}
269
270void NEPadLayerKernel::run(const Window &window, const ThreadInfo &info)
271{
272 ARM_COMPUTE_UNUSED(info);
273 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
274 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
275
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100276 if (_func != nullptr)
Manuel Bottini9032ee32019-08-07 17:04:11 +0100277 {
278 (this->*_func)(window);
279 }
280}
Dana Zlotnik4cdd6b82021-10-07 15:31:54 +0300281
282size_t NEPadLayerKernel::get_mws(const CPUInfo &platform, size_t thread_count) const
283{
Dana Zlotnikd7154db2021-11-10 11:50:58 +0200284 ARM_COMPUTE_UNUSED(thread_count);
Mohammed Suhail Munshi066607f2022-01-19 12:22:50 +0000285 ARM_COMPUTE_UNUSED(platform);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100286
Mohammed Suhail Munshi066607f2022-01-19 12:22:50 +0000287 return ICPPKernel::default_mws;
Dana Zlotnik4cdd6b82021-10-07 15:31:54 +0300288}
289
Manuel Bottini9032ee32019-08-07 17:04:11 +0100290} // namespace arm_compute