/*
 * Copyright (c) 2019 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#include "arm_compute/core/NEON/kernels/NEPadLayerKernel.h"
25
26#include "arm_compute/core/Error.h"
27#include "arm_compute/core/Helpers.h"
28#include "arm_compute/core/ITensor.h"
29#include "arm_compute/core/NEON/wrapper/wrapper.h"
30#include "arm_compute/core/TensorInfo.h"
31#include "arm_compute/core/Types.h"
32#include "arm_compute/core/Validate.h"
33#include "arm_compute/core/utils/misc/ShapeCalculator.h"
34
35namespace arm_compute
36{
37namespace
38{
39Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &paddings, const PaddingMode mode)
40{
41 ARM_COMPUTE_RETURN_ERROR_ON_MSG(mode != PaddingMode::CONSTANT, "Only constant padding mode is supported");
42 ARM_COMPUTE_RETURN_ERROR_ON_MSG(paddings.size() > 4, "Padding list bigger than 4 dimensions");
43 if(output->total_size() != 0)
44 {
45 const TensorShape expected_output_shape = arm_compute::misc::shape_calculator::compute_padded_shape(input->tensor_shape(), paddings);
46 const TensorInfo expected_output_info = input->clone()->set_tensor_shape(expected_output_shape);
47 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(output, &expected_output_info);
48 ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
49 }
50 return Status{};
51}
52} // namespace
53
54template <typename T>
55void NEPadLayerKernel::run_pad_constant(const Window &window)
56{
57 Window output_window{ window };
58 output_window.set(Window::DimX, Window::Dimension(0, 1, 1));
59
60 const size_t element_size = _input->info()->element_size();
61 Iterator output_it(_output, output_window);
62 execute_window_loop(output_window, [&](const Coordinates & id)
63 {
64 Coordinates idin{ id };
65 for(size_t dim = _padding.size() - 1; dim > 0; --dim)
66 {
67 idin[dim] -= _padding[dim].first;
68 if(idin[dim] < 0 || static_cast<int>(_input->info()->dimension(dim)) - 1 < idin[dim])
69 {
70 std::fill_n(reinterpret_cast<T *>(output_it.ptr()), _output->info()->dimension(0), _constant_value.get<T>());
71 return;
72 }
73 }
74 T *input_it_ptr = reinterpret_cast<T *>(_input->ptr_to_element(idin));
75 T *output_it_ptr = reinterpret_cast<T *>(output_it.ptr());
76 std::fill_n(output_it_ptr, _padding[0].first, _constant_value.get<T>());
77 memcpy(output_it_ptr + _padding[0].first, input_it_ptr, _input->info()->dimension(0) * element_size);
78 std::fill_n(output_it_ptr + _padding[0].first + _input->info()->dimension(0), _padding[0].second, _constant_value.get<T>());
79 },
80 output_it);
81}
82
83void NEPadLayerKernel::run_pad_constant_uint8_3Dinput_3Dpad(const Window &window)
84{
85 ARM_COMPUTE_UNUSED(window);
86
87 const size_t start_plane = window.z().start();
88 const size_t end_plane = window.z().end();
89
90 const size_t start_plane_input = start_plane - (_padding.size() > 2 && start_plane >= _padding[2].first ? _padding[2].first : 0);
91
92 const int output_plane_size = _output->info()->dimension(0) * _output->info()->dimension(1);
93 const int input_plane_size = (_input->info()->dimension(0) + _input->info()->padding().right + _input->info()->padding().left) * (_input->info()->dimension(
94 1)
95 + _input->info()->padding().top + _input->info()->padding().bottom);
96
97 const int pad_y_elems_top = (_padding.size() > 1 ? _padding[1].first : 0) * _output->info()->dimension(0);
98 const int pad_y_elems_bot = (_padding.size() > 1 ? _padding[1].second : 0) * _output->info()->dimension(0);
99
100 const size_t jump_to_next_row_input = _input->info()->dimension(0) + _input->info()->padding().right + _input->info()->padding().left;
101 const size_t jump_to_next_row_output = _padding[0].first + _padding[0].second;
102 const size_t jump_to_next_plane_input = _input->info()->padding().empty() ? 0 : _input->info()->dimension(0) * (_input->info()->padding().right + _input->info()->padding().top);
103
104 uint8_t *output_row_ptr = _output->buffer() + start_plane * output_plane_size;
105 const uint8_t *input_it_ptr = _input->buffer() + _input->info()->offset_first_element_in_bytes() + start_plane_input * input_plane_size;
106 const auto pad_value = _constant_value.get<uint8_t>();
107
108 for(size_t z_i = start_plane; z_i < end_plane; ++z_i)
109 {
110 if(_padding.size() > 2 && z_i < _padding[2].first)
111 {
112 memset(output_row_ptr, pad_value, output_plane_size);
113 output_row_ptr += output_plane_size;
114 }
115 else if(_padding.size() > 2 && z_i > _input->info()->dimension(2) + _padding[2].first - 1)
116 {
117 memset(output_row_ptr, pad_value, output_plane_size);
118 output_row_ptr += output_plane_size;
119 }
120 else
121 {
122 memset(output_row_ptr, pad_value, pad_y_elems_top);
123 output_row_ptr += pad_y_elems_top;
124 size_t y_i = _input->info()->dimension(1);
125 // Basic loop unrolling
126 for(; y_i > 3; y_i -= 4)
127 {
128 memset(output_row_ptr, pad_value, _padding[0].first);
129 output_row_ptr += _padding[0].first;
130
131 memcpy(output_row_ptr, input_it_ptr, _input->info()->dimension(0));
132 output_row_ptr += _input->info()->dimension(0);
133 input_it_ptr += jump_to_next_row_input;
134
135 memset(output_row_ptr, pad_value, _padding[0].second + _padding[0].first);
136 output_row_ptr += jump_to_next_row_output;
137
138 memcpy(output_row_ptr, input_it_ptr, _input->info()->dimension(0));
139 output_row_ptr += _input->info()->dimension(0);
140 input_it_ptr += jump_to_next_row_input;
141
142 memset(output_row_ptr, pad_value, _padding[0].second + _padding[0].first);
143 output_row_ptr += jump_to_next_row_output;
144
145 memcpy(output_row_ptr, input_it_ptr, _input->info()->dimension(0));
146 output_row_ptr += _input->info()->dimension(0);
147 input_it_ptr += jump_to_next_row_input;
148
149 memset(output_row_ptr, pad_value, _padding[0].second + _padding[0].first);
150 output_row_ptr += jump_to_next_row_output;
151
152 memcpy(output_row_ptr, input_it_ptr, _input->info()->dimension(0));
153 output_row_ptr += _input->info()->dimension(0);
154 input_it_ptr += jump_to_next_row_input;
155
156 memset(output_row_ptr, pad_value, _padding[0].second);
157 output_row_ptr += _padding[0].second;
158 }
159 for(; y_i > 0; --y_i)
160 {
161 memset(output_row_ptr, pad_value, _padding[0].first);
162 output_row_ptr += _padding[0].first;
163
164 memcpy(output_row_ptr, input_it_ptr, _input->info()->dimension(0));
165 output_row_ptr += _input->info()->dimension(0);
166 input_it_ptr += _input->info()->dimension(0);
167
168 memset(output_row_ptr, pad_value, _padding[0].second);
169 output_row_ptr += _padding[0].second;
170 }
171 input_it_ptr += jump_to_next_plane_input;
172 memset(output_row_ptr, pad_value, pad_y_elems_bot);
173 output_row_ptr += pad_y_elems_bot;
174 }
175 }
176}
177
178NEPadLayerKernel::NEPadLayerKernel()
179 : _func(), _input(nullptr), _output(nullptr), _padding(), _constant_value(), _mode()
180{
181}
182
183void NEPadLayerKernel::configure(ITensor *input, ITensor *output, const PaddingList &padding, const PixelValue constant_value, const PaddingMode mode)
184{
185 ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
186 // Auto-init
187 const TensorShape expected_output_shape = arm_compute::misc::shape_calculator::compute_padded_shape(input->info()->tensor_shape(), padding);
188 const TensorInfo expected_output_info = input->info()->clone()->set_tensor_shape(expected_output_shape);
189 auto_init_if_empty(*output->info(), expected_output_info);
190
191 // Perform validation step
192 ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), padding, mode));
193
194 _input = input;
195 _output = output;
196 _padding = padding;
197 _constant_value = constant_value;
198 _mode = mode;
199
200 if(_mode == PaddingMode::CONSTANT)
201 {
202 switch(_input->info()->element_size())
203 {
204 case 1:
205 if(_input->info()->num_dimensions() == 3 && padding.size() <= 3)
206 {
207 _func = &NEPadLayerKernel::run_pad_constant_uint8_3Dinput_3Dpad;
208 }
209 else
210 {
211 _func = &NEPadLayerKernel::run_pad_constant<uint8_t>;
212 }
213 break;
214 case 2:
215 _func = &NEPadLayerKernel::run_pad_constant<uint16_t>;
216 break;
217 case 4:
218 _func = &NEPadLayerKernel::run_pad_constant<uint32_t>;
219 break;
220 default:
221 ARM_COMPUTE_ERROR("Element size not supported");
222 break;
223 }
224 }
225 else
226 {
227 ARM_COMPUTE_ERROR("Padding mode not supported");
228 }
229
230 // Configure kernel window
231 Window win = calculate_max_window(*output->info(), Steps());
232
233 // The NEPad doesn't need padding so update_window_and_padding() can be skipped
234 Coordinates coord;
235 coord.set_num_dimensions(output->info()->num_dimensions());
236 output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape()));
237
238 ICPPKernel::configure(win);
239}
240
241Status NEPadLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding, const PixelValue constant_value, const PaddingMode mode)
242{
243 ARM_COMPUTE_UNUSED(constant_value);
244 ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, padding, mode));
245 return Status{};
246}
247
248void NEPadLayerKernel::run(const Window &window, const ThreadInfo &info)
249{
250 ARM_COMPUTE_UNUSED(info);
251 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
252 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
253
254 if(_func != nullptr)
255 {
256 (this->*_func)(window);
257 }
258}
259} // namespace arm_compute