blob: 00b0c0ae8d43ccde88b57167f06b5a390052e53b [file] [log] [blame]
/*
 * Copyright (c) 2016-2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
Michalis Spyrouebcebf12020-10-21 00:04:14 +010024#include "src/core/NEON/kernels/NEFillBorderKernel.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010025
26#include "arm_compute/core/Error.h"
27#include "arm_compute/core/Helpers.h"
28#include "arm_compute/core/ITensor.h"
29#include "arm_compute/core/TensorInfo.h"
Georgios Pinitas583137c2017-08-31 18:12:42 +010030#include "arm_compute/core/Types.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010031#include "arm_compute/core/Validate.h"
32#include "arm_compute/core/Window.h"
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010033
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +010034#include "src/core/helpers/WindowHelpers.h"
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010035#include "src/core/NEON/kernels/NEFillBorderKernel.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010036
Michalis Spyrou95abfdd2018-11-28 14:59:47 +000037namespace arm_compute
38{
Pablo Tello62eeae42017-08-09 16:33:49 +010039namespace
40{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010041inline void fill_constant_value_single_channel_special(ITensor *tensor,
42 const Window &window,
43 unsigned int right,
44 unsigned int bottom,
45 const PixelValue &constant_border_value)
Pablo Tello62eeae42017-08-09 16:33:49 +010046{
47 float border_value;
48 constant_border_value.get(border_value);
49 uint8_t *const start_valid_region = tensor->ptr_to_element(tensor->info()->valid_region().anchor);
Georgios Pinitas0223a782017-12-12 11:44:44 +000050 const size_t width = tensor->info()->valid_region().shape[0];
51 const size_t height = tensor->info()->valid_region().shape[1];
Pablo Tello62eeae42017-08-09 16:33:49 +010052 const int stridey = tensor->info()->strides_in_bytes()[1];
53
54 // Left and right border
55 Window vertical(window);
56 vertical.set(Window::DimY, Window::Dimension(0, height, 1));
57
58 Iterator vertical_it(tensor, vertical);
59
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010060 execute_window_loop(
61 vertical,
62 [&](const Coordinates &)
63 {
64 const auto row_start = reinterpret_cast<float *>(start_valid_region + vertical_it.offset());
Pablo Tello62eeae42017-08-09 16:33:49 +010065
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010066 // Fill left and right borders
67 *(row_start - 1) = border_value;
68 std::fill_n(row_start + width, right, border_value);
69 },
70 vertical_it);
Pablo Tello62eeae42017-08-09 16:33:49 +010071
72 // Top and bottom border
73 Iterator plane_it(tensor, window);
74
75 // Iterate over all XY planes
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010076 execute_window_loop(
77 window,
78 [&](const Coordinates &)
Pablo Tello62eeae42017-08-09 16:33:49 +010079 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010080 uint8_t *base_addr = start_valid_region + plane_it.offset();
81 // Top border
82 const auto row_start = reinterpret_cast<float *>(base_addr - stridey);
83 // Fill top rows including left/right borders
Pablo Tello62eeae42017-08-09 16:33:49 +010084 std::fill_n(row_start - 1, 1 + width + right, border_value);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010085
86 // Bottom border
87 const unsigned low_border_size = height + bottom;
88 for (unsigned int i = height; i < low_border_size; ++i)
89 {
90 const auto row_start = reinterpret_cast<float *>(base_addr + i * stridey);
91
92 // Fill bottom rows including left/right borders
93 std::fill_n(row_start - 1, 1 + width + right, border_value);
94 }
95 },
96 plane_it);
Pablo Tello62eeae42017-08-09 16:33:49 +010097}
98} // namespace
99
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100100NEFillBorderKernel::NEFillBorderKernel()
Michalis Spyrou490bf2e2017-09-29 11:24:55 +0100101 : _tensor(nullptr), _border_size(0), _mode(BorderMode::UNDEFINED), _constant_border_value(static_cast<float>(0.f))
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100102{
103}
104
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100105void NEFillBorderKernel::configure(ITensor *tensor,
106 BorderSize border_size,
107 BorderMode border_mode,
108 const PixelValue &constant_border_value)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100109{
Georgios Pinitas33843562019-12-10 13:33:18 +0000110 ARM_COMPUTE_ERROR_ON_NULLPTR(tensor);
Michele Di Giorgio19289042021-02-03 16:05:00 +0000111 _tensor = tensor;
112 configure(tensor->info(), border_size, border_mode, constant_border_value);
113}
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100114
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100115void NEFillBorderKernel::configure(ITensorInfo *tensor,
116 BorderSize border_size,
117 BorderMode border_mode,
118 const PixelValue &constant_border_value)
Michele Di Giorgio19289042021-02-03 16:05:00 +0000119{
120 ARM_COMPUTE_ERROR_ON_NULLPTR(tensor);
Michele Di Giorgio33f41fa2021-03-09 14:09:08 +0000121 //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use CPU FP16 instructions.
Michele Di Giorgio19289042021-02-03 16:05:00 +0000122 ARM_COMPUTE_ERROR_ON(tensor->data_type() == DataType::UNKNOWN);
123
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100124 _border_size = border_size;
125 _mode = border_mode;
126 _constant_border_value = constant_border_value;
127
Michele Di Giorgio19289042021-02-03 16:05:00 +0000128 _border_size.limit(tensor->padding());
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100129
130 Window win;
131 win.set(Window::DimX, Window::Dimension(0, 1, 1));
132 win.set(Window::DimY, Window::Dimension(0, 1, 1));
Michele Di Giorgio19289042021-02-03 16:05:00 +0000133 win.use_tensor_dimensions(tensor->tensor_shape(), Window::DimZ);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100134 INEKernel::configure(win);
135}
136
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100137void NEFillBorderKernel::run(const Window &window, const ThreadInfo &info)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100138{
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100139 ARM_COMPUTE_UNUSED(info);
140
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100141 // If there is no border: early exit
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100142 if (_border_size.empty())
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100143 {
144 return;
145 }
146
147 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
148 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
149
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100150 switch (_mode)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100151 {
152 case BorderMode::CONSTANT:
153 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100154 if (_border_size.left == 1 && _border_size.top == 1 && _tensor->info()->data_type() == DataType::F32)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100155 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100156 fill_constant_value_single_channel_special(_tensor, window, _border_size.right, _border_size.bottom,
157 _constant_border_value);
Michalis Spyrou95abfdd2018-11-28 14:59:47 +0000158 }
159 else
160 {
161 fill_constant_value_single_channel(window);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100162 }
163 break;
164 }
165 case BorderMode::REPLICATE:
166 {
Michalis Spyrou95abfdd2018-11-28 14:59:47 +0000167 fill_replicate_single_channel(window);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100168 break;
169 }
170 case BorderMode::UNDEFINED:
171 break; // Nothing to do here
172 default:
173 ARM_COMPUTE_ERROR("Unknown border mode");
174 }
175}
176
Michele Di Giorgio19289042021-02-03 16:05:00 +0000177void NEFillBorderKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
178{
179 _tensor = tensors.get_tensor(TensorType::ACL_SRC_DST);
180 run(window, info);
181}
182
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100183void NEFillBorderKernel::fill_replicate_single_channel(const Window &window)
184{
185 uint8_t *const start_valid_region = _tensor->ptr_to_element(_tensor->info()->valid_region().anchor);
Georgios Pinitas424eb5d2017-12-06 19:49:38 +0000186 const size_t width = _tensor->info()->valid_region().shape[0];
187 const size_t height = _tensor->info()->valid_region().shape[1];
Michalis Spyrou95abfdd2018-11-28 14:59:47 +0000188 const size_t element_size = _tensor->info()->element_size();
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100189 // Left and right border
190 Window vertical(window);
191 vertical.set(Window::DimY, Window::Dimension(0, height, 1));
192
193 Iterator vertical_it(_tensor, vertical);
194
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100195 execute_window_loop(
196 vertical,
197 [&](const Coordinates &)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100198 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100199 uint8_t *base_addr = start_valid_region + vertical_it.offset();
200 // Fill left and right borders
201 for (unsigned int i = 0; i < _border_size.left; ++i)
202 {
203 std::memcpy(base_addr + static_cast<int>(i - _border_size.left) * element_size, vertical_it.ptr(),
204 element_size);
205 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100206
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100207 for (unsigned int i = 0; i < _border_size.right; ++i)
208 {
209 std::memcpy(base_addr + (width + i) * element_size, vertical_it.ptr() + (width - 1) * element_size,
210 element_size);
211 }
212 },
213 vertical_it);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100214
215 // Top and bottom border
216 Iterator plane_it(_tensor, window);
217
218 // Iterate over all XY planes
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100219 execute_window_loop(
220 window,
221 [&](const Coordinates &)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100222 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100223 uint8_t *base_addr = start_valid_region + plane_it.offset();
224 // Top border
225 for (int i = -_border_size.top; i < 0; ++i)
226 {
227 // Copy top rows including left/right borders
228 std::memcpy(base_addr + i * static_cast<int>(_tensor->info()->strides_in_bytes()[1]) -
229 _border_size.left * element_size,
230 base_addr - _border_size.left * element_size,
231 (_border_size.left + width + _border_size.right) * element_size);
232 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100233
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100234 // Bottom border
235 for (unsigned int i = height; i < height + _border_size.bottom; ++i)
236 {
237 // Copy bottom rows including left/right borders
238 std::memcpy(base_addr + i * _tensor->info()->strides_in_bytes()[1] - _border_size.left * element_size,
239 base_addr + (height - 1) * _tensor->info()->strides_in_bytes()[1] -
240 _border_size.left * element_size,
241 (_border_size.left + width + _border_size.right) * element_size);
242 }
243 },
244 plane_it);
Michalis Spyrou95abfdd2018-11-28 14:59:47 +0000245}
246
247void NEFillBorderKernel::fill_constant_value_single_channel(const Window &window)
248{
249 uint8_t *const start_valid_region = _tensor->ptr_to_element(_tensor->info()->valid_region().anchor);
250 const size_t width = _tensor->info()->valid_region().shape[0];
251 const size_t height = _tensor->info()->valid_region().shape[1];
252 const int stridey = _tensor->info()->strides_in_bytes()[1];
253 const size_t element_size = _tensor->info()->element_size();
254
255 // Left and right border
256 Window vertical(window);
257 vertical.set(Window::DimY, Window::Dimension(0, height, 1));
258
259 Iterator vertical_it(_tensor, vertical);
260
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100261 execute_window_loop(
262 vertical,
263 [&](const Coordinates &)
Michalis Spyrou95abfdd2018-11-28 14:59:47 +0000264 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100265 uint8_t *base_addr = start_valid_region + vertical_it.offset();
266 // Fill left and right borders
267 for (unsigned int i = 0; i < _border_size.left; ++i)
268 {
269 std::memcpy(base_addr + static_cast<int>(i - _border_size.left) * element_size, &_constant_border_value,
270 element_size);
271 }
Michalis Spyrou95abfdd2018-11-28 14:59:47 +0000272
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100273 for (unsigned int i = 0; i < _border_size.right; ++i)
274 {
275 std::memcpy(base_addr + (width + i) * element_size, &_constant_border_value, element_size);
276 }
277 },
278 vertical_it);
Michalis Spyrou95abfdd2018-11-28 14:59:47 +0000279
280 // Top and bottom border
281 Iterator plane_it(_tensor, window);
282
283 // Iterate over all XY planes
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100284 execute_window_loop(
285 window,
286 [&](const Coordinates &)
Michalis Spyrou95abfdd2018-11-28 14:59:47 +0000287 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100288 uint8_t *base_addr = start_valid_region + plane_it.offset();
289 // Top border
290 for (int i = -_border_size.top; i < 0; ++i)
Michalis Spyrou95abfdd2018-11-28 14:59:47 +0000291 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100292 // Fill top rows including left/right borders
293 for (unsigned int j = 0; j < (_border_size.left + width + _border_size.right); ++j)
294 {
295 std::memcpy(base_addr + i * stridey + static_cast<int>(j - _border_size.left) * element_size,
296 &_constant_border_value, element_size);
297 }
Michalis Spyrou95abfdd2018-11-28 14:59:47 +0000298 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100299
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100300 // Bottom border
301 const unsigned low_border_size = height + _border_size.bottom;
302 for (unsigned int i = height; i < low_border_size; ++i)
Michalis Spyrou95abfdd2018-11-28 14:59:47 +0000303 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100304 // Fill bottom rows including left/right borders
305 for (unsigned int j = 0; j < (_border_size.left + width + _border_size.right); ++j)
306 {
307 std::memcpy(base_addr + i * stridey + static_cast<int>(j - _border_size.left) * element_size,
308 &_constant_border_value, element_size);
309 }
Michalis Spyrou95abfdd2018-11-28 14:59:47 +0000310 }
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100311 },
312 plane_it);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100313}
Michalis Spyrou95abfdd2018-11-28 14:59:47 +0000314} // namespace arm_compute