blob: 75d46c61d8d9c3890673c79facf07019b7227a7a [file] [log] [blame]
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001/*
Manuel Bottinib412fab2018-12-10 17:40:23 +00002 * Copyright (c) 2016-2019 ARM Limited.
Anthony Barbier6ff3b192017-09-04 18:44:23 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
25
26#include "arm_compute/core/Error.h"
27#include "arm_compute/core/Helpers.h"
28#include "arm_compute/core/ITensor.h"
29#include "arm_compute/core/TensorInfo.h"
Georgios Pinitas583137c2017-08-31 18:12:42 +010030#include "arm_compute/core/Types.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010031#include "arm_compute/core/Validate.h"
32#include "arm_compute/core/Window.h"
33
34#include <algorithm>
35#include <cstdint>
36
Michalis Spyrou95abfdd2018-11-28 14:59:47 +000037namespace arm_compute
38{
39class Coordinates;
Pablo Tello62eeae42017-08-09 16:33:49 +010040namespace
41{
Michalis Spyrou95abfdd2018-11-28 14:59:47 +000042inline void fill_constant_value_single_channel_special(ITensor *tensor, const Window &window, unsigned int right, unsigned int bottom, const PixelValue &constant_border_value)
Pablo Tello62eeae42017-08-09 16:33:49 +010043{
44 float border_value;
45 constant_border_value.get(border_value);
46 uint8_t *const start_valid_region = tensor->ptr_to_element(tensor->info()->valid_region().anchor);
Georgios Pinitas0223a782017-12-12 11:44:44 +000047 const size_t width = tensor->info()->valid_region().shape[0];
48 const size_t height = tensor->info()->valid_region().shape[1];
Pablo Tello62eeae42017-08-09 16:33:49 +010049 const int stridey = tensor->info()->strides_in_bytes()[1];
50
51 // Left and right border
52 Window vertical(window);
53 vertical.set(Window::DimY, Window::Dimension(0, height, 1));
54
55 Iterator vertical_it(tensor, vertical);
56
57 execute_window_loop(vertical, [&](const Coordinates &)
58 {
59 const auto row_start = reinterpret_cast<float *>(start_valid_region + vertical_it.offset());
60
61 // Fill left and right borders
62 *(row_start - 1) = border_value;
63 std::fill_n(row_start + width, right, border_value);
64 },
65 vertical_it);
66
67 // Top and bottom border
68 Iterator plane_it(tensor, window);
69
70 // Iterate over all XY planes
71 execute_window_loop(window, [&](const Coordinates &)
72 {
73 uint8_t *base_addr = start_valid_region + plane_it.offset();
74 // Top border
75 const auto row_start = reinterpret_cast<float *>(base_addr - stridey);
76 // Fill top rows including left/right borders
77 std::fill_n(row_start - 1, 1 + width + right, border_value);
78
79 // Bottom border
80 const unsigned low_border_size = height + bottom;
81 for(unsigned int i = height; i < low_border_size; ++i)
82 {
83 const auto row_start = reinterpret_cast<float *>(base_addr + i * stridey);
84
85 // Fill bottom rows including left/right borders
86 std::fill_n(row_start - 1, 1 + width + right, border_value);
87 }
88 },
89 plane_it);
90}
91} // namespace
92
Anthony Barbier6ff3b192017-09-04 18:44:23 +010093NEFillBorderKernel::NEFillBorderKernel()
Michalis Spyrou490bf2e2017-09-29 11:24:55 +010094 : _tensor(nullptr), _border_size(0), _mode(BorderMode::UNDEFINED), _constant_border_value(static_cast<float>(0.f))
Anthony Barbier6ff3b192017-09-04 18:44:23 +010095{
96}
97
98void NEFillBorderKernel::configure(ITensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value)
99{
Georgios Pinitas33843562019-12-10 13:33:18 +0000100 ARM_COMPUTE_ERROR_ON_NULLPTR(tensor);
Anthony Barbiereaefd002018-07-20 17:49:35 +0100101 //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions.
Georgios Pinitas33843562019-12-10 13:33:18 +0000102 ARM_COMPUTE_ERROR_ON(tensor->info()->data_type() == DataType::UNKNOWN);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100103
104 _tensor = tensor;
105 _border_size = border_size;
106 _mode = border_mode;
107 _constant_border_value = constant_border_value;
108
109 _border_size.limit(tensor->info()->padding());
110
111 Window win;
112 win.set(Window::DimX, Window::Dimension(0, 1, 1));
113 win.set(Window::DimY, Window::Dimension(0, 1, 1));
SiCong Li86b53332017-08-23 11:02:43 +0100114 win.use_tensor_dimensions(_tensor->info()->tensor_shape(), Window::DimZ);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100115 INEKernel::configure(win);
116}
117
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100118void NEFillBorderKernel::run(const Window &window, const ThreadInfo &info)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100119{
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100120 ARM_COMPUTE_UNUSED(info);
121
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100122 // If there is no border: early exit
123 if(_border_size.empty())
124 {
125 return;
126 }
127
128 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
129 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
130
131 switch(_mode)
132 {
133 case BorderMode::CONSTANT:
134 {
Michalis Spyrou95abfdd2018-11-28 14:59:47 +0000135 if(_border_size.left == 1 && _border_size.top == 1 && _tensor->info()->data_type() == DataType::F32)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100136 {
Michalis Spyrou95abfdd2018-11-28 14:59:47 +0000137 fill_constant_value_single_channel_special(_tensor, window, _border_size.right, _border_size.bottom, _constant_border_value);
138 }
139 else
140 {
141 fill_constant_value_single_channel(window);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100142 }
143 break;
144 }
145 case BorderMode::REPLICATE:
146 {
Michalis Spyrou95abfdd2018-11-28 14:59:47 +0000147 fill_replicate_single_channel(window);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100148 break;
149 }
150 case BorderMode::UNDEFINED:
151 break; // Nothing to do here
152 default:
153 ARM_COMPUTE_ERROR("Unknown border mode");
154 }
155}
156
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100157void NEFillBorderKernel::fill_replicate_single_channel(const Window &window)
158{
159 uint8_t *const start_valid_region = _tensor->ptr_to_element(_tensor->info()->valid_region().anchor);
Georgios Pinitas424eb5d2017-12-06 19:49:38 +0000160 const size_t width = _tensor->info()->valid_region().shape[0];
161 const size_t height = _tensor->info()->valid_region().shape[1];
Michalis Spyrou95abfdd2018-11-28 14:59:47 +0000162 const size_t element_size = _tensor->info()->element_size();
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100163 // Left and right border
164 Window vertical(window);
165 vertical.set(Window::DimY, Window::Dimension(0, height, 1));
166
167 Iterator vertical_it(_tensor, vertical);
168
Michalis Spyroubcfd09a2019-05-01 13:03:59 +0100169 execute_window_loop(vertical, [&](const Coordinates &)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100170 {
Michalis Spyrou95abfdd2018-11-28 14:59:47 +0000171 uint8_t *base_addr = start_valid_region + vertical_it.offset();
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100172 // Fill left and right borders
Michalis Spyrou95abfdd2018-11-28 14:59:47 +0000173 for(unsigned int i = 0; i < _border_size.left; ++i)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100174 {
Michalis Spyrou95abfdd2018-11-28 14:59:47 +0000175 std::memcpy(base_addr + static_cast<int>(i - _border_size.left) * element_size, vertical_it.ptr(), element_size);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100176 }
177
Michalis Spyrou95abfdd2018-11-28 14:59:47 +0000178 for(unsigned int i = 0; i < _border_size.right; ++i)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100179 {
Michalis Spyrou95abfdd2018-11-28 14:59:47 +0000180 std::memcpy(base_addr + (width + i) * element_size, vertical_it.ptr() + (width - 1) * element_size, element_size);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100181 }
182 },
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100183 vertical_it);
184
185 // Top and bottom border
186 Iterator plane_it(_tensor, window);
187
188 // Iterate over all XY planes
Michalis Spyroubcfd09a2019-05-01 13:03:59 +0100189 execute_window_loop(window, [&](const Coordinates &)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100190 {
Pablo Tello62eeae42017-08-09 16:33:49 +0100191 uint8_t *base_addr = start_valid_region + plane_it.offset();
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100192 // Top border
193 for(int i = -_border_size.top; i < 0; ++i)
194 {
Michalis Spyrou95abfdd2018-11-28 14:59:47 +0000195 // Copy top rows including left/right borders
Michalis Spyrou7c60c992019-10-10 14:33:47 +0100196 std::memcpy(base_addr + i * static_cast<int>(_tensor->info()->strides_in_bytes()[1]) - _border_size.left * element_size,
Michalis Spyrou95abfdd2018-11-28 14:59:47 +0000197 base_addr - _border_size.left * element_size, (_border_size.left + width + _border_size.right) * element_size);
198 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100199
Michalis Spyrou95abfdd2018-11-28 14:59:47 +0000200 // Bottom border
201 for(unsigned int i = height; i < height + _border_size.bottom; ++i)
202 {
203 // Copy bottom rows including left/right borders
204 std::memcpy(base_addr + i * _tensor->info()->strides_in_bytes()[1] - _border_size.left * element_size,
205 base_addr + (height - 1) * _tensor->info()->strides_in_bytes()[1] - _border_size.left * element_size, (_border_size.left + width + _border_size.right) * element_size);
206 }
207 },
208 plane_it);
209}
210
211void NEFillBorderKernel::fill_constant_value_single_channel(const Window &window)
212{
213 uint8_t *const start_valid_region = _tensor->ptr_to_element(_tensor->info()->valid_region().anchor);
214 const size_t width = _tensor->info()->valid_region().shape[0];
215 const size_t height = _tensor->info()->valid_region().shape[1];
216 const int stridey = _tensor->info()->strides_in_bytes()[1];
217 const size_t element_size = _tensor->info()->element_size();
218
219 // Left and right border
220 Window vertical(window);
221 vertical.set(Window::DimY, Window::Dimension(0, height, 1));
222
223 Iterator vertical_it(_tensor, vertical);
224
Michalis Spyroubcfd09a2019-05-01 13:03:59 +0100225 execute_window_loop(vertical, [&](const Coordinates &)
Michalis Spyrou95abfdd2018-11-28 14:59:47 +0000226 {
227 uint8_t *base_addr = start_valid_region + vertical_it.offset();
228 // Fill left and right borders
229 for(unsigned int i = 0; i < _border_size.left; ++i)
230 {
231 std::memcpy(base_addr + static_cast<int>(i - _border_size.left) * element_size, &_constant_border_value, element_size);
232 }
233
234 for(unsigned int i = 0; i < _border_size.right; ++i)
235 {
236 std::memcpy(base_addr + (width + i) * element_size, &_constant_border_value, element_size);
237 }
238 },
239 vertical_it);
240
241 // Top and bottom border
242 Iterator plane_it(_tensor, window);
243
244 // Iterate over all XY planes
Michalis Spyroubcfd09a2019-05-01 13:03:59 +0100245 execute_window_loop(window, [&](const Coordinates &)
Michalis Spyrou95abfdd2018-11-28 14:59:47 +0000246 {
247 uint8_t *base_addr = start_valid_region + plane_it.offset();
248 // Top border
249 for(int i = -_border_size.top; i < 0; ++i)
250 {
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100251 // Fill top rows including left/right borders
Michalis Spyrou95abfdd2018-11-28 14:59:47 +0000252 for(unsigned int j = 0; j < (_border_size.left + width + _border_size.right); ++j)
253 {
254 std::memcpy(base_addr + i * stridey + static_cast<int>(j - _border_size.left) * element_size, &_constant_border_value, element_size);
255 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100256 }
257
258 // Bottom border
Pablo Tello62eeae42017-08-09 16:33:49 +0100259 const unsigned low_border_size = height + _border_size.bottom;
260 for(unsigned int i = height; i < low_border_size; ++i)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100261 {
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100262 // Fill bottom rows including left/right borders
Michalis Spyrou95abfdd2018-11-28 14:59:47 +0000263 for(unsigned int j = 0; j < (_border_size.left + width + _border_size.right); ++j)
264 {
265 std::memcpy(base_addr + i * stridey + static_cast<int>(j - _border_size.left) * element_size, &_constant_border_value, element_size);
266 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100267 }
268 },
269 plane_it);
270}
Michalis Spyrou95abfdd2018-11-28 14:59:47 +0000271} // namespace arm_compute