blob: aef4d4865a881ee9b98124a69f57cc43ce73b309 [file] [log] [blame]
/*
 * Copyright (c) 2016-2018 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
25
26#include "arm_compute/core/Error.h"
27#include "arm_compute/core/Helpers.h"
28#include "arm_compute/core/ITensor.h"
29#include "arm_compute/core/TensorInfo.h"
Georgios Pinitas583137c2017-08-31 18:12:42 +010030#include "arm_compute/core/Types.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010031#include "arm_compute/core/Validate.h"
32#include "arm_compute/core/Window.h"
33
34#include <algorithm>
35#include <cstdint>
36
37using namespace arm_compute;
38
Pablo Tello62eeae42017-08-09 16:33:49 +010039namespace
40{
41template <typename T, unsigned int leftx, unsigned int rightx>
42void fill_constant_value_single_channel_special(ITensor *tensor, const Window &window, unsigned int right, unsigned int bottom, const PixelValue &constant_border_value);
43
44template <>
45inline void fill_constant_value_single_channel_special<float, 1u, 1u>(ITensor *tensor, const Window &window, unsigned int right, unsigned int bottom, const PixelValue &constant_border_value)
46{
47 float border_value;
48 constant_border_value.get(border_value);
49 uint8_t *const start_valid_region = tensor->ptr_to_element(tensor->info()->valid_region().anchor);
Georgios Pinitas0223a782017-12-12 11:44:44 +000050 const size_t width = tensor->info()->valid_region().shape[0];
51 const size_t height = tensor->info()->valid_region().shape[1];
Pablo Tello62eeae42017-08-09 16:33:49 +010052 const int stridey = tensor->info()->strides_in_bytes()[1];
53
54 // Left and right border
55 Window vertical(window);
56 vertical.set(Window::DimY, Window::Dimension(0, height, 1));
57
58 Iterator vertical_it(tensor, vertical);
59
60 execute_window_loop(vertical, [&](const Coordinates &)
61 {
62 const auto row_start = reinterpret_cast<float *>(start_valid_region + vertical_it.offset());
63
64 // Fill left and right borders
65 *(row_start - 1) = border_value;
66 std::fill_n(row_start + width, right, border_value);
67 },
68 vertical_it);
69
70 // Top and bottom border
71 Iterator plane_it(tensor, window);
72
73 // Iterate over all XY planes
74 execute_window_loop(window, [&](const Coordinates &)
75 {
76 uint8_t *base_addr = start_valid_region + plane_it.offset();
77 // Top border
78 const auto row_start = reinterpret_cast<float *>(base_addr - stridey);
79 // Fill top rows including left/right borders
80 std::fill_n(row_start - 1, 1 + width + right, border_value);
81
82 // Bottom border
83 const unsigned low_border_size = height + bottom;
84 for(unsigned int i = height; i < low_border_size; ++i)
85 {
86 const auto row_start = reinterpret_cast<float *>(base_addr + i * stridey);
87
88 // Fill bottom rows including left/right borders
89 std::fill_n(row_start - 1, 1 + width + right, border_value);
90 }
91 },
92 plane_it);
93}
94} // namespace
95
// Forward declaration of the coordinates type taken by the window-loop lambdas in this file
namespace arm_compute
{
class Coordinates;
} // namespace arm_compute
100
101NEFillBorderKernel::NEFillBorderKernel()
Michalis Spyrou490bf2e2017-09-29 11:24:55 +0100102 : _tensor(nullptr), _border_size(0), _mode(BorderMode::UNDEFINED), _constant_border_value(static_cast<float>(0.f))
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100103{
104}
105
106void NEFillBorderKernel::configure(ITensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value)
107{
Anthony Barbiereaefd002018-07-20 17:49:35 +0100108 //Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions.
Vidhya Sudhan Loganathan7485d5a2018-07-04 09:34:00 +0100109 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(tensor, 1, DataType::U8, DataType::QASYMM8,
110 DataType::U16, DataType::S16,
Georgios Pinitas55186712018-01-08 17:37:12 +0000111 DataType::U32, DataType::S32,
112 DataType::F16, DataType::F32);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100113
114 _tensor = tensor;
115 _border_size = border_size;
116 _mode = border_mode;
117 _constant_border_value = constant_border_value;
118
119 _border_size.limit(tensor->info()->padding());
120
121 Window win;
122 win.set(Window::DimX, Window::Dimension(0, 1, 1));
123 win.set(Window::DimY, Window::Dimension(0, 1, 1));
SiCong Li86b53332017-08-23 11:02:43 +0100124 win.use_tensor_dimensions(_tensor->info()->tensor_shape(), Window::DimZ);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100125 INEKernel::configure(win);
126}
127
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100128void NEFillBorderKernel::run(const Window &window, const ThreadInfo &info)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100129{
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100130 ARM_COMPUTE_UNUSED(info);
131
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100132 // If there is no border: early exit
133 if(_border_size.empty())
134 {
135 return;
136 }
137
138 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
139 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
140
141 switch(_mode)
142 {
143 case BorderMode::CONSTANT:
144 {
145 switch(_tensor->info()->data_type())
146 {
Georgios Pinitas55186712018-01-08 17:37:12 +0000147 case DataType::QASYMM8:
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100148 case DataType::U8:
149 fill_constant_value_single_channel<uint8_t>(window);
150 break;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100151 case DataType::S8:
152 fill_constant_value_single_channel<int8_t>(window);
153 break;
154 case DataType::U16:
155 fill_constant_value_single_channel<uint16_t>(window);
156 break;
157 case DataType::S16:
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100158 fill_constant_value_single_channel<int16_t>(window);
159 break;
160 case DataType::U32:
161 fill_constant_value_single_channel<uint32_t>(window);
162 break;
163 case DataType::S32:
164 fill_constant_value_single_channel<int32_t>(window);
165 break;
Pablo Tello0c34fe22017-06-26 17:17:42 +0100166 case DataType::F16:
Georgios Pinitas583137c2017-08-31 18:12:42 +0100167 static_assert(sizeof(half) == 2, "Float16_t must be 16 bit");
168 fill_constant_value_single_channel<half>(window);
Pablo Tello0c34fe22017-06-26 17:17:42 +0100169 break;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100170 case DataType::F32:
171 static_assert(sizeof(float) == 4, "Float must be 32 bit");
Pablo Tello62eeae42017-08-09 16:33:49 +0100172 if(_border_size.left == 1 && _border_size.top == 1)
Georgios Pinitas583137c2017-08-31 18:12:42 +0100173 {
Pablo Tello62eeae42017-08-09 16:33:49 +0100174 fill_constant_value_single_channel_special<float, 1u, 1u>(_tensor, window, _border_size.right, _border_size.bottom, _constant_border_value);
Georgios Pinitas583137c2017-08-31 18:12:42 +0100175 }
Pablo Tello62eeae42017-08-09 16:33:49 +0100176 else
Georgios Pinitas583137c2017-08-31 18:12:42 +0100177 {
Pablo Tello62eeae42017-08-09 16:33:49 +0100178 fill_constant_value_single_channel<float>(window);
Georgios Pinitas583137c2017-08-31 18:12:42 +0100179 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100180 break;
181 default:
182 ARM_COMPUTE_ERROR("Not handled");
183 }
184 break;
185 }
186 case BorderMode::REPLICATE:
187 {
188 switch(_tensor->info()->data_type())
189 {
Georgios Pinitas55186712018-01-08 17:37:12 +0000190 case DataType::QASYMM8:
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100191 case DataType::U8:
192 fill_replicate_single_channel<uint8_t>(window);
193 break;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100194 case DataType::S8:
195 fill_replicate_single_channel<int8_t>(window);
196 break;
197 case DataType::U16:
198 fill_replicate_single_channel<uint16_t>(window);
199 break;
200 case DataType::S16:
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100201 fill_replicate_single_channel<int16_t>(window);
202 break;
203 case DataType::U32:
204 fill_replicate_single_channel<uint32_t>(window);
205 break;
206 case DataType::S32:
207 fill_replicate_single_channel<int32_t>(window);
208 break;
Pablo Tello0c34fe22017-06-26 17:17:42 +0100209 case DataType::F16:
Georgios Pinitas583137c2017-08-31 18:12:42 +0100210 static_assert(sizeof(half) == 2, "Float16_t must be 16 bit");
211 fill_replicate_single_channel<half>(window);
Pablo Tello0c34fe22017-06-26 17:17:42 +0100212 break;
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100213 case DataType::F32:
214 static_assert(sizeof(float) == 4, "Float must be 32 bit");
215 fill_replicate_single_channel<float>(window);
216 break;
217 default:
218 ARM_COMPUTE_ERROR("Not handled");
219 }
220 break;
221 }
222 case BorderMode::UNDEFINED:
223 break; // Nothing to do here
224 default:
225 ARM_COMPUTE_ERROR("Unknown border mode");
226 }
227}
228
229template <typename T>
230void NEFillBorderKernel::fill_replicate_single_channel(const Window &window)
231{
232 uint8_t *const start_valid_region = _tensor->ptr_to_element(_tensor->info()->valid_region().anchor);
Georgios Pinitas424eb5d2017-12-06 19:49:38 +0000233 const size_t width = _tensor->info()->valid_region().shape[0];
234 const size_t height = _tensor->info()->valid_region().shape[1];
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100235
236 // Left and right border
237 Window vertical(window);
238 vertical.set(Window::DimY, Window::Dimension(0, height, 1));
239
240 Iterator vertical_it(_tensor, vertical);
241
242 execute_window_loop(vertical, [&](const Coordinates & id)
243 {
244 const auto row_start = reinterpret_cast<T *>(start_valid_region + vertical_it.offset());
245 const auto left_val = *reinterpret_cast<T *>(vertical_it.ptr());
246 const auto right_val = *(reinterpret_cast<T *>(vertical_it.ptr()) + width - 1);
247
248 // Fill left and right borders
249 std::fill_n(row_start - _border_size.left, _border_size.left, left_val);
250 std::fill_n(row_start + width, _border_size.right, right_val);
251 },
252 vertical_it);
253
254 // Top and bottom border
255 Iterator plane_it(_tensor, window);
256
257 // Iterate over all XY planes
258 execute_window_loop(window, [&](const Coordinates & id)
259 {
260 const auto first_row = reinterpret_cast<T *>(start_valid_region + plane_it.offset());
261
262 // Top border
263 for(int i = -_border_size.top; i < 0; ++i)
264 {
265 const auto row_start = reinterpret_cast<T *>(start_valid_region + plane_it.offset() + i * _tensor->info()->strides_in_bytes()[1]);
266
267 // Copy top rows including left/right borders
268 std::copy_n(first_row - _border_size.left, _border_size.left + width + _border_size.right, row_start - _border_size.left);
269 }
270
271 const auto last_row = reinterpret_cast<T *>(start_valid_region + plane_it.offset() + (height - 1) * _tensor->info()->strides_in_bytes()[1]);
272
273 // Bottom border
274 for(unsigned int i = height; i < height + _border_size.bottom; ++i)
275 {
276 const auto row_start = reinterpret_cast<T *>(start_valid_region + plane_it.offset() + i * _tensor->info()->strides_in_bytes()[1]);
277
278 // Copy bottom rows including left/right borders
279 std::copy_n(last_row - _border_size.left, _border_size.left + width + _border_size.right, row_start - _border_size.left);
280 }
281 },
282 plane_it);
283}
284
285template <typename T>
286void NEFillBorderKernel::fill_constant_value_single_channel(const Window &window)
287{
288 T constant_border_value;
289 _constant_border_value.get(constant_border_value);
290
291 uint8_t *const start_valid_region = _tensor->ptr_to_element(_tensor->info()->valid_region().anchor);
Georgios Pinitas424eb5d2017-12-06 19:49:38 +0000292 const size_t width = _tensor->info()->valid_region().shape[0];
293 const size_t height = _tensor->info()->valid_region().shape[1];
Pablo Tello62eeae42017-08-09 16:33:49 +0100294 const int stridey = _tensor->info()->strides_in_bytes()[1];
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100295
296 // Left and right border
297 Window vertical(window);
298 vertical.set(Window::DimY, Window::Dimension(0, height, 1));
299
300 Iterator vertical_it(_tensor, vertical);
301
302 execute_window_loop(vertical, [&](const Coordinates & id)
303 {
304 const auto row_start = reinterpret_cast<T *>(start_valid_region + vertical_it.offset());
305
306 // Fill left and right borders
307 std::fill_n(row_start - _border_size.left, _border_size.left, constant_border_value);
308 std::fill_n(row_start + width, _border_size.right, constant_border_value);
309 },
310 vertical_it);
311
312 // Top and bottom border
313 Iterator plane_it(_tensor, window);
314
315 // Iterate over all XY planes
316 execute_window_loop(window, [&](const Coordinates & id)
317 {
Pablo Tello62eeae42017-08-09 16:33:49 +0100318 uint8_t *base_addr = start_valid_region + plane_it.offset();
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100319 // Top border
320 for(int i = -_border_size.top; i < 0; ++i)
321 {
Pablo Tello62eeae42017-08-09 16:33:49 +0100322 const auto row_start = reinterpret_cast<T *>(base_addr + i * stridey);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100323
324 // Fill top rows including left/right borders
325 std::fill_n(row_start - _border_size.left, _border_size.left + width + _border_size.right, constant_border_value);
326 }
327
328 // Bottom border
Pablo Tello62eeae42017-08-09 16:33:49 +0100329 const unsigned low_border_size = height + _border_size.bottom;
330 for(unsigned int i = height; i < low_border_size; ++i)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100331 {
Pablo Tello62eeae42017-08-09 16:33:49 +0100332 const auto row_start = reinterpret_cast<T *>(base_addr + i * stridey);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100333
334 // Fill bottom rows including left/right borders
335 std::fill_n(row_start - _border_size.left, _border_size.left + width + _border_size.right, constant_border_value);
336 }
337 },
338 plane_it);
339}