blob: 3f1f678a7edd620de1fa181ecf79e62140c9d1e9 [file] [log] [blame]
/*
 * Copyright (c) 2016, 2017 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
25
26#include "arm_compute/core/Error.h"
27#include "arm_compute/core/Helpers.h"
28#include "arm_compute/core/ITensor.h"
29#include "arm_compute/core/TensorInfo.h"
30#include "arm_compute/core/Validate.h"
31#include "arm_compute/core/Window.h"
32
33#include <algorithm>
34#include <cstdint>
35
Pablo Tello0c34fe22017-06-26 17:17:42 +010036#if ARM_COMPUTE_ENABLE_FP16
37#include <arm_fp16.h> // needed for float16_t
38#endif /* ARM_COMPUTE_ENABLE_FP16 */
39
Anthony Barbier6ff3b192017-09-04 18:44:23 +010040using namespace arm_compute;
41
Pablo Tello62eeae42017-08-09 16:33:49 +010042namespace
43{
44template <typename T, unsigned int leftx, unsigned int rightx>
45void fill_constant_value_single_channel_special(ITensor *tensor, const Window &window, unsigned int right, unsigned int bottom, const PixelValue &constant_border_value);
46
47template <>
48inline void fill_constant_value_single_channel_special<float, 1u, 1u>(ITensor *tensor, const Window &window, unsigned int right, unsigned int bottom, const PixelValue &constant_border_value)
49{
50 float border_value;
51 constant_border_value.get(border_value);
52 uint8_t *const start_valid_region = tensor->ptr_to_element(tensor->info()->valid_region().anchor);
53 const size_t &width = tensor->info()->valid_region().shape[0];
54 const size_t &height = tensor->info()->valid_region().shape[1];
55 const int stridey = tensor->info()->strides_in_bytes()[1];
56
57 // Left and right border
58 Window vertical(window);
59 vertical.set(Window::DimY, Window::Dimension(0, height, 1));
60
61 Iterator vertical_it(tensor, vertical);
62
63 execute_window_loop(vertical, [&](const Coordinates &)
64 {
65 const auto row_start = reinterpret_cast<float *>(start_valid_region + vertical_it.offset());
66
67 // Fill left and right borders
68 *(row_start - 1) = border_value;
69 std::fill_n(row_start + width, right, border_value);
70 },
71 vertical_it);
72
73 // Top and bottom border
74 Iterator plane_it(tensor, window);
75
76 // Iterate over all XY planes
77 execute_window_loop(window, [&](const Coordinates &)
78 {
79 uint8_t *base_addr = start_valid_region + plane_it.offset();
80 // Top border
81 const auto row_start = reinterpret_cast<float *>(base_addr - stridey);
82 // Fill top rows including left/right borders
83 std::fill_n(row_start - 1, 1 + width + right, border_value);
84
85 // Bottom border
86 const unsigned low_border_size = height + bottom;
87 for(unsigned int i = height; i < low_border_size; ++i)
88 {
89 const auto row_start = reinterpret_cast<float *>(base_addr + i * stridey);
90
91 // Fill bottom rows including left/right borders
92 std::fill_n(row_start - 1, 1 + width + right, border_value);
93 }
94 },
95 plane_it);
96}
97} // namespace
98
// Forward declaration of the coordinate type taken by the window-loop lambdas below.
namespace arm_compute { class Coordinates; } // namespace arm_compute
103
104NEFillBorderKernel::NEFillBorderKernel()
105 : _tensor(nullptr), _border_size(0), _mode(BorderMode::UNDEFINED), _constant_border_value(0)
106{
107}
108
109void NEFillBorderKernel::configure(ITensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value)
110{
Pablo Tello0c34fe22017-06-26 17:17:42 +0100111 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(tensor, 1, DataType::U8, DataType::QS8, DataType::QS16, DataType::U16, DataType::S16, DataType::F16, DataType::U32, DataType::S32, DataType::F32);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100112
113 _tensor = tensor;
114 _border_size = border_size;
115 _mode = border_mode;
116 _constant_border_value = constant_border_value;
117
118 _border_size.limit(tensor->info()->padding());
119
120 Window win;
121 win.set(Window::DimX, Window::Dimension(0, 1, 1));
122 win.set(Window::DimY, Window::Dimension(0, 1, 1));
SiCong Li86b53332017-08-23 11:02:43 +0100123 win.use_tensor_dimensions(_tensor->info()->tensor_shape(), Window::DimZ);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100124 INEKernel::configure(win);
125}
126
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100127void NEFillBorderKernel::run(const Window &window, const ThreadInfo &info)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100128{
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100129 ARM_COMPUTE_UNUSED(info);
130
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100131 // If there is no border: early exit
132 if(_border_size.empty())
133 {
134 return;
135 }
136
137 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
138 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
139
140 switch(_mode)
141 {
142 case BorderMode::CONSTANT:
143 {
144 switch(_tensor->info()->data_type())
145 {
146 case DataType::U8:
147 fill_constant_value_single_channel<uint8_t>(window);
148 break;
149 case DataType::QS8:
150 case DataType::S8:
151 fill_constant_value_single_channel<int8_t>(window);
152 break;
153 case DataType::U16:
154 fill_constant_value_single_channel<uint16_t>(window);
155 break;
156 case DataType::S16:
157 case DataType::QS16:
158 fill_constant_value_single_channel<int16_t>(window);
159 break;
160 case DataType::U32:
161 fill_constant_value_single_channel<uint32_t>(window);
162 break;
163 case DataType::S32:
164 fill_constant_value_single_channel<int32_t>(window);
165 break;
Pablo Tello0c34fe22017-06-26 17:17:42 +0100166#ifdef ARM_COMPUTE_ENABLE_FP16
167 case DataType::F16:
168 static_assert(sizeof(float16_t) == 2, "Float16_t must be 16 bit");
169 fill_constant_value_single_channel<float16_t>(window);
170 break;
171#endif /* ARM_COMPUTE_ENABLE_FP16 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100172 case DataType::F32:
173 static_assert(sizeof(float) == 4, "Float must be 32 bit");
Pablo Tello62eeae42017-08-09 16:33:49 +0100174 if(_border_size.left == 1 && _border_size.top == 1)
175 fill_constant_value_single_channel_special<float, 1u, 1u>(_tensor, window, _border_size.right, _border_size.bottom, _constant_border_value);
176 else
177 fill_constant_value_single_channel<float>(window);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100178 break;
179 default:
180 ARM_COMPUTE_ERROR("Not handled");
181 }
182 break;
183 }
184 case BorderMode::REPLICATE:
185 {
186 switch(_tensor->info()->data_type())
187 {
188 case DataType::U8:
189 fill_replicate_single_channel<uint8_t>(window);
190 break;
191 case DataType::QS8:
192 case DataType::S8:
193 fill_replicate_single_channel<int8_t>(window);
194 break;
195 case DataType::U16:
196 fill_replicate_single_channel<uint16_t>(window);
197 break;
198 case DataType::S16:
199 case DataType::QS16:
200 fill_replicate_single_channel<int16_t>(window);
201 break;
202 case DataType::U32:
203 fill_replicate_single_channel<uint32_t>(window);
204 break;
205 case DataType::S32:
206 fill_replicate_single_channel<int32_t>(window);
207 break;
Pablo Tello0c34fe22017-06-26 17:17:42 +0100208#ifdef ARM_COMPUTE_ENABLE_FP16
209 case DataType::F16:
210 static_assert(sizeof(float16_t) == 2, "Float16_t must be 16 bit");
211 fill_replicate_single_channel<float16_t>(window);
212 break;
213#endif /* ARM_COMPUTE_ENABLE_FP16 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100214 case DataType::F32:
215 static_assert(sizeof(float) == 4, "Float must be 32 bit");
216 fill_replicate_single_channel<float>(window);
217 break;
218 default:
219 ARM_COMPUTE_ERROR("Not handled");
220 }
221 break;
222 }
223 case BorderMode::UNDEFINED:
224 break; // Nothing to do here
225 default:
226 ARM_COMPUTE_ERROR("Unknown border mode");
227 }
228}
229
230template <typename T>
231void NEFillBorderKernel::fill_replicate_single_channel(const Window &window)
232{
233 uint8_t *const start_valid_region = _tensor->ptr_to_element(_tensor->info()->valid_region().anchor);
234 const size_t &width = _tensor->info()->valid_region().shape[0];
235 const size_t &height = _tensor->info()->valid_region().shape[1];
236
237 // Left and right border
238 Window vertical(window);
239 vertical.set(Window::DimY, Window::Dimension(0, height, 1));
240
241 Iterator vertical_it(_tensor, vertical);
242
243 execute_window_loop(vertical, [&](const Coordinates & id)
244 {
245 const auto row_start = reinterpret_cast<T *>(start_valid_region + vertical_it.offset());
246 const auto left_val = *reinterpret_cast<T *>(vertical_it.ptr());
247 const auto right_val = *(reinterpret_cast<T *>(vertical_it.ptr()) + width - 1);
248
249 // Fill left and right borders
250 std::fill_n(row_start - _border_size.left, _border_size.left, left_val);
251 std::fill_n(row_start + width, _border_size.right, right_val);
252 },
253 vertical_it);
254
255 // Top and bottom border
256 Iterator plane_it(_tensor, window);
257
258 // Iterate over all XY planes
259 execute_window_loop(window, [&](const Coordinates & id)
260 {
261 const auto first_row = reinterpret_cast<T *>(start_valid_region + plane_it.offset());
262
263 // Top border
264 for(int i = -_border_size.top; i < 0; ++i)
265 {
266 const auto row_start = reinterpret_cast<T *>(start_valid_region + plane_it.offset() + i * _tensor->info()->strides_in_bytes()[1]);
267
268 // Copy top rows including left/right borders
269 std::copy_n(first_row - _border_size.left, _border_size.left + width + _border_size.right, row_start - _border_size.left);
270 }
271
272 const auto last_row = reinterpret_cast<T *>(start_valid_region + plane_it.offset() + (height - 1) * _tensor->info()->strides_in_bytes()[1]);
273
274 // Bottom border
275 for(unsigned int i = height; i < height + _border_size.bottom; ++i)
276 {
277 const auto row_start = reinterpret_cast<T *>(start_valid_region + plane_it.offset() + i * _tensor->info()->strides_in_bytes()[1]);
278
279 // Copy bottom rows including left/right borders
280 std::copy_n(last_row - _border_size.left, _border_size.left + width + _border_size.right, row_start - _border_size.left);
281 }
282 },
283 plane_it);
284}
285
286template <typename T>
287void NEFillBorderKernel::fill_constant_value_single_channel(const Window &window)
288{
289 T constant_border_value;
290 _constant_border_value.get(constant_border_value);
291
292 uint8_t *const start_valid_region = _tensor->ptr_to_element(_tensor->info()->valid_region().anchor);
293 const size_t &width = _tensor->info()->valid_region().shape[0];
294 const size_t &height = _tensor->info()->valid_region().shape[1];
Pablo Tello62eeae42017-08-09 16:33:49 +0100295 const int stridey = _tensor->info()->strides_in_bytes()[1];
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100296
297 // Left and right border
298 Window vertical(window);
299 vertical.set(Window::DimY, Window::Dimension(0, height, 1));
300
301 Iterator vertical_it(_tensor, vertical);
302
303 execute_window_loop(vertical, [&](const Coordinates & id)
304 {
305 const auto row_start = reinterpret_cast<T *>(start_valid_region + vertical_it.offset());
306
307 // Fill left and right borders
308 std::fill_n(row_start - _border_size.left, _border_size.left, constant_border_value);
309 std::fill_n(row_start + width, _border_size.right, constant_border_value);
310 },
311 vertical_it);
312
313 // Top and bottom border
314 Iterator plane_it(_tensor, window);
315
316 // Iterate over all XY planes
317 execute_window_loop(window, [&](const Coordinates & id)
318 {
Pablo Tello62eeae42017-08-09 16:33:49 +0100319 uint8_t *base_addr = start_valid_region + plane_it.offset();
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100320 // Top border
321 for(int i = -_border_size.top; i < 0; ++i)
322 {
Pablo Tello62eeae42017-08-09 16:33:49 +0100323 const auto row_start = reinterpret_cast<T *>(base_addr + i * stridey);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100324
325 // Fill top rows including left/right borders
326 std::fill_n(row_start - _border_size.left, _border_size.left + width + _border_size.right, constant_border_value);
327 }
328
329 // Bottom border
Pablo Tello62eeae42017-08-09 16:33:49 +0100330 const unsigned low_border_size = height + _border_size.bottom;
331 for(unsigned int i = height; i < low_border_size; ++i)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100332 {
Pablo Tello62eeae42017-08-09 16:33:49 +0100333 const auto row_start = reinterpret_cast<T *>(base_addr + i * stridey);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100334
335 // Fill bottom rows including left/right borders
336 std::fill_n(row_start - _border_size.left, _border_size.left + width + _border_size.right, constant_border_value);
337 }
338 },
339 plane_it);
340}