blob: cd84e36aade76b39316471b25d96938fc3657c35 [file] [log] [blame]
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001/*
2 * Copyright (c) 2016, 2017 ARM Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
25
26#include "arm_compute/core/Error.h"
27#include "arm_compute/core/Helpers.h"
28#include "arm_compute/core/ITensor.h"
29#include "arm_compute/core/TensorInfo.h"
30#include "arm_compute/core/Validate.h"
31#include "arm_compute/core/Window.h"
32
33#include <algorithm>
34#include <cstdint>
35
Pablo Tello0c34fe22017-06-26 17:17:42 +010036#if ARM_COMPUTE_ENABLE_FP16
37#include <arm_fp16.h> // needed for float16_t
38#endif /* ARM_COMPUTE_ENABLE_FP16 */
39
Anthony Barbier6ff3b192017-09-04 18:44:23 +010040using namespace arm_compute;
41
42namespace arm_compute
43{
44class Coordinates;
45} // namespace arm_compute
46
47NEFillBorderKernel::NEFillBorderKernel()
48 : _tensor(nullptr), _border_size(0), _mode(BorderMode::UNDEFINED), _constant_border_value(0)
49{
50}
51
52void NEFillBorderKernel::configure(ITensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value)
53{
Pablo Tello0c34fe22017-06-26 17:17:42 +010054 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(tensor, 1, DataType::U8, DataType::QS8, DataType::QS16, DataType::U16, DataType::S16, DataType::F16, DataType::U32, DataType::S32, DataType::F32);
Anthony Barbier6ff3b192017-09-04 18:44:23 +010055
56 _tensor = tensor;
57 _border_size = border_size;
58 _mode = border_mode;
59 _constant_border_value = constant_border_value;
60
61 _border_size.limit(tensor->info()->padding());
62
63 Window win;
64 win.set(Window::DimX, Window::Dimension(0, 1, 1));
65 win.set(Window::DimY, Window::Dimension(0, 1, 1));
66 win.use_tensor_dimensions(_tensor->info(), Window::DimZ);
67 INEKernel::configure(win);
68}
69
70void NEFillBorderKernel::run(const Window &window)
71{
72 // If there is no border: early exit
73 if(_border_size.empty())
74 {
75 return;
76 }
77
78 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
79 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
80
81 switch(_mode)
82 {
83 case BorderMode::CONSTANT:
84 {
85 switch(_tensor->info()->data_type())
86 {
87 case DataType::U8:
88 fill_constant_value_single_channel<uint8_t>(window);
89 break;
90 case DataType::QS8:
91 case DataType::S8:
92 fill_constant_value_single_channel<int8_t>(window);
93 break;
94 case DataType::U16:
95 fill_constant_value_single_channel<uint16_t>(window);
96 break;
97 case DataType::S16:
98 case DataType::QS16:
99 fill_constant_value_single_channel<int16_t>(window);
100 break;
101 case DataType::U32:
102 fill_constant_value_single_channel<uint32_t>(window);
103 break;
104 case DataType::S32:
105 fill_constant_value_single_channel<int32_t>(window);
106 break;
Pablo Tello0c34fe22017-06-26 17:17:42 +0100107#ifdef ARM_COMPUTE_ENABLE_FP16
108 case DataType::F16:
109 static_assert(sizeof(float16_t) == 2, "Float16_t must be 16 bit");
110 fill_constant_value_single_channel<float16_t>(window);
111 break;
112#endif /* ARM_COMPUTE_ENABLE_FP16 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100113 case DataType::F32:
114 static_assert(sizeof(float) == 4, "Float must be 32 bit");
115 fill_constant_value_single_channel<float>(window);
116 break;
117 default:
118 ARM_COMPUTE_ERROR("Not handled");
119 }
120 break;
121 }
122 case BorderMode::REPLICATE:
123 {
124 switch(_tensor->info()->data_type())
125 {
126 case DataType::U8:
127 fill_replicate_single_channel<uint8_t>(window);
128 break;
129 case DataType::QS8:
130 case DataType::S8:
131 fill_replicate_single_channel<int8_t>(window);
132 break;
133 case DataType::U16:
134 fill_replicate_single_channel<uint16_t>(window);
135 break;
136 case DataType::S16:
137 case DataType::QS16:
138 fill_replicate_single_channel<int16_t>(window);
139 break;
140 case DataType::U32:
141 fill_replicate_single_channel<uint32_t>(window);
142 break;
143 case DataType::S32:
144 fill_replicate_single_channel<int32_t>(window);
145 break;
Pablo Tello0c34fe22017-06-26 17:17:42 +0100146#ifdef ARM_COMPUTE_ENABLE_FP16
147 case DataType::F16:
148 static_assert(sizeof(float16_t) == 2, "Float16_t must be 16 bit");
149 fill_replicate_single_channel<float16_t>(window);
150 break;
151#endif /* ARM_COMPUTE_ENABLE_FP16 */
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100152 case DataType::F32:
153 static_assert(sizeof(float) == 4, "Float must be 32 bit");
154 fill_replicate_single_channel<float>(window);
155 break;
156 default:
157 ARM_COMPUTE_ERROR("Not handled");
158 }
159 break;
160 }
161 case BorderMode::UNDEFINED:
162 break; // Nothing to do here
163 default:
164 ARM_COMPUTE_ERROR("Unknown border mode");
165 }
166}
167
168template <typename T>
169void NEFillBorderKernel::fill_replicate_single_channel(const Window &window)
170{
171 uint8_t *const start_valid_region = _tensor->ptr_to_element(_tensor->info()->valid_region().anchor);
172 const size_t &width = _tensor->info()->valid_region().shape[0];
173 const size_t &height = _tensor->info()->valid_region().shape[1];
174
175 // Left and right border
176 Window vertical(window);
177 vertical.set(Window::DimY, Window::Dimension(0, height, 1));
178
179 Iterator vertical_it(_tensor, vertical);
180
181 execute_window_loop(vertical, [&](const Coordinates & id)
182 {
183 const auto row_start = reinterpret_cast<T *>(start_valid_region + vertical_it.offset());
184 const auto left_val = *reinterpret_cast<T *>(vertical_it.ptr());
185 const auto right_val = *(reinterpret_cast<T *>(vertical_it.ptr()) + width - 1);
186
187 // Fill left and right borders
188 std::fill_n(row_start - _border_size.left, _border_size.left, left_val);
189 std::fill_n(row_start + width, _border_size.right, right_val);
190 },
191 vertical_it);
192
193 // Top and bottom border
194 Iterator plane_it(_tensor, window);
195
196 // Iterate over all XY planes
197 execute_window_loop(window, [&](const Coordinates & id)
198 {
199 const auto first_row = reinterpret_cast<T *>(start_valid_region + plane_it.offset());
200
201 // Top border
202 for(int i = -_border_size.top; i < 0; ++i)
203 {
204 const auto row_start = reinterpret_cast<T *>(start_valid_region + plane_it.offset() + i * _tensor->info()->strides_in_bytes()[1]);
205
206 // Copy top rows including left/right borders
207 std::copy_n(first_row - _border_size.left, _border_size.left + width + _border_size.right, row_start - _border_size.left);
208 }
209
210 const auto last_row = reinterpret_cast<T *>(start_valid_region + plane_it.offset() + (height - 1) * _tensor->info()->strides_in_bytes()[1]);
211
212 // Bottom border
213 for(unsigned int i = height; i < height + _border_size.bottom; ++i)
214 {
215 const auto row_start = reinterpret_cast<T *>(start_valid_region + plane_it.offset() + i * _tensor->info()->strides_in_bytes()[1]);
216
217 // Copy bottom rows including left/right borders
218 std::copy_n(last_row - _border_size.left, _border_size.left + width + _border_size.right, row_start - _border_size.left);
219 }
220 },
221 plane_it);
222}
223
224template <typename T>
225void NEFillBorderKernel::fill_constant_value_single_channel(const Window &window)
226{
227 T constant_border_value;
228 _constant_border_value.get(constant_border_value);
229
230 uint8_t *const start_valid_region = _tensor->ptr_to_element(_tensor->info()->valid_region().anchor);
231 const size_t &width = _tensor->info()->valid_region().shape[0];
232 const size_t &height = _tensor->info()->valid_region().shape[1];
233
234 // Left and right border
235 Window vertical(window);
236 vertical.set(Window::DimY, Window::Dimension(0, height, 1));
237
238 Iterator vertical_it(_tensor, vertical);
239
240 execute_window_loop(vertical, [&](const Coordinates & id)
241 {
242 const auto row_start = reinterpret_cast<T *>(start_valid_region + vertical_it.offset());
243
244 // Fill left and right borders
245 std::fill_n(row_start - _border_size.left, _border_size.left, constant_border_value);
246 std::fill_n(row_start + width, _border_size.right, constant_border_value);
247 },
248 vertical_it);
249
250 // Top and bottom border
251 Iterator plane_it(_tensor, window);
252
253 // Iterate over all XY planes
254 execute_window_loop(window, [&](const Coordinates & id)
255 {
256 // Top border
257 for(int i = -_border_size.top; i < 0; ++i)
258 {
259 const auto row_start = reinterpret_cast<T *>(start_valid_region + plane_it.offset() + i * _tensor->info()->strides_in_bytes()[1]);
260
261 // Fill top rows including left/right borders
262 std::fill_n(row_start - _border_size.left, _border_size.left + width + _border_size.right, constant_border_value);
263 }
264
265 // Bottom border
266 for(unsigned int i = height; i < height + _border_size.bottom; ++i)
267 {
268 const auto row_start = reinterpret_cast<T *>(start_valid_region + plane_it.offset() + i * _tensor->info()->strides_in_bytes()[1]);
269
270 // Fill bottom rows including left/right borders
271 std::fill_n(row_start - _border_size.left, _border_size.left + width + _border_size.right, constant_border_value);
272 }
273 },
274 plane_it);
275}