blob: bd99242b1177c16e218dde1a1c2f64313059bf92 [file] [log] [blame]
Anthony Barbier6ff3b192017-09-04 18:44:23 +01001/*
2 * Copyright (c) 2016, 2017 ARM Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
25
26#include "arm_compute/core/Error.h"
27#include "arm_compute/core/Helpers.h"
28#include "arm_compute/core/ITensor.h"
29#include "arm_compute/core/TensorInfo.h"
30#include "arm_compute/core/Validate.h"
31#include "arm_compute/core/Window.h"
32
33#include <algorithm>
34#include <cstdint>
35
36using namespace arm_compute;
37
38namespace arm_compute
39{
40class Coordinates;
41} // namespace arm_compute
42
43NEFillBorderKernel::NEFillBorderKernel()
44 : _tensor(nullptr), _border_size(0), _mode(BorderMode::UNDEFINED), _constant_border_value(0)
45{
46}
47
48void NEFillBorderKernel::configure(ITensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value)
49{
50 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(tensor, 1, DataType::U8, DataType::QS8, DataType::QS16, DataType::U16, DataType::S16, DataType::U32, DataType::S32, DataType::F32);
51
52 _tensor = tensor;
53 _border_size = border_size;
54 _mode = border_mode;
55 _constant_border_value = constant_border_value;
56
57 _border_size.limit(tensor->info()->padding());
58
59 Window win;
60 win.set(Window::DimX, Window::Dimension(0, 1, 1));
61 win.set(Window::DimY, Window::Dimension(0, 1, 1));
62 win.use_tensor_dimensions(_tensor->info(), Window::DimZ);
63 INEKernel::configure(win);
64}
65
66void NEFillBorderKernel::run(const Window &window)
67{
68 // If there is no border: early exit
69 if(_border_size.empty())
70 {
71 return;
72 }
73
74 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
75 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
76
77 switch(_mode)
78 {
79 case BorderMode::CONSTANT:
80 {
81 switch(_tensor->info()->data_type())
82 {
83 case DataType::U8:
84 fill_constant_value_single_channel<uint8_t>(window);
85 break;
86 case DataType::QS8:
87 case DataType::S8:
88 fill_constant_value_single_channel<int8_t>(window);
89 break;
90 case DataType::U16:
91 fill_constant_value_single_channel<uint16_t>(window);
92 break;
93 case DataType::S16:
94 case DataType::QS16:
95 fill_constant_value_single_channel<int16_t>(window);
96 break;
97 case DataType::U32:
98 fill_constant_value_single_channel<uint32_t>(window);
99 break;
100 case DataType::S32:
101 fill_constant_value_single_channel<int32_t>(window);
102 break;
103 case DataType::F32:
104 static_assert(sizeof(float) == 4, "Float must be 32 bit");
105 fill_constant_value_single_channel<float>(window);
106 break;
107 default:
108 ARM_COMPUTE_ERROR("Not handled");
109 }
110 break;
111 }
112 case BorderMode::REPLICATE:
113 {
114 switch(_tensor->info()->data_type())
115 {
116 case DataType::U8:
117 fill_replicate_single_channel<uint8_t>(window);
118 break;
119 case DataType::QS8:
120 case DataType::S8:
121 fill_replicate_single_channel<int8_t>(window);
122 break;
123 case DataType::U16:
124 fill_replicate_single_channel<uint16_t>(window);
125 break;
126 case DataType::S16:
127 case DataType::QS16:
128 fill_replicate_single_channel<int16_t>(window);
129 break;
130 case DataType::U32:
131 fill_replicate_single_channel<uint32_t>(window);
132 break;
133 case DataType::S32:
134 fill_replicate_single_channel<int32_t>(window);
135 break;
136 case DataType::F32:
137 static_assert(sizeof(float) == 4, "Float must be 32 bit");
138 fill_replicate_single_channel<float>(window);
139 break;
140 default:
141 ARM_COMPUTE_ERROR("Not handled");
142 }
143 break;
144 }
145 case BorderMode::UNDEFINED:
146 break; // Nothing to do here
147 default:
148 ARM_COMPUTE_ERROR("Unknown border mode");
149 }
150}
151
152template <typename T>
153void NEFillBorderKernel::fill_replicate_single_channel(const Window &window)
154{
155 uint8_t *const start_valid_region = _tensor->ptr_to_element(_tensor->info()->valid_region().anchor);
156 const size_t &width = _tensor->info()->valid_region().shape[0];
157 const size_t &height = _tensor->info()->valid_region().shape[1];
158
159 // Left and right border
160 Window vertical(window);
161 vertical.set(Window::DimY, Window::Dimension(0, height, 1));
162
163 Iterator vertical_it(_tensor, vertical);
164
165 execute_window_loop(vertical, [&](const Coordinates & id)
166 {
167 const auto row_start = reinterpret_cast<T *>(start_valid_region + vertical_it.offset());
168 const auto left_val = *reinterpret_cast<T *>(vertical_it.ptr());
169 const auto right_val = *(reinterpret_cast<T *>(vertical_it.ptr()) + width - 1);
170
171 // Fill left and right borders
172 std::fill_n(row_start - _border_size.left, _border_size.left, left_val);
173 std::fill_n(row_start + width, _border_size.right, right_val);
174 },
175 vertical_it);
176
177 // Top and bottom border
178 Iterator plane_it(_tensor, window);
179
180 // Iterate over all XY planes
181 execute_window_loop(window, [&](const Coordinates & id)
182 {
183 const auto first_row = reinterpret_cast<T *>(start_valid_region + plane_it.offset());
184
185 // Top border
186 for(int i = -_border_size.top; i < 0; ++i)
187 {
188 const auto row_start = reinterpret_cast<T *>(start_valid_region + plane_it.offset() + i * _tensor->info()->strides_in_bytes()[1]);
189
190 // Copy top rows including left/right borders
191 std::copy_n(first_row - _border_size.left, _border_size.left + width + _border_size.right, row_start - _border_size.left);
192 }
193
194 const auto last_row = reinterpret_cast<T *>(start_valid_region + plane_it.offset() + (height - 1) * _tensor->info()->strides_in_bytes()[1]);
195
196 // Bottom border
197 for(unsigned int i = height; i < height + _border_size.bottom; ++i)
198 {
199 const auto row_start = reinterpret_cast<T *>(start_valid_region + plane_it.offset() + i * _tensor->info()->strides_in_bytes()[1]);
200
201 // Copy bottom rows including left/right borders
202 std::copy_n(last_row - _border_size.left, _border_size.left + width + _border_size.right, row_start - _border_size.left);
203 }
204 },
205 plane_it);
206}
207
208template <typename T>
209void NEFillBorderKernel::fill_constant_value_single_channel(const Window &window)
210{
211 T constant_border_value;
212 _constant_border_value.get(constant_border_value);
213
214 uint8_t *const start_valid_region = _tensor->ptr_to_element(_tensor->info()->valid_region().anchor);
215 const size_t &width = _tensor->info()->valid_region().shape[0];
216 const size_t &height = _tensor->info()->valid_region().shape[1];
217
218 // Left and right border
219 Window vertical(window);
220 vertical.set(Window::DimY, Window::Dimension(0, height, 1));
221
222 Iterator vertical_it(_tensor, vertical);
223
224 execute_window_loop(vertical, [&](const Coordinates & id)
225 {
226 const auto row_start = reinterpret_cast<T *>(start_valid_region + vertical_it.offset());
227
228 // Fill left and right borders
229 std::fill_n(row_start - _border_size.left, _border_size.left, constant_border_value);
230 std::fill_n(row_start + width, _border_size.right, constant_border_value);
231 },
232 vertical_it);
233
234 // Top and bottom border
235 Iterator plane_it(_tensor, window);
236
237 // Iterate over all XY planes
238 execute_window_loop(window, [&](const Coordinates & id)
239 {
240 // Top border
241 for(int i = -_border_size.top; i < 0; ++i)
242 {
243 const auto row_start = reinterpret_cast<T *>(start_valid_region + plane_it.offset() + i * _tensor->info()->strides_in_bytes()[1]);
244
245 // Fill top rows including left/right borders
246 std::fill_n(row_start - _border_size.left, _border_size.left + width + _border_size.right, constant_border_value);
247 }
248
249 // Bottom border
250 for(unsigned int i = height; i < height + _border_size.bottom; ++i)
251 {
252 const auto row_start = reinterpret_cast<T *>(start_valid_region + plane_it.offset() + i * _tensor->info()->strides_in_bytes()[1]);
253
254 // Fill bottom rows including left/right borders
255 std::fill_n(row_start - _border_size.left, _border_size.left + width + _border_size.right, constant_border_value);
256 }
257 },
258 plane_it);
259}