blob: 8280b52fcb4d72ebbc223e69d2f45a48edc1b0f2 [file] [log] [blame]
/*
 * Copyright (c) 2016-2018 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
Giorgio Arena04a8f8c2017-11-23 11:45:24 +000024#include "arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h"
Anthony Barbier6ff3b192017-09-04 18:44:23 +010025
26#include "arm_compute/core/Error.h"
27#include "arm_compute/core/Helpers.h"
28#include "arm_compute/core/ITensor.h"
29#include "arm_compute/core/NEON/NEFixedPoint.h"
30#include "arm_compute/core/TensorInfo.h"
31#include "arm_compute/core/Validate.h"
32
33#include <arm_neon.h>
34
35using namespace arm_compute;
36
37namespace arm_compute
38{
39class Coordinates;
40} // namespace arm_compute
41
Giorgio Arena04a8f8c2017-11-23 11:45:24 +000042NEDepthConvertLayerKernel::NEDepthConvertLayerKernel()
Vidhya Sudhan Loganathan7485d5a2018-07-04 09:34:00 +010043 : _input(nullptr), _output(nullptr), _policy(), _shift(0)
Anthony Barbier6ff3b192017-09-04 18:44:23 +010044{
45}
46
Giorgio Arena04a8f8c2017-11-23 11:45:24 +000047void NEDepthConvertLayerKernel::configure(ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift)
Anthony Barbier6ff3b192017-09-04 18:44:23 +010048{
Vidhya Sudhan Loganathan7485d5a2018-07-04 09:34:00 +010049 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16, DataType::U16);
Georgios Pinitase2229412017-07-12 12:30:40 +010050
51 _input = input;
52 _output = input;
53 _policy = policy;
54 _shift = shift;
55
56 if(output != nullptr)
57 {
58 // Auto initialize output shape if not initialized (We can only auto-configure the shape, datatype must be given)
59 set_shape_if_empty(*output->info(), input->info()->tensor_shape());
60
Vidhya Sudhan Loganathan7485d5a2018-07-04 09:34:00 +010061 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16, DataType::U16, DataType::U32, DataType::S32, DataType::F32);
Georgios Pinitase2229412017-07-12 12:30:40 +010062 ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
63
64 // Set output
65 _output = output;
66 }
67
Vidhya Sudhan Loganathan7485d5a2018-07-04 09:34:00 +010068 ARM_COMPUTE_ERROR_ON(shift >= 8);
Georgios Pinitase2229412017-07-12 12:30:40 +010069 ARM_COMPUTE_ERROR_ON(input == output && (data_size_from_type(input->info()->data_type()) != data_size_from_type(output->info()->data_type())));
Anthony Barbier6ff3b192017-09-04 18:44:23 +010070
Anthony Barbier6ff3b192017-09-04 18:44:23 +010071 ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::U8 && (output->info()->data_type() != DataType::S16 && output->info()->data_type() != DataType::U16
72 && output->info()->data_type() != DataType::S32),
73 "Only data_types supported [in] U8 -> [out] U16, S16, S32");
74
75 ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::U16 && (output->info()->data_type() != DataType::U8 && output->info()->data_type() != DataType::U32),
76 "Only data_types supported [in] U16 -> [out] U8, U32");
77
78 ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::S16 && (output->info()->data_type() != DataType::U8 && output->info()->data_type() != DataType::S32),
79 "Only data_types supported [in] S16 -> [out] U8, S32");
80
Anthony Barbier6ff3b192017-09-04 18:44:23 +010081 constexpr unsigned int num_elems_processed_per_iteration = 16;
Georgios Pinitase2229412017-07-12 12:30:40 +010082
83 // Configure kernel window
84 Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
85
86 AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
87 if(output != nullptr)
88 {
89 AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
90 update_window_and_padding(win, input_access, output_access);
91 output_access.set_valid_region(win, input->info()->valid_region());
92 }
93 else
94 {
95 // In-place computation
96 update_window_and_padding(win, input_access);
97 }
98 ICPPKernel::configure(win);
Anthony Barbier6ff3b192017-09-04 18:44:23 +010099}
100
Giorgio Arena04a8f8c2017-11-23 11:45:24 +0000101void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info)
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100102{
Moritz Pflanzerc186b572017-09-07 09:48:04 +0100103 ARM_COMPUTE_UNUSED(info);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100104 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
Georgios Pinitase2229412017-07-12 12:30:40 +0100105 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100106 ARM_COMPUTE_ERROR_ON(nullptr == _input);
107 ARM_COMPUTE_ERROR_ON(nullptr == _output);
108 ARM_COMPUTE_ERROR_ON(_input == _output);
109
110 Iterator input(_input, window);
111 Iterator output(_output, window);
112
113 switch(_input->info()->data_type())
114 {
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100115 case DataType::U8:
116 {
117 const int16x8_t b = vdupq_n_s16(_shift);
118
119 switch(_output->info()->data_type())
120 {
121 case DataType::S16:
122 {
123 /* Up-conversion U8 -> S16 */
124 execute_window_loop(window, [&](const Coordinates & id)
125 {
126 const uint8x16_t texels_u8 = vld1q_u8(input.ptr());
127
128 const int16x8x2_t texels =
129 {
130 {
131 vshlq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(texels_u8))), b),
132 vshlq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(texels_u8))), b)
133 }
134 };
135
136 vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()), texels.val[0]);
137 vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()) + 8, texels.val[1]);
138 },
139 input, output);
140 break;
141 }
142 case DataType::S32:
143 {
144 /* Up-conversion U8 -> S32 */
145 execute_window_loop(window, [&](const Coordinates & id)
146 {
147 const uint8x16_t texels_u8 = vld1q_u8(input.ptr());
148
149 const int16x8x2_t texels =
150 {
151 {
152 vshlq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(texels_u8))), b),
153 vshlq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(texels_u8))), b)
154 }
155 };
156
157 vst1q_s32(reinterpret_cast<int32_t *>(output.ptr()), vmovl_s16(vget_low_s16(texels.val[0])));
158 vst1q_s32(reinterpret_cast<int32_t *>(output.ptr()) + 4, vmovl_s16(vget_high_s16(texels.val[0])));
159 vst1q_s32(reinterpret_cast<int32_t *>(output.ptr()) + 8, vmovl_s16(vget_low_s16(texels.val[1])));
160 vst1q_s32(reinterpret_cast<int32_t *>(output.ptr()) + 12, vmovl_s16(vget_high_s16(texels.val[1])));
161 },
162 input, output);
163 break;
164 }
165 case DataType::U16:
166 {
167 /* Up-conversion U8 -> U16 */
168 execute_window_loop(window, [&](const Coordinates & id)
169 {
170 const uint8x16_t texels_u8 = vld1q_u8(input.ptr());
171
172 const uint16x8x2_t texels =
173 {
174 {
175 vshlq_u16(vmovl_u8(vget_low_u8(texels_u8)), b),
176 vshlq_u16(vmovl_u8(vget_high_u8(texels_u8)), b)
177 }
178 };
179
180 vst1q_u16(reinterpret_cast<uint16_t *>(output.ptr()), texels.val[0]);
181 vst1q_u16(reinterpret_cast<uint16_t *>(output.ptr()) + 8, texels.val[1]);
182 },
183 input, output);
184 break;
185 }
186 default:
187 ARM_COMPUTE_ERROR("Output data type not supported");
188 }
189 break;
190 }
191 case DataType::S16:
192 {
193 switch(_output->info()->data_type())
194 {
195 case DataType::U8:
196 {
197 const int16x8_t b = vdupq_n_s16(-static_cast<int16_t>(_shift));
198
199 /* Down-conversion S16 -> U8 */
200 if(ConvertPolicy::SATURATE == _policy)
201 {
202 execute_window_loop(window, [&](const Coordinates & id)
203 {
204 const int16x8x2_t texels =
205 {
206 {
207 vqshlq_s16(vld1q_s16(reinterpret_cast<int16_t *>(input.ptr())), b),
208 vqshlq_s16(vld1q_s16(reinterpret_cast<int16_t *>(input.ptr()) + 8), b)
209 }
210 };
211
212 vst1q_u8(output.ptr(), vcombine_u8(vqmovun_s16(texels.val[0]), vqmovun_s16(texels.val[1])));
213 },
214 input, output);
215 }
216 else
217 {
218 execute_window_loop(window, [&](const Coordinates & id)
219 {
220 const int16x8x2_t texels =
221 {
222 {
223 vshlq_s16(vld1q_s16(reinterpret_cast<int16_t *>(input.ptr())), b),
224 vshlq_s16(vld1q_s16(reinterpret_cast<int16_t *>(input.ptr()) + 8), b)
225 }
226 };
227
228 vst1q_u8(output.ptr(), vcombine_u8(vmovn_u16(vreinterpretq_u16_s16(texels.val[0])),
229 vmovn_u16(vreinterpretq_u16_s16(texels.val[1]))));
230 },
231 input, output);
232 }
233 break;
234 }
235 case DataType::S32:
236 {
237 const int32x4_t b = vdupq_n_s32(_shift);
238
239 /* Up-conversion S16 -> S32 */
240 execute_window_loop(window, [&](const Coordinates & id)
241 {
242 const int16x8x2_t texels =
243 {
244 {
245 vld1q_s16(reinterpret_cast<int16_t *>(input.ptr())),
246 vld1q_s16(reinterpret_cast<int16_t *>(input.ptr()) + 8)
247 }
248 };
249
250 const int32x4x4_t texels_s32 =
251 {
252 {
253 vshlq_s32(vmovl_s16(vget_low_s16(texels.val[0])), b),
254 vshlq_s32(vmovl_s16(vget_high_s16(texels.val[0])), b),
255 vshlq_s32(vmovl_s16(vget_low_s16(texels.val[1])), b),
256 vshlq_s32(vmovl_s16(vget_high_s16(texels.val[1])), b)
257 }
258 };
259
260 vst1q_s32(reinterpret_cast<int32_t *>(output.ptr()), texels_s32.val[0]);
261 vst1q_s32(reinterpret_cast<int32_t *>(output.ptr()) + 4, texels_s32.val[1]);
262 vst1q_s32(reinterpret_cast<int32_t *>(output.ptr()) + 8, texels_s32.val[2]);
263 vst1q_s32(reinterpret_cast<int32_t *>(output.ptr()) + 12, texels_s32.val[3]);
264 },
265 input, output);
266 break;
267 }
268 default:
269 ARM_COMPUTE_ERROR("Output data type not supported");
270 }
271 break;
272 }
273 case DataType::U16:
274 {
275 switch(_output->info()->data_type())
276 {
277 case DataType::U8:
278 {
279 const int16x8_t b = vdupq_n_s16(-static_cast<int16_t>(_shift));
280
281 /* Down-conversion U16 -> U8 */
282 if(ConvertPolicy::SATURATE == _policy)
283 {
284 execute_window_loop(window, [&](const Coordinates & id)
285 {
286 const uint16x8x2_t texels =
287 {
288 {
289 vqshlq_u16(vld1q_u16(reinterpret_cast<uint16_t *>(input.ptr())), b),
290 vqshlq_u16(vld1q_u16(reinterpret_cast<uint16_t *>(input.ptr()) + 8), b)
291 }
292 };
293
294 vst1q_u8(output.ptr(), vcombine_u8(vqmovn_u16(texels.val[0]), vqmovn_u16(texels.val[1])));
295 },
296 input, output);
297 }
298 else
299 {
300 execute_window_loop(window, [&](const Coordinates & id)
301 {
302 const uint16x8x2_t texels =
303 {
304 {
305 vshlq_u16(vld1q_u16(reinterpret_cast<uint16_t *>(input.ptr())), b),
306 vshlq_u16(vld1q_u16(reinterpret_cast<uint16_t *>(input.ptr()) + 8), b)
307 }
308 };
309
310 vst1q_u8(output.ptr(), vcombine_u8(vmovn_u16(texels.val[0]), vmovn_u16(texels.val[1])));
311 },
312 input, output);
313 }
314 break;
315 }
316 case DataType::U32:
317 {
318 const int32x4_t b = vdupq_n_s32(_shift);
319
320 /* Up-conversion U16 -> U32 */
321 execute_window_loop(window, [&](const Coordinates & id)
322 {
323 const uint16x8x2_t texels =
324 {
325 {
326 vld1q_u16(reinterpret_cast<uint16_t *>(input.ptr())),
327 vld1q_u16(reinterpret_cast<uint16_t *>(input.ptr()) + 8)
328 }
329 };
330
331 vst1q_u32(reinterpret_cast<uint32_t *>(output.ptr()), vshlq_u32(vmovl_u16(vget_low_u16(texels.val[0])), b));
332 vst1q_u32(reinterpret_cast<uint32_t *>(output.ptr()) + 4, vshlq_u32(vmovl_u16(vget_high_u16(texels.val[0])), b));
333 vst1q_u32(reinterpret_cast<uint32_t *>(output.ptr()) + 8, vshlq_u32(vmovl_u16(vget_low_u16(texels.val[1])), b));
334 vst1q_u32(reinterpret_cast<uint32_t *>(output.ptr()) + 12, vshlq_u32(vmovl_u16(vget_high_u16(texels.val[1])), b));
335 },
336 input, output);
337 break;
338 }
339 default:
340 ARM_COMPUTE_ERROR("Output data type not supported");
341 }
342 break;
343 }
Anthony Barbier6ff3b192017-09-04 18:44:23 +0100344 default:
345 ARM_COMPUTE_ERROR("Not supported");
346 }
347}