/*
 * Copyright (c) 2016-2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "src/core/NEON/kernels/NECannyEdgeKernel.h"

#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"
#include "src/core/AccessWindowStatic.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"

#include <arm_neon.h>
#include <cstddef>
#include <cstdint>
#include <tuple>

namespace arm_compute
{
namespace
{
constexpr int NO_EDGE = 0;
constexpr int EDGE    = 255;
constexpr int MAYBE   = 127;

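/* Quantizes the gradient direction of 8 pixels into one of four bins.
 *
 * The absolute response along the 0°, 45°, 90° and 135° directions is compared
 * and the strongest one selects the bin: 0 = 0°, 1 = 45°, 2 = 90°, 3 = 135°
 *
 * @param[in] gx Gx component (float)
 * @param[in] gy Gy component (float)
 *
 * @return quantized phase for 8 pixels
 */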
inline uint8x8_t phase_quantization(const float32x4x2_t &gx, const float32x4x2_t &gy)
{
    // Constants used for evaluating score1 and score3
    static const float32x4_t const45 = vdupq_n_f32(0.70710678118655f);
    static const float32x4_t zero    = vdupq_n_f32(0.0f);
    static const float32x4_t one     = vdupq_n_f32(1.0f);
    static const float32x4_t two     = vdupq_n_f32(2.0f);
    static const float32x4_t three   = vdupq_n_f32(3.0f);

    // Score0: (1, 0)
    const float32x4x2_t score0 =
    {
        {
            vabsq_f32(gx.val[0]),
            vabsq_f32(gx.val[1])
        }
    };

    // Score2: (0, 1)
    const float32x4x2_t score2 =
    {
        {
            vabsq_f32(gy.val[0]),
            vabsq_f32(gy.val[1])
        }
    };

    // Score1 and Score3: (sqrt(2)/2, sqrt(2)/2) and (-sqrt(2)/2, sqrt(2)/2)
    float32x4x2_t score1 =
    {
        {
            vmulq_f32(gy.val[0], const45),
            vmulq_f32(gy.val[1], const45)
        }
    };

    float32x4x2_t score3 = score1;

    score1.val[0] = vmlaq_f32(score1.val[0], gx.val[0], const45);
    score1.val[1] = vmlaq_f32(score1.val[1], gx.val[1], const45);
    score3.val[0] = vmlsq_f32(score3.val[0], gx.val[0], const45);
    score3.val[1] = vmlsq_f32(score3.val[1], gx.val[1], const45);

    score1.val[0] = vabsq_f32(score1.val[0]);
    score1.val[1] = vabsq_f32(score1.val[1]);
    score3.val[0] = vabsq_f32(score3.val[0]);
    score3.val[1] = vabsq_f32(score3.val[1]);

    float32x4x2_t phase =
    {
        {
            zero,
            zero
        }
    };

    float32x4x2_t old_score = score0;

    // score1 > old_score?
    uint32x4x2_t mask =
    {
        {
            vcgtq_f32(score1.val[0], old_score.val[0]),
            vcgtq_f32(score1.val[1], old_score.val[1])
        }
    };

    phase.val[0]     = vbslq_f32(mask.val[0], one, phase.val[0]);
    phase.val[1]     = vbslq_f32(mask.val[1], one, phase.val[1]);
    old_score.val[0] = vbslq_f32(mask.val[0], score1.val[0], old_score.val[0]);
    old_score.val[1] = vbslq_f32(mask.val[1], score1.val[1], old_score.val[1]);

    // score2 > old_score?
    mask.val[0] = vcgtq_f32(score2.val[0], old_score.val[0]);
    mask.val[1] = vcgtq_f32(score2.val[1], old_score.val[1]);

    phase.val[0]     = vbslq_f32(mask.val[0], two, phase.val[0]);
    phase.val[1]     = vbslq_f32(mask.val[1], two, phase.val[1]);
    old_score.val[0] = vbslq_f32(mask.val[0], score2.val[0], old_score.val[0]);
    old_score.val[1] = vbslq_f32(mask.val[1], score2.val[1], old_score.val[1]);

    // score3 > old_score?
    mask.val[0] = vcgtq_f32(score3.val[0], old_score.val[0]);
    mask.val[1] = vcgtq_f32(score3.val[1], old_score.val[1]);

    phase.val[0]     = vbslq_f32(mask.val[0], three, phase.val[0]);
    phase.val[1]     = vbslq_f32(mask.val[1], three, phase.val[1]);
    old_score.val[0] = vbslq_f32(mask.val[0], score3.val[0], old_score.val[0]);
    old_score.val[1] = vbslq_f32(mask.val[1], score3.val[1], old_score.val[1]);

    // Convert from float32x4_t to uint8x8_t
    return vmovn_u16(vcombine_u16(vmovn_u32(vcvtq_u32_f32(phase.val[0])),
                                  vmovn_u32(vcvtq_u32_f32(phase.val[1]))));
}

/* Computes the gradient phase if gradient_size = 3 or 5. The output is quantized.
 * 0 = 0°, 1 = 45°, 2 = 90°, 3 = 135°
 *
 * @param[in] gx Gx component
 * @param[in] gy Gy component
 *
 * @return quantized phase for 8 pixels
 */
inline uint8x8_t phase_quantization_S16_S16(int16x8_t gx, int16x8_t gy)
{
    // Convert to float
    const float32x4x2_t gx_f32 =
    {
        {
            vcvtq_f32_s32(vmovl_s16(vget_low_s16(gx))),
            vcvtq_f32_s32(vmovl_s16(vget_high_s16(gx)))
        }
    };

    const float32x4x2_t gy_f32 =
    {
        {
            vcvtq_f32_s32(vmovl_s16(vget_low_s16(gy))),
            vcvtq_f32_s32(vmovl_s16(vget_high_s16(gy)))
        }
    };

    return phase_quantization(gx_f32, gy_f32);
}

/* Computes the gradient phase if gradient_size = 7. The output is quantized.
 * 0 = 0°, 1 = 45°, 2 = 90°, 3 = 135°
 *
 * @param[in] gx Gx component
 * @param[in] gy Gy component
 *
 * @return quantized phase for 8 pixels
 */
inline uint8x8_t phase_quantization_S32_S32(const int32x4x2_t &gx, const int32x4x2_t &gy)
{
    // Convert to float
    const float32x4x2_t gx_f32 =
    {
        {
            vcvtq_f32_s32(gx.val[0]),
            vcvtq_f32_s32(gx.val[1])
        }
    };

    const float32x4x2_t gy_f32 =
    {
        {
            vcvtq_f32_s32(gy.val[0]),
            vcvtq_f32_s32(gy.val[1])
        }
    };

    return phase_quantization(gx_f32, gy_f32);
}

/* Computes the magnitude using the L1-norm type if gradient_size = 3 or 5
 *
 * @param[in] gx Gx component
 * @param[in] gy Gy component
 *
 * @return magnitude for 8 pixels
 */
inline uint16x8_t mag_l1_S16_S16(int16x8_t gx, int16x8_t gy)
{
    return vaddq_u16(vreinterpretq_u16_s16(vabsq_s16(gx)),
                     vreinterpretq_u16_s16(vabsq_s16(gy)));
}

/* Computes the magnitude using the L1-norm type if gradient_size = 7
 *
 * @param[in] gx Gx component
 * @param[in] gy Gy component
 *
 * @return magnitude for 8 pixels
 */
inline uint32x4x2_t mag_l1_S32_S32(const int32x4x2_t &gx, const int32x4x2_t &gy)
{
    const uint32x4x2_t gx_abs =
    {
        {
            vreinterpretq_u32_s32(vabsq_s32(gx.val[0])),
            vreinterpretq_u32_s32(vabsq_s32(gx.val[1]))
        }
    };

    const uint32x4x2_t gy_abs =
    {
        {
            vreinterpretq_u32_s32(vabsq_s32(gy.val[0])),
            vreinterpretq_u32_s32(vabsq_s32(gy.val[1]))
        }
    };

    const uint32x4x2_t output =
    {
        {
            vaddq_u32(gx_abs.val[0], gy_abs.val[0]),
            vaddq_u32(gx_abs.val[1], gy_abs.val[1])
        }
    };

    return output;
}

inline float32x4x2_t mag_l2(const float32x4x2_t &gx, const float32x4x2_t &gy)
{
    // x^2 ...
    float32x4x2_t magnitude =
    {
        {
            vmulq_f32(gx.val[0], gx.val[0]),
            vmulq_f32(gx.val[1], gx.val[1])
        }
    };

    // ... + y^2
    magnitude.val[0] = vmlaq_f32(magnitude.val[0], gy.val[0], gy.val[0]);
    magnitude.val[1] = vmlaq_f32(magnitude.val[1], gy.val[1], gy.val[1]);

    // sqrt(...)
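    // Approximated as x * rsqrt_estimate(x): vrsqrteq_f32 returns an estimate of
    // 1/sqrt(x), so a single extra multiply yields sqrt(x) without a division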
    magnitude.val[0] = vmulq_f32(vrsqrteq_f32(magnitude.val[0]), magnitude.val[0]);
    magnitude.val[1] = vmulq_f32(vrsqrteq_f32(magnitude.val[1]), magnitude.val[1]);

    return magnitude;
}

/* Computes the magnitude using L2-norm if gradient_size = 3 or 5
 *
 * @param[in] gx Gx component
 * @param[in] gy Gy component
 *
 * @return magnitude for 8 pixels
 */
inline uint16x8_t mag_l2_S16_S16(int16x8_t gx, int16x8_t gy)
{
    // Compute magnitude using L2 normalization
    const float32x4x2_t gx2 =
    {
        {
            vcvtq_f32_s32(vmovl_s16(vget_low_s16(gx))),
            vcvtq_f32_s32(vmovl_s16(vget_high_s16(gx)))
        }
    };

    const float32x4x2_t gy2 =
    {
        {
            vcvtq_f32_s32(vmovl_s16(vget_low_s16(gy))),
            vcvtq_f32_s32(vmovl_s16(vget_high_s16(gy)))
        }
    };

    const float32x4x2_t magnitude = mag_l2(gx2, gy2);

    // Store magnitude - Convert to uint16x8
    return vcombine_u16(vmovn_u32(vcvtq_u32_f32(magnitude.val[0])),
                        vmovn_u32(vcvtq_u32_f32(magnitude.val[1])));
}

/* Computes the magnitude using L2-norm if gradient_size = 7
 *
 * @param[in] gx Gx component
 * @param[in] gy Gy component
 *
 * @return magnitude for 8 pixels
 */
inline uint32x4x2_t mag_l2_S32_S32(const int32x4x2_t &gx, const int32x4x2_t &gy)
{
    // Compute magnitude using L2 normalization
    float32x4x2_t gx2 =
    {
        {
            vcvtq_f32_s32(gx.val[0]),
            vcvtq_f32_s32(gx.val[1])
        }
    };

    float32x4x2_t gy2 =
    {
        {
            vcvtq_f32_s32(gy.val[0]),
            vcvtq_f32_s32(gy.val[1])
        }
    };

    const float32x4x2_t magnitude = mag_l2(gx2, gy2);
    const uint32x4x2_t mag32 =
    {
        {
            vcvtq_u32_f32(magnitude.val[0]),
            vcvtq_u32_f32(magnitude.val[1])
        }
    };

    return mag32;
}

/* Gradient function used when the gradient size = 3 or 5 and when the norm_type = L1-norm
 *
 * @param[in] gx_ptr Pointer to source image. Gx image. Data type supported S16
 * @param[in] gy_ptr Pointer to source image. Gy image. Data type supported S16
 * @param[out] magnitude_ptr Pointer to destination image. Magnitude. Data type supported U16
 * @param[out] phase_ptr Pointer to destination image. Quantized phase. Data type supported U8
 */
void mag_phase_l1norm_S16_S16_U16_U8(const void *__restrict gx_ptr, const void *__restrict gy_ptr, void *__restrict magnitude_ptr, void *__restrict phase_ptr)
{
    const auto gx        = static_cast<const int16_t *__restrict>(gx_ptr);
    const auto gy        = static_cast<const int16_t *__restrict>(gy_ptr);
    const auto magnitude = static_cast<uint16_t *__restrict>(magnitude_ptr);
    const auto phase     = static_cast<uint8_t *__restrict>(phase_ptr);

    const int16x8x4_t gx_val =
    {
        {
            vld1q_s16(gx),
            vld1q_s16(gx + 8),
            vld1q_s16(gx + 16),
            vld1q_s16(gx + 24)
        }
    };

    const int16x8x4_t gy_val =
    {
        {
            vld1q_s16(gy),
            vld1q_s16(gy + 8),
            vld1q_s16(gy + 16),
            vld1q_s16(gy + 24)
        }
    };

    // Compute and store phase
    vst1_u8(phase + 0, phase_quantization_S16_S16(gx_val.val[0], gy_val.val[0]));
    vst1_u8(phase + 8, phase_quantization_S16_S16(gx_val.val[1], gy_val.val[1]));
    vst1_u8(phase + 16, phase_quantization_S16_S16(gx_val.val[2], gy_val.val[2]));
    vst1_u8(phase + 24, phase_quantization_S16_S16(gx_val.val[3], gy_val.val[3]));

    // Compute and store magnitude using L1 normalization
    vst1q_u16(magnitude + 0, mag_l1_S16_S16(gx_val.val[0], gy_val.val[0]));
    vst1q_u16(magnitude + 8, mag_l1_S16_S16(gx_val.val[1], gy_val.val[1]));
    vst1q_u16(magnitude + 16, mag_l1_S16_S16(gx_val.val[2], gy_val.val[2]));
    vst1q_u16(magnitude + 24, mag_l1_S16_S16(gx_val.val[3], gy_val.val[3]));
}

/* Gradient function used when the gradient size = 3 or 5 and when the norm_type = L2-norm
 *
 * @param[in] gx_ptr Pointer to source image. Gx image. Data type supported S16
 * @param[in] gy_ptr Pointer to source image. Gy image. Data type supported S16
 * @param[out] magnitude_ptr Pointer to destination image. Magnitude. Data type supported U16
 * @param[out] phase_ptr Pointer to destination image. Quantized phase. Data type supported U8
 */
void mag_phase_l2norm_S16_S16_U16_U8(const void *__restrict gx_ptr, const void *__restrict gy_ptr, void *__restrict magnitude_ptr, void *__restrict phase_ptr)
{
    const auto gx        = static_cast<const int16_t *__restrict>(gx_ptr);
    const auto gy        = static_cast<const int16_t *__restrict>(gy_ptr);
    const auto magnitude = static_cast<uint16_t *__restrict>(magnitude_ptr);
    const auto phase     = static_cast<uint8_t *__restrict>(phase_ptr);

    const int16x8x4_t gx_val =
    {
        {
            vld1q_s16(gx),
            vld1q_s16(gx + 8),
            vld1q_s16(gx + 16),
            vld1q_s16(gx + 24)
        }
    };

    const int16x8x4_t gy_val =
    {
        {
            vld1q_s16(gy),
            vld1q_s16(gy + 8),
            vld1q_s16(gy + 16),
            vld1q_s16(gy + 24)
        }
    };

    // Compute and store phase
    vst1_u8(phase + 0, phase_quantization_S16_S16(gx_val.val[0], gy_val.val[0]));
    vst1_u8(phase + 8, phase_quantization_S16_S16(gx_val.val[1], gy_val.val[1]));
    vst1_u8(phase + 16, phase_quantization_S16_S16(gx_val.val[2], gy_val.val[2]));
    vst1_u8(phase + 24, phase_quantization_S16_S16(gx_val.val[3], gy_val.val[3]));

    // Compute and store magnitude using L2 normalization
    vst1q_u16(magnitude + 0, mag_l2_S16_S16(gx_val.val[0], gy_val.val[0]));
    vst1q_u16(magnitude + 8, mag_l2_S16_S16(gx_val.val[1], gy_val.val[1]));
    vst1q_u16(magnitude + 16, mag_l2_S16_S16(gx_val.val[2], gy_val.val[2]));
    vst1q_u16(magnitude + 24, mag_l2_S16_S16(gx_val.val[3], gy_val.val[3]));
}

/* Gradient function used when the gradient size = 7 and when the norm_type = L1-norm
 *
 * @param[in] gx_ptr Pointer to source image. Gx image. Data type supported S32
 * @param[in] gy_ptr Pointer to source image. Gy image. Data type supported S32
 * @param[out] magnitude_ptr Pointer to destination image. Magnitude. Data type supported U32
 * @param[out] phase_ptr Pointer to destination image. Quantized phase. Data type supported U8
 */
void mag_phase_l1norm_S32_S32_U32_U8(const void *__restrict gx_ptr, const void *__restrict gy_ptr, void *__restrict magnitude_ptr, void *__restrict phase_ptr)
{
    auto gx        = static_cast<const int32_t *__restrict>(gx_ptr);
    auto gy        = static_cast<const int32_t *__restrict>(gy_ptr);
    auto magnitude = static_cast<uint32_t *__restrict>(magnitude_ptr);
    auto phase     = static_cast<uint8_t *__restrict>(phase_ptr);

    // Process low and high part
    for(size_t i = 0; i < 2; ++i, gx += 16, gy += 16, magnitude += 16, phase += 16)
    {
        const int32x4x2_t gx0 =
        {
            {
                vld1q_s32(gx + 0),
                vld1q_s32(gx + 4)
            }
        };

        const int32x4x2_t gx1 =
        {
            {
                vld1q_s32(gx + 8),
                vld1q_s32(gx + 12)
            }
        };

        const int32x4x2_t gy0 =
        {
            {
                vld1q_s32(gy + 0),
                vld1q_s32(gy + 4)
            }
        };

        const int32x4x2_t gy1 =
        {
            {
                vld1q_s32(gy + 8),
                vld1q_s32(gy + 12)
            }
        };

        // Compute and store phase
        vst1_u8(phase + 0, phase_quantization_S32_S32(gx0, gy0));
        vst1_u8(phase + 8, phase_quantization_S32_S32(gx1, gy1));

        // Compute magnitude using L1 normalization
        const uint32x4x2_t mag0 = mag_l1_S32_S32(gx0, gy0);
        const uint32x4x2_t mag1 = mag_l1_S32_S32(gx1, gy1);

        // Store magnitude
        vst1q_u32(magnitude + 0, mag0.val[0]);
        vst1q_u32(magnitude + 4, mag0.val[1]);
        vst1q_u32(magnitude + 8, mag1.val[0]);
        vst1q_u32(magnitude + 12, mag1.val[1]);
    }
}

/* Gradient function used when the gradient size = 7 and when the norm_type = L2-norm
 *
 * @param[in] gx_ptr Pointer to source image. Gx image. Data type supported S32
 * @param[in] gy_ptr Pointer to source image. Gy image. Data type supported S32
 * @param[out] magnitude_ptr Pointer to destination image. Magnitude. Data type supported U32
 * @param[out] phase_ptr Pointer to destination image. Quantized phase. Data type supported U8
 */
void mag_phase_l2norm_S32_S32_U32_U8(const void *__restrict gx_ptr, const void *__restrict gy_ptr, void *__restrict magnitude_ptr, void *__restrict phase_ptr)
{
    auto gx        = static_cast<const int32_t *__restrict>(gx_ptr);
    auto gy        = static_cast<const int32_t *__restrict>(gy_ptr);
    auto magnitude = static_cast<uint32_t *__restrict>(magnitude_ptr);
    auto phase     = static_cast<uint8_t *__restrict>(phase_ptr);

    // Process low and high part
    for(size_t i = 0; i < 2; ++i, gx += 16, gy += 16, magnitude += 16, phase += 16)
    {
        const int32x4x2_t gx0 =
        {
            {
                vld1q_s32(gx + 0),
                vld1q_s32(gx + 4)
            }
        };

        const int32x4x2_t gx1 =
        {
            {
                vld1q_s32(gx + 8),
                vld1q_s32(gx + 12)
            }
        };

        const int32x4x2_t gy0 =
        {
            {
                vld1q_s32(gy + 0),
                vld1q_s32(gy + 4)
            }
        };

        const int32x4x2_t gy1 =
        {
            {
                vld1q_s32(gy + 8),
                vld1q_s32(gy + 12)
            }
        };

        // Compute and store phase
        vst1_u8(phase + 0, phase_quantization_S32_S32(gx0, gy0));
        vst1_u8(phase + 8, phase_quantization_S32_S32(gx1, gy1));

        // Compute magnitude using L2 normalization
        const uint32x4x2_t mag0 = mag_l2_S32_S32(gx0, gy0);
        const uint32x4x2_t mag1 = mag_l2_S32_S32(gx1, gy1);

        // Store magnitude
        vst1q_u32(magnitude + 0, mag0.val[0]);
        vst1q_u32(magnitude + 4, mag0.val[1]);
        vst1q_u32(magnitude + 8, mag1.val[0]);
        vst1q_u32(magnitude + 12, mag1.val[1]);
    }
}

/* Computes non-maxima suppression and hysteresis when the gradient size = 3 or 5
 *
 * @param[in] magnitude_ptr Pointer to source image. Magnitude. Data type supported U16
 * @param[in] phase_ptr Pointer to source image. Quantized phase. Data type supported U8
 * @param[out] output_ptr Pointer to output image. Data type supported U8
 * @param[in] stride_mag Stride of magnitude image
 * @param[in] lower_thr Lower threshold used for the hysteresis
 * @param[in] upper_thr Upper threshold used for the hysteresis
 */
void non_max_suppression_U16_U8_U8(const void *__restrict magnitude_ptr, const void *__restrict phase_ptr, void *__restrict output_ptr, const uint32_t stride_mag, const int32_t lower_thr,
                                   const int32_t upper_thr)
{
    const auto magnitude = static_cast<const uint16_t *__restrict>(magnitude_ptr);
    const auto phase     = static_cast<const uint8_t *__restrict>(phase_ptr);
    const auto output    = static_cast<uint8_t *__restrict>(output_ptr);

    // Get magnitude and phase of the centre pixels
    uint16x8_t mc = vld1q_u16(magnitude);

    // Angle_quantized: 0 = 0°, 1 = 45°, 2 = 90°, 3 = 135°
    const uint16x8_t pc16 = vmovl_u8(vld1_u8(phase));

    // 0 degree
    const uint16x8_t mk0_0 = vld1q_u16(magnitude - 1);
    const uint16x8_t mk0_1 = vld1q_u16(magnitude + 1);
    uint16x8_t mask0 = vceqq_u16(pc16, vdupq_n_u16(0));
    mask0 = vandq_u16(mask0, vcgtq_u16(mc, mk0_0));
    mask0 = vandq_u16(mask0, vcgtq_u16(mc, mk0_1));

    // 45 degree
    const uint16x8_t mk45_0 = vld1q_u16(magnitude - stride_mag - 1);
    const uint16x8_t mk45_1 = vld1q_u16(magnitude + stride_mag + 1);
    uint16x8_t mask1 = vceqq_u16(pc16, vdupq_n_u16(1));
    mask1 = vandq_u16(mask1, vcgtq_u16(mc, mk45_0));
    mask1 = vandq_u16(mask1, vcgtq_u16(mc, mk45_1));

    // 90 degree
    const uint16x8_t mk90_0 = vld1q_u16(magnitude - stride_mag);
    const uint16x8_t mk90_1 = vld1q_u16(magnitude + stride_mag);
    uint16x8_t mask2 = vceqq_u16(pc16, vdupq_n_u16(2));
    mask2 = vandq_u16(mask2, vcgtq_u16(mc, mk90_0));
    mask2 = vandq_u16(mask2, vcgtq_u16(mc, mk90_1));

    // 135 degree
    const uint16x8_t mk135_0 = vld1q_u16(magnitude - stride_mag + 1);
    const uint16x8_t mk135_1 = vld1q_u16(magnitude + stride_mag - 1);
    uint16x8_t mask3 = vceqq_u16(pc16, vdupq_n_u16(3));
    mask3 = vandq_u16(mask3, vcgtq_u16(mc, mk135_0));
    mask3 = vandq_u16(mask3, vcgtq_u16(mc, mk135_1));

    // Merge masks
    mask0 = vorrq_u16(mask0, mask1);
    mask2 = vorrq_u16(mask2, mask3);
    mask0 = vorrq_u16(mask0, mask2);

    mc = vbslq_u16(mask0, mc, vdupq_n_u16(0));

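    // Hysteresis labelling: suppressed pixels were zeroed above; surviving maxima
    // above upper_thr become EDGE, values not above lower_thr become NO_EDGE, and
    // values in between are marked MAYBE for the edge-trace stage to resolve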
    // mc > upper_thr
    mask0 = vcgtq_u16(mc, vdupq_n_u16(upper_thr));

    // mc <= lower_thr
    mask1 = vcleq_u16(mc, vdupq_n_u16(lower_thr));

    // mc <= upper_thr && mc > lower_thr
    mask2 = vcleq_u16(mc, vdupq_n_u16(upper_thr));
    mask2 = vandq_u16(mask2, vcgtq_u16(mc, vdupq_n_u16(lower_thr)));

    mc = vbslq_u16(mask0, vdupq_n_u16(EDGE), mc);
    mc = vbslq_u16(mask1, vdupq_n_u16(NO_EDGE), mc);
    mc = vbslq_u16(mask2, vdupq_n_u16(MAYBE), mc);

    vst1_u8(output, vmovn_u16(mc));
}

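/* Performs non-maxima suppression and hysteresis thresholding for 4 U32 magnitude values
 *
 * @param[in] input Pointer to the magnitude of the first centre pixel. Data type supported U32
 * @param[in] pc Quantized phase of the 4 pixels
 * @param[in] stride_mag Stride of the magnitude image (in elements)
 * @param[in] lower_thr Lower threshold used for the hysteresis
 * @param[in] upper_thr Upper threshold used for the hysteresis
 *
 * @return U16 vector with each lane set to EDGE, NO_EDGE or MAYBE
 */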
inline uint16x4_t non_max_U32_helper(const uint32_t *input, const uint16x4_t pc, const uint32_t stride_mag, const int32_t lower_thr, const int32_t upper_thr)
{
    // Phase for 4 pixels
    const uint32x4_t pc32 = vmovl_u16(pc);

    // Get magnitude for 4 pixels
    uint32x4_t mc = vld1q_u32(input);

    // Angle_quantized: 0 = 0°, 1 = 45°, 2 = 90°, 3 = 135°
    // 0 degree
    const uint32x4_t mk0_0 = vld1q_u32(input - 1);
    const uint32x4_t mk0_1 = vld1q_u32(input + 1);
    uint32x4_t mask0 = vceqq_u32(pc32, vdupq_n_u32(0));
    mask0 = vandq_u32(mask0, vcgtq_u32(mc, mk0_0));
    mask0 = vandq_u32(mask0, vcgtq_u32(mc, mk0_1));

    // 45 degree
    const uint32x4_t mk45_0 = vld1q_u32(input - stride_mag - 1);
    const uint32x4_t mk45_1 = vld1q_u32(input + stride_mag + 1);
    uint32x4_t mask1 = vceqq_u32(pc32, vdupq_n_u32(1));
    mask1 = vandq_u32(mask1, vcgtq_u32(mc, mk45_0));
    mask1 = vandq_u32(mask1, vcgtq_u32(mc, mk45_1));

    // 90 degree
    const uint32x4_t mk90_0 = vld1q_u32(input - stride_mag);
    const uint32x4_t mk90_1 = vld1q_u32(input + stride_mag);
    uint32x4_t mask2 = vceqq_u32(pc32, vdupq_n_u32(2));
    mask2 = vandq_u32(mask2, vcgtq_u32(mc, mk90_0));
    mask2 = vandq_u32(mask2, vcgtq_u32(mc, mk90_1));

    // 135 degree
    const uint32x4_t mk135_0 = vld1q_u32(input - stride_mag + 1);
    const uint32x4_t mk135_1 = vld1q_u32(input + stride_mag - 1);
    uint32x4_t mask3 = vceqq_u32(pc32, vdupq_n_u32(3));
    mask3 = vandq_u32(mask3, vcgtq_u32(mc, mk135_0));
    mask3 = vandq_u32(mask3, vcgtq_u32(mc, mk135_1));

    // Merge masks
    mask0 = vorrq_u32(mask0, mask1);
    mask2 = vorrq_u32(mask2, mask3);
    mask0 = vorrq_u32(mask0, mask2);

    mc = vbslq_u32(mask0, mc, vdupq_n_u32(0));

    // mc > upper_thr
    mask0 = vcgtq_u32(mc, vdupq_n_u32(upper_thr));

    // mc <= lower_thr
    mask1 = vcleq_u32(mc, vdupq_n_u32(lower_thr));

    // mc <= upper_thr && mc > lower_thr
    mask2 = vcleq_u32(mc, vdupq_n_u32(upper_thr));
    mask2 = vandq_u32(mask2, vcgtq_u32(mc, vdupq_n_u32(lower_thr)));

    mc = vbslq_u32(mask0, vdupq_n_u32(EDGE), mc);
    mc = vbslq_u32(mask1, vdupq_n_u32(NO_EDGE), mc);
    mc = vbslq_u32(mask2, vdupq_n_u32(MAYBE), mc);

    return vmovn_u32(mc);
}

/* Computes non-maxima suppression and hysteresis when the gradient_size = 7
 *
 * @param[in] magnitude_ptr Pointer to source image. Magnitude. Data type supported U32
 * @param[in] phase_ptr Pointer to source image. Quantized phase. Data type supported U8
 * @param[out] output_ptr Pointer to destination image. Data type supported U8
 * @param[in] stride_mag Stride of magnitude image
 * @param[in] lower_thr Lower threshold used for the hysteresis
 * @param[in] upper_thr Upper threshold used for the hysteresis
 */
void non_max_suppression_U32_U8_U8(const void *__restrict magnitude_ptr, const void *__restrict phase_ptr, void *__restrict output_ptr, const uint32_t stride_mag, const int32_t lower_thr,
                                   const int32_t upper_thr)
{
    const auto magnitude = static_cast<const uint32_t *__restrict>(magnitude_ptr);
    const auto phase     = static_cast<const uint8_t *__restrict>(phase_ptr);
    const auto output    = static_cast<uint8_t *__restrict>(output_ptr);

    // Get phase for 8 pixels
    const uint16x8_t pc16 = vmovl_u8(vld1_u8(phase));

    // Compute non maxima suppression
    const uint16x4x2_t res =
    {
        {
            non_max_U32_helper(magnitude, vget_low_u16(pc16), stride_mag, lower_thr, upper_thr),
            non_max_U32_helper(magnitude + 4, vget_high_u16(pc16), stride_mag, lower_thr, upper_thr)
        }
    };

    // Store result
    vst1_u8(output, vmovn_u16(vcombine_u16(res.val[0], res.val[1])));
}

/* Recursive edge tracing; called by edge_trace_U8_U8
 *
 * @param[in] input Pointer to source image. Data type supported U8
 * @param[out] output Pointer to destination image. Data type supported U8
 * @param[in] input_stride Stride of the input image
 * @param[in] output_stride Stride of the output image
 */
void edge_trace_recursive_U8_U8(uint8_t *__restrict input, uint8_t *__restrict output, const int32_t input_stride, const int32_t output_stride)
{
    // Look for MAYBE pixels in 8 directions
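    // Every MAYBE neighbour found is promoted to EDGE in the input and the trace
    // recurses from it, so chains of MAYBE pixels connected to a strong edge are kept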
    *output = EDGE;

    // (-1, 0)
    uint8_t pixel = *(input - 1);

    if(pixel == MAYBE)
    {
        // Touched a MAYBE point. MAYBE becomes EDGE
        *(input - 1) = EDGE;

        edge_trace_recursive_U8_U8(input - 1, output - 1, input_stride, output_stride);
    }

    // (+1, 0)
    pixel = *(input + 1);

    if(pixel == MAYBE)
    {
        // Touched a MAYBE point. MAYBE becomes EDGE
        *(input + 1) = EDGE;

        edge_trace_recursive_U8_U8(input + 1, output + 1, input_stride, output_stride);
    }

    input -= input_stride;
    output -= output_stride;

    // (-1, -1)
    pixel = *(input - 1);

    if(pixel == MAYBE)
    {
        // Touched a MAYBE point. MAYBE becomes EDGE
        *(input - 1) = EDGE;

        edge_trace_recursive_U8_U8(input - 1, output - 1, input_stride, output_stride);
    }

    // (0, -1)
    pixel = *input;

    if(pixel == MAYBE)
    {
        // Touched a MAYBE point. MAYBE becomes EDGE
        *input = EDGE;

        edge_trace_recursive_U8_U8(input, output, input_stride, output_stride);
    }

    // (+1, -1)
    pixel = *(input + 1);

    if(pixel == MAYBE)
    {
        // Touched a MAYBE point. MAYBE becomes EDGE
        *(input + 1) = EDGE;

        edge_trace_recursive_U8_U8(input + 1, output + 1, input_stride, output_stride);
    }

    input += input_stride * 2;
    output += output_stride * 2;

    // (-1, +1)
    pixel = *(input - 1);

    if(pixel == MAYBE)
    {
        // Touched a MAYBE point. MAYBE becomes EDGE
        *(input - 1) = EDGE;

        edge_trace_recursive_U8_U8(input - 1, output - 1, input_stride, output_stride);
    }

    // (0, +1)
    pixel = *input;

    if(pixel == MAYBE)
    {
        // Touched a MAYBE point. MAYBE becomes EDGE
        *input = EDGE;

        edge_trace_recursive_U8_U8(input, output, input_stride, output_stride);
    }

    // (+1, +1)
    pixel = *(input + 1);

    if(pixel == MAYBE)
    {
        // Touched a MAYBE point. MAYBE becomes EDGE
        *(input + 1) = EDGE;

        edge_trace_recursive_U8_U8(input + 1, output + 1, input_stride, output_stride);
    }
}

/* Computes edge tracing
 *
 * @param[in] input Pointer to source image. Data type supported U8
 * @param[out] output Pointer to destination image. Data type supported U8
 * @param[in] input_stride Stride of the input image
 * @param[in] output_stride Stride of the output image
 */
void edge_trace_U8_U8(uint8_t *__restrict input, uint8_t *__restrict output, const int32_t input_stride, const int32_t output_stride)
{
    if(*input == NO_EDGE)
    {
        *output = NO_EDGE;
    }
    // Check if EDGE and not yet touched
    else if((*input == EDGE) && (*output == NO_EDGE))
    {
        edge_trace_recursive_U8_U8(input, output, input_stride, output_stride);
    }
}
} // namespace

NEGradientKernel::~NEGradientKernel() = default;

NEGradientKernel::NEGradientKernel()
    : _func(nullptr), _gx(nullptr), _gy(nullptr), _magnitude(nullptr), _phase(nullptr)
{
}

void NEGradientKernel::configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase, int32_t norm_type)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(gx, gy, magnitude, phase);

    set_shape_if_empty(*magnitude->info(), gx->info()->tensor_shape());
    set_shape_if_empty(*phase->info(), gx->info()->tensor_shape());

    Format magnitude_format = gx->info()->data_type() == DataType::S16 ? Format::U16 : Format::U32;
    set_format_if_unknown(*magnitude->info(), magnitude_format);
    set_format_if_unknown(*phase->info(), Format::U8);

    ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(gx, gy, magnitude, phase);
    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(gx, 1, DataType::S16, DataType::S32);
    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(gy, 1, DataType::S16, DataType::S32);
    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(magnitude, 1, DataType::U16, DataType::U32);
    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(phase, 1, DataType::U8);
    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(gx, gy);
    ARM_COMPUTE_ERROR_ON_MSG(element_size_from_data_type(gx->info()->data_type()) != element_size_from_data_type(magnitude->info()->data_type()), "Magnitude must have the same element size as Gx and Gy");

    _gx        = gx;
    _gy        = gy;
    _magnitude = magnitude;
    _phase     = phase;

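    // norm_type == 1 selects the L1 norm; any other value falls back to the L2 norm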
    if(_gx->info()->data_type() == DataType::S16)
    {
        if(norm_type == 1)
        {
            _func = &mag_phase_l1norm_S16_S16_U16_U8;
        }
        else
        {
            _func = &mag_phase_l2norm_S16_S16_U16_U8;
        }
    }
    else
    {
        if(norm_type == 1)
        {
            _func = &mag_phase_l1norm_S32_S32_U32_U8;
        }
        else
        {
            _func = &mag_phase_l2norm_S32_S32_U32_U8;
        }
    }

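    // Each iteration processes 32 pixels: four 8-lane vectors in the S16 path,
    // two blocks of 16 values in the S32 path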
    constexpr unsigned int num_elems_processed_per_iteration = 32;

    // Configure kernel window
    Window win = calculate_max_window(*_gx->info(), Steps(num_elems_processed_per_iteration));

    AccessWindowHorizontal gx_access(_gx->info(), 0, num_elems_processed_per_iteration);
    AccessWindowHorizontal gy_access(_gy->info(), 0, num_elems_processed_per_iteration);
    AccessWindowHorizontal mag_access(_magnitude->info(), 0, num_elems_processed_per_iteration);
    AccessWindowHorizontal phase_access(_phase->info(), 0, num_elems_processed_per_iteration);

    ARM_COMPUTE_UNUSED(update_window_and_padding(win, gx_access, gy_access, mag_access, phase_access));

    mag_access.set_valid_region(win, _gx->info()->valid_region());
    phase_access.set_valid_region(win, _gx->info()->valid_region());

    INEKernel::configure(win);
}

void NEGradientKernel::run(const Window &window, const ThreadInfo &info)
{
    ARM_COMPUTE_UNUSED(info);
    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
    ARM_COMPUTE_ERROR_ON(_func == nullptr);
    Iterator gx(_gx, window);
    Iterator gy(_gy, window);
    Iterator magnitude(_magnitude, window);
    Iterator phase(_phase, window);

    execute_window_loop(window, [&](const Coordinates &)
    {
        (*_func)(gx.ptr(), gy.ptr(), magnitude.ptr(), phase.ptr());
    },
    gx, gy, magnitude, phase);
}

NEEdgeNonMaxSuppressionKernel::~NEEdgeNonMaxSuppressionKernel() = default;
NEEdgeNonMaxSuppressionKernel::NEEdgeNonMaxSuppressionKernel()
    : _func(nullptr), _magnitude(nullptr), _phase(nullptr), _output(nullptr), _lower_thr(0), _upper_thr(0)
{
}

BorderSize NEEdgeNonMaxSuppressionKernel::border_size() const
{
    return BorderSize(1);
}

void NEEdgeNonMaxSuppressionKernel::configure(const ITensor *magnitude, const ITensor *phase, ITensor *output,
                                              int32_t upper_thr, int32_t lower_thr, bool border_undefined)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(magnitude, phase, output);

    set_shape_if_empty(*output->info(), magnitude->info()->tensor_shape());

    set_format_if_unknown(*phase->info(), Format::U8);
    set_format_if_unknown(*output->info(), Format::U8);

    ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(magnitude, phase, output);
    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(magnitude, 1, DataType::U16, DataType::U32);
    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(phase, 1, DataType::U8);
    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(phase, output);

    _magnitude = magnitude;
    _phase     = phase;
    _output    = output;

    switch(_magnitude->info()->data_type())
    {
        case DataType::U16:
            _func = &non_max_suppression_U16_U8_U8;
            break;
        case DataType::U32:
            _func = &non_max_suppression_U32_U8_U8;
            break;
        default:
            ARM_COMPUTE_ERROR("Unsupported data type!");
    }

    // Set thresholds
    _lower_thr = lower_thr;
    _upper_thr = upper_thr;

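    // Each iteration produces 8 output pixels but reads a 3-row window of 10
    // magnitude values, i.e. the 8 centre pixels plus one border element per side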
    constexpr unsigned int num_elems_processed_per_iteration = 8;
    constexpr unsigned int num_elems_read_per_iteration      = 10;
    constexpr unsigned int num_rows_read_per_iteration       = 3;

    // Configure kernel window
    Window win = calculate_max_window(*_magnitude->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size());

    AccessWindowRectangle mag_access(_magnitude->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration);
    AccessWindowHorizontal phase_access(_phase->info(), 0, num_elems_processed_per_iteration);
    AccessWindowHorizontal output_access(_output->info(), 0, num_elems_processed_per_iteration);

    update_window_and_padding(win, mag_access, phase_access, output_access);

    output_access.set_valid_region(win, _magnitude->info()->valid_region(), border_undefined, border_size());

    INEKernel::configure(win);
}

void NEEdgeNonMaxSuppressionKernel::run(const Window &window, const ThreadInfo &info)
{
    ARM_COMPUTE_UNUSED(info);
    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
    ARM_COMPUTE_ERROR_ON(_func == nullptr);
    Iterator magnitude(_magnitude, window);
    Iterator phase(_phase, window);
    Iterator output(_output, window);

    const size_t input1_stride        = _magnitude->info()->strides_in_bytes()[1];
    const size_t input1_stride_ushort = input1_stride / data_size_from_type(_magnitude->info()->data_type());

    execute_window_loop(window, [&](const Coordinates &)
    {
        (*_func)(magnitude.ptr(), phase.ptr(), output.ptr(), input1_stride_ushort, _lower_thr, _upper_thr);
    },
    magnitude, phase, output);
}

NEEdgeTraceKernel::~NEEdgeTraceKernel() = default;
NEEdgeTraceKernel::NEEdgeTraceKernel()
    : _input(nullptr), _output(nullptr)
{
}

BorderSize NEEdgeTraceKernel::border_size() const
{
    return BorderSize(1);
}

bool NEEdgeTraceKernel::is_parallelisable() const
{
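    // Edge tracing follows connected pixels recursively across the whole image,
    // so the work cannot be split into independent windows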
    return false;
}

void NEEdgeTraceKernel::configure(ITensor *input, ITensor *output)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);

    set_shape_if_empty(*output->info(), input->info()->tensor_shape());

    set_format_if_unknown(*input->info(), Format::U8);
    set_format_if_unknown(*output->info(), Format::U8);

    ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);

    _input  = input;
    _output = output;

    constexpr unsigned int num_elems_processed_per_iteration = 1;

    // Configure kernel window
    Window win = calculate_max_window(*_input->info(), Steps(num_elems_processed_per_iteration));

    const ValidRegion &input_valid_region  = input->info()->valid_region();
    const ValidRegion &output_valid_region = output->info()->valid_region();

    // Reads can occur within the valid region of the input + border
    AccessWindowStatic input_access(input->info(),
                                    input_valid_region.anchor[0] - border_size().left,
                                    input_valid_region.anchor[1] - border_size().top,
                                    input_valid_region.anchor[0] + input_valid_region.shape[0] + border_size().right,
                                    input_valid_region.anchor[1] + input_valid_region.shape[1] + border_size().bottom);

    // Writes can occur within the valid region of the output + border
    AccessWindowStatic output_access(output->info(),
                                     output_valid_region.anchor[0] - border_size().left,
                                     output_valid_region.anchor[1] - border_size().top,
                                     output_valid_region.anchor[0] + output_valid_region.shape[0] + border_size().right,
                                     output_valid_region.anchor[1] + output_valid_region.shape[1] + border_size().bottom);

    update_window_and_padding(win, input_access, output_access);

    output_access.set_valid_region(win, _input->info()->valid_region());

    INEKernel::configure(win);
}

void NEEdgeTraceKernel::run(const Window &window, const ThreadInfo &info)
{
    ARM_COMPUTE_UNUSED(info);
    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
    Iterator input(_input, window);
    Iterator output(_output, window);

    const size_t input_stride  = _input->info()->strides_in_bytes()[1];
    const size_t output_stride = _output->info()->strides_in_bytes()[1];

    execute_window_loop(window, [&](const Coordinates &)
    {
        edge_trace_U8_U8(input.ptr(), output.ptr(), input_stride, output_stride);
    },
    input, output);
}
} // namespace arm_compute