/*
 * Copyright (c) 2016-2019 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/core/NEON/kernels/NECannyEdgeKernel.h"

#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/Validate.h"

#include <arm_neon.h>
#include <cstddef>
#include <cstdint>
#include <tuple>

using namespace arm_compute;

namespace arm_compute
{
class Coordinates;
} // namespace arm_compute

namespace
{
constexpr int NO_EDGE = 0;
constexpr int EDGE    = 255;
constexpr int MAYBE   = 127;
} // namespace
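/* Note: these three labels implement the usual Canny double-threshold scheme. In the
 * non-maxima suppression kernels below, pixels above the upper threshold are marked EDGE,
 * pixels at or below the lower threshold NO_EDGE, and pixels in between MAYBE; the edge
 * tracing kernel then promotes a MAYBE pixel to EDGE only if it is 8-connected to an EDGE
 * pixel.
 */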

namespace
{
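/* Quantizes the gradient direction into one of four bins by comparing the absolute
 * projections of (gx, gy) onto the directions 0°, 45°, 90° and 135° and keeping the index
 * of the largest one. A scalar sketch of the same rule (illustration only, not part of the
 * kernel; uses std::abs from <cmath>):
 *
 *   inline int quantize_phase(float gx, float gy)
 *   {
 *       const float k         = 0.70710678118655f;             // sqrt(2) / 2
 *       const float scores[4] = { std::abs(gx),                // 0:   0 degrees
 *                                 std::abs(k * gx + k * gy),   // 1:  45 degrees
 *                                 std::abs(gy),                // 2:  90 degrees
 *                                 std::abs(k * gy - k * gx) }; // 3: 135 degrees
 *       int best = 0;
 *       for(int i = 1; i < 4; ++i)
 *       {
 *           if(scores[i] > scores[best])
 *           {
 *               best = i;
 *           }
 *       }
 *       return best;
 *   }
 *
 * @param[in] gx Gx component (8 pixels, as two float32x4_t)
 * @param[in] gy Gy component (8 pixels, as two float32x4_t)
 *
 * @return quantized phase for 8 pixels
 */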
inline uint8x8_t phase_quantization(const float32x4x2_t &gx, const float32x4x2_t &gy)
{
    // Constant used for evaluating score1 and score3
    static const float32x4_t const45 = vdupq_n_f32(0.70710678118655f);
    static const float32x4_t zero    = vdupq_n_f32(0.0f);
    static const float32x4_t one     = vdupq_n_f32(1.0f);
    static const float32x4_t two     = vdupq_n_f32(2.0f);
    static const float32x4_t three   = vdupq_n_f32(3.0f);

    // Score0: (1, 0)
    const float32x4x2_t score0 =
    {
        {
            vabsq_f32(gx.val[0]),
            vabsq_f32(gx.val[1])
        }
    };

    // Score2: ( 0, 1 )
    const float32x4x2_t score2 =
    {
        {
            vabsq_f32(gy.val[0]),
            vabsq_f32(gy.val[1])
        }
    };

    // Score1 and Score3: ( sqrt(2) / 2, sqrt(2) / 2 ) - ( -sqrt(2) / 2, sqrt(2) / 2 )
    float32x4x2_t score1 =
    {
        {
            vmulq_f32(gy.val[0], const45),
            vmulq_f32(gy.val[1], const45)
        }
    };

    float32x4x2_t score3 = score1;

    score1.val[0] = vmlaq_f32(score1.val[0], gx.val[0], const45);
    score1.val[1] = vmlaq_f32(score1.val[1], gx.val[1], const45);
    score3.val[0] = vmlsq_f32(score3.val[0], gx.val[0], const45);
    score3.val[1] = vmlsq_f32(score3.val[1], gx.val[1], const45);

    score1.val[0] = vabsq_f32(score1.val[0]);
    score1.val[1] = vabsq_f32(score1.val[1]);
    score3.val[0] = vabsq_f32(score3.val[0]);
    score3.val[1] = vabsq_f32(score3.val[1]);

    float32x4x2_t phase =
    {
        {
            zero,
            zero
        }
    };

    float32x4x2_t old_score = score0;

    // score1 > old_score?
    uint32x4x2_t mask =
    {
        {
            vcgtq_f32(score1.val[0], old_score.val[0]),
            vcgtq_f32(score1.val[1], old_score.val[1])
        }
    };

    phase.val[0]     = vbslq_f32(mask.val[0], one, phase.val[0]);
    phase.val[1]     = vbslq_f32(mask.val[1], one, phase.val[1]);
    old_score.val[0] = vbslq_f32(mask.val[0], score1.val[0], old_score.val[0]);
    old_score.val[1] = vbslq_f32(mask.val[1], score1.val[1], old_score.val[1]);

    // score2 > old_score?
    mask.val[0] = vcgtq_f32(score2.val[0], old_score.val[0]);
    mask.val[1] = vcgtq_f32(score2.val[1], old_score.val[1]);

    phase.val[0]     = vbslq_f32(mask.val[0], two, phase.val[0]);
    phase.val[1]     = vbslq_f32(mask.val[1], two, phase.val[1]);
    old_score.val[0] = vbslq_f32(mask.val[0], score2.val[0], old_score.val[0]);
    old_score.val[1] = vbslq_f32(mask.val[1], score2.val[1], old_score.val[1]);

    // score3 > old_score?
    mask.val[0] = vcgtq_f32(score3.val[0], old_score.val[0]);
    mask.val[1] = vcgtq_f32(score3.val[1], old_score.val[1]);

    phase.val[0]     = vbslq_f32(mask.val[0], three, phase.val[0]);
    phase.val[1]     = vbslq_f32(mask.val[1], three, phase.val[1]);
    old_score.val[0] = vbslq_f32(mask.val[0], score3.val[0], old_score.val[0]);
    old_score.val[1] = vbslq_f32(mask.val[1], score3.val[1], old_score.val[1]);

    // Convert from float32x4_t to uint8x8_t
    return vmovn_u16(vcombine_u16(vmovn_u32(vcvtq_u32_f32(phase.val[0])),
                                  vmovn_u32(vcvtq_u32_f32(phase.val[1]))));
}

/* Computes the gradient phase if gradient_size = 3 or 5. The output is quantized.
 * 0 = 0°, 1 = 45°, 2 = 90°, 3 = 135°
 *
 * @param[in] gx Gx component
 * @param[in] gy Gy component
 *
 * @return quantized phase for 8 pixels
 */
inline uint8x8_t phase_quantization_S16_S16(int16x8_t gx, int16x8_t gy)
{
    // Convert to float
    const float32x4x2_t gx_f32 =
    {
        {
            vcvtq_f32_s32(vmovl_s16(vget_low_s16(gx))),
            vcvtq_f32_s32(vmovl_s16(vget_high_s16(gx)))
        }
    };

    const float32x4x2_t gy_f32 =
    {
        {
            vcvtq_f32_s32(vmovl_s16(vget_low_s16(gy))),
            vcvtq_f32_s32(vmovl_s16(vget_high_s16(gy)))
        }
    };

    return phase_quantization(gx_f32, gy_f32);
}

/* Computes the gradient phase if gradient_size = 7. The output is quantized.
 * 0 = 0°, 1 = 45°, 2 = 90°, 3 = 135°
 *
 * @param[in] gx Gx component
 * @param[in] gy Gy component
 *
 * @return quantized phase for 8 pixels
 */
inline uint8x8_t phase_quantization_S32_S32(const int32x4x2_t &gx, const int32x4x2_t &gy)
{
    // Convert to float
    const float32x4x2_t gx_f32 =
    {
        {
            vcvtq_f32_s32(gx.val[0]),
            vcvtq_f32_s32(gx.val[1])
        }
    };

    const float32x4x2_t gy_f32 =
    {
        {
            vcvtq_f32_s32(gy.val[0]),
            vcvtq_f32_s32(gy.val[1])
        }
    };

    return phase_quantization(gx_f32, gy_f32);
}

/* Computes the magnitude using the L1-norm type if gradient_size = 3 or 5
 *
 * @param[in] gx Gx component
 * @param[in] gy Gy component
 *
 * @return magnitude for 8 pixels
 */
inline uint16x8_t mag_l1_S16_S16(int16x8_t gx, int16x8_t gy)
{
    return vaddq_u16(vreinterpretq_u16_s16(vabsq_s16(gx)),
                     vreinterpretq_u16_s16(vabsq_s16(gy)));
}

/* Computes the magnitude using the L1-norm type if gradient_size = 7
 *
 * @param[in] gx Gx component
 * @param[in] gy Gy component
 *
 * @return magnitude for 8 pixels
 */
inline uint32x4x2_t mag_l1_S32_S32(const int32x4x2_t &gx, const int32x4x2_t &gy)
{
    const uint32x4x2_t gx_abs =
    {
        {
            vreinterpretq_u32_s32(vabsq_s32(gx.val[0])),
            vreinterpretq_u32_s32(vabsq_s32(gx.val[1]))
        }
    };

    const uint32x4x2_t gy_abs =
    {
        {
            vreinterpretq_u32_s32(vabsq_s32(gy.val[0])),
            vreinterpretq_u32_s32(vabsq_s32(gy.val[1]))
        }
    };

    const uint32x4x2_t output =
    {
        {
            vaddq_u32(gx_abs.val[0], gy_abs.val[0]),
            vaddq_u32(gx_abs.val[1], gy_abs.val[1])
        }
    };

    return output;
}

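/* Computes the magnitude sqrt(gx^2 + gy^2) for 8 pixels. The square root is evaluated with
 * the identity sqrt(x) = x * 1/sqrt(x), where 1/sqrt(x) comes from vrsqrteq_f32; since that
 * intrinsic only returns an estimate of the reciprocal square root, the magnitude is
 * approximate (comment added for clarity).
 *
 * @param[in] gx Gx component
 * @param[in] gy Gy component
 *
 * @return magnitude for 8 pixels
 */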
inline float32x4x2_t mag_l2(const float32x4x2_t &gx, const float32x4x2_t &gy)
{
    // x^2 ...
    float32x4x2_t magnitude =
    {
        {
            vmulq_f32(gx.val[0], gx.val[0]),
            vmulq_f32(gx.val[1], gx.val[1])
        }
    };

    // ... + y^2
    magnitude.val[0] = vmlaq_f32(magnitude.val[0], gy.val[0], gy.val[0]);
    magnitude.val[1] = vmlaq_f32(magnitude.val[1], gy.val[1], gy.val[1]);

    // sqrt(...)
    magnitude.val[0] = vmulq_f32(vrsqrteq_f32(magnitude.val[0]), magnitude.val[0]);
    magnitude.val[1] = vmulq_f32(vrsqrteq_f32(magnitude.val[1]), magnitude.val[1]);

    return magnitude;
}

/* Computes the magnitude using L2-norm if gradient_size = 3 or 5
 *
 * @param[in] gx Gx component
 * @param[in] gy Gy component
 *
 * @return magnitude for 8 pixels
 */
inline uint16x8_t mag_l2_S16_S16(int16x8_t gx, int16x8_t gy)
{
    // Compute magnitude using L2 normalization
    const float32x4x2_t gx2 =
    {
        {
            vcvtq_f32_s32(vmovl_s16(vget_low_s16(gx))),
            vcvtq_f32_s32(vmovl_s16(vget_high_s16(gx)))
        }
    };

    const float32x4x2_t gy2 =
    {
        {
            vcvtq_f32_s32(vmovl_s16(vget_low_s16(gy))),
            vcvtq_f32_s32(vmovl_s16(vget_high_s16(gy)))
        }
    };

    const float32x4x2_t magnitude = mag_l2(gx2, gy2);

    // Store magnitude - Convert to uint16x8
    return vcombine_u16(vmovn_u32(vcvtq_u32_f32(magnitude.val[0])),
                        vmovn_u32(vcvtq_u32_f32(magnitude.val[1])));
}

/* Computes the magnitude using L2-norm if gradient_size = 7
 *
 * @param[in] gx Gx component
 * @param[in] gy Gy component
 *
 * @return magnitude for 8 pixels
 */
inline uint32x4x2_t mag_l2_S32_S32(const int32x4x2_t &gx, const int32x4x2_t &gy)
{
    // Compute magnitude using L2 normalization
    float32x4x2_t gx2 =
    {
        {
            vcvtq_f32_s32(gx.val[0]),
            vcvtq_f32_s32(gx.val[1])
        }
    };

    float32x4x2_t gy2 =
    {
        {
            vcvtq_f32_s32(gy.val[0]),
            vcvtq_f32_s32(gy.val[1])
        }
    };

    const float32x4x2_t magnitude = mag_l2(gx2, gy2);
    const uint32x4x2_t  mag32     =
    {
        {
            vcvtq_u32_f32(magnitude.val[0]),
            vcvtq_u32_f32(magnitude.val[1])
        }
    };

    return mag32;
}

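/* Note: each of the mag_phase_* gradient functions below consumes 32 Gx/Gy values per call
 * (the S16 variants load four int16x8_t vectors, the S32 variants loop twice over 16 values),
 * which matches the 32 elements per iteration configured in NEGradientKernel::configure().
 */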
/* Gradient function used when the gradient size = 3 or 5 and when the norm_type = L1-norm
 *
 * @param[in]  gx_ptr        Pointer to source image. Gx image. Data type supported S16
 * @param[in]  gy_ptr        Pointer to source image. Gy image. Data type supported S16
 * @param[out] magnitude_ptr Pointer to destination image. Magnitude. Data type supported U16
 * @param[out] phase_ptr     Pointer to destination image. Quantized phase. Data type supported U8
 */
void mag_phase_l1norm_S16_S16_U16_U8(const void *__restrict gx_ptr, const void *__restrict gy_ptr, void *__restrict magnitude_ptr, void *__restrict phase_ptr)
{
    const auto gx        = static_cast<const int16_t *__restrict>(gx_ptr);
    const auto gy        = static_cast<const int16_t *__restrict>(gy_ptr);
    const auto magnitude = static_cast<uint16_t *__restrict>(magnitude_ptr);
    const auto phase     = static_cast<uint8_t *__restrict>(phase_ptr);

    const int16x8x4_t gx_val =
    {
        {
            vld1q_s16(gx),
            vld1q_s16(gx + 8),
            vld1q_s16(gx + 16),
            vld1q_s16(gx + 24)
        }
    };

    const int16x8x4_t gy_val =
    {
        {
            vld1q_s16(gy),
            vld1q_s16(gy + 8),
            vld1q_s16(gy + 16),
            vld1q_s16(gy + 24)
        }
    };

    // Compute and store phase
    vst1_u8(phase + 0, phase_quantization_S16_S16(gx_val.val[0], gy_val.val[0]));
    vst1_u8(phase + 8, phase_quantization_S16_S16(gx_val.val[1], gy_val.val[1]));
    vst1_u8(phase + 16, phase_quantization_S16_S16(gx_val.val[2], gy_val.val[2]));
    vst1_u8(phase + 24, phase_quantization_S16_S16(gx_val.val[3], gy_val.val[3]));

    // Compute and store magnitude using L1 normalization
    vst1q_u16(magnitude + 0, mag_l1_S16_S16(gx_val.val[0], gy_val.val[0]));
    vst1q_u16(magnitude + 8, mag_l1_S16_S16(gx_val.val[1], gy_val.val[1]));
    vst1q_u16(magnitude + 16, mag_l1_S16_S16(gx_val.val[2], gy_val.val[2]));
    vst1q_u16(magnitude + 24, mag_l1_S16_S16(gx_val.val[3], gy_val.val[3]));
}

/* Gradient function used when the gradient size = 3 or 5 and when the norm_type = L2-norm
 *
 * @param[in]  gx_ptr        Pointer to source image. Gx image. Data type supported S16
 * @param[in]  gy_ptr        Pointer to source image. Gy image. Data type supported S16
 * @param[out] magnitude_ptr Pointer to destination image. Magnitude. Data type supported U16
 * @param[out] phase_ptr     Pointer to destination image. Quantized phase. Data type supported U8
 */
void mag_phase_l2norm_S16_S16_U16_U8(const void *__restrict gx_ptr, const void *__restrict gy_ptr, void *__restrict magnitude_ptr, void *__restrict phase_ptr)
{
    const auto gx        = static_cast<const int16_t *__restrict>(gx_ptr);
    const auto gy        = static_cast<const int16_t *__restrict>(gy_ptr);
    const auto magnitude = static_cast<uint16_t *__restrict>(magnitude_ptr);
    const auto phase     = static_cast<uint8_t *__restrict>(phase_ptr);

    const int16x8x4_t gx_val =
    {
        {
            vld1q_s16(gx),
            vld1q_s16(gx + 8),
            vld1q_s16(gx + 16),
            vld1q_s16(gx + 24)
        }
    };

    const int16x8x4_t gy_val =
    {
        {
            vld1q_s16(gy),
            vld1q_s16(gy + 8),
            vld1q_s16(gy + 16),
            vld1q_s16(gy + 24)
        }
    };

    // Compute and store phase
    vst1_u8(phase + 0, phase_quantization_S16_S16(gx_val.val[0], gy_val.val[0]));
    vst1_u8(phase + 8, phase_quantization_S16_S16(gx_val.val[1], gy_val.val[1]));
    vst1_u8(phase + 16, phase_quantization_S16_S16(gx_val.val[2], gy_val.val[2]));
    vst1_u8(phase + 24, phase_quantization_S16_S16(gx_val.val[3], gy_val.val[3]));

    // Compute and store magnitude using L2 normalization
    vst1q_u16(magnitude + 0, mag_l2_S16_S16(gx_val.val[0], gy_val.val[0]));
    vst1q_u16(magnitude + 8, mag_l2_S16_S16(gx_val.val[1], gy_val.val[1]));
    vst1q_u16(magnitude + 16, mag_l2_S16_S16(gx_val.val[2], gy_val.val[2]));
    vst1q_u16(magnitude + 24, mag_l2_S16_S16(gx_val.val[3], gy_val.val[3]));
}

/* Gradient function used when the gradient size = 7 and when the norm_type = L1-norm
 *
 * @param[in]  gx_ptr        Pointer to source image. Gx image. Data type supported S32
 * @param[in]  gy_ptr        Pointer to source image. Gy image. Data type supported S32
 * @param[out] magnitude_ptr Pointer to destination image. Magnitude. Data type supported U32
 * @param[out] phase_ptr     Pointer to destination image. Quantized phase. Data type supported U8
 */
void mag_phase_l1norm_S32_S32_U32_U8(const void *__restrict gx_ptr, const void *__restrict gy_ptr, void *__restrict magnitude_ptr, void *__restrict phase_ptr)
{
    auto gx        = static_cast<const int32_t *__restrict>(gx_ptr);
    auto gy        = static_cast<const int32_t *__restrict>(gy_ptr);
    auto magnitude = static_cast<uint32_t *__restrict>(magnitude_ptr);
    auto phase     = static_cast<uint8_t *__restrict>(phase_ptr);

    // Process low and high part
    for(size_t i = 0; i < 2; ++i, gx += 16, gy += 16, magnitude += 16, phase += 16)
    {
        const int32x4x2_t gx0 =
        {
            {
                vld1q_s32(gx + 0),
                vld1q_s32(gx + 4)
            }
        };

        const int32x4x2_t gx1 =
        {
            {
                vld1q_s32(gx + 8),
                vld1q_s32(gx + 12)
            }
        };

        const int32x4x2_t gy0 =
        {
            {
                vld1q_s32(gy + 0),
                vld1q_s32(gy + 4)
            }
        };

        const int32x4x2_t gy1 =
        {
            {
                vld1q_s32(gy + 8),
                vld1q_s32(gy + 12)
            }
        };

        // Compute and store phase
        vst1_u8(phase + 0, phase_quantization_S32_S32(gx0, gy0));
        vst1_u8(phase + 8, phase_quantization_S32_S32(gx1, gy1));

        // Compute magnitude using L1 normalization
        const uint32x4x2_t mag0 = mag_l1_S32_S32(gx0, gy0);
        const uint32x4x2_t mag1 = mag_l1_S32_S32(gx1, gy1);

        // Store magnitude
        vst1q_u32(magnitude + 0, mag0.val[0]);
        vst1q_u32(magnitude + 4, mag0.val[1]);
        vst1q_u32(magnitude + 8, mag1.val[0]);
        vst1q_u32(magnitude + 12, mag1.val[1]);
    }
}

/* Gradient function used when the gradient size = 7 and when the norm_type = L2-norm
 *
 * @param[in]  gx_ptr        Pointer to source image. Gx image. Data type supported S32
 * @param[in]  gy_ptr        Pointer to source image. Gy image. Data type supported S32
 * @param[out] magnitude_ptr Pointer to destination image. Magnitude. Data type supported U32
 * @param[out] phase_ptr     Pointer to destination image. Quantized phase. Data type supported U8
 */
void mag_phase_l2norm_S32_S32_U32_U8(const void *__restrict gx_ptr, const void *__restrict gy_ptr, void *__restrict magnitude_ptr, void *__restrict phase_ptr)
{
    auto gx        = static_cast<const int32_t *__restrict>(gx_ptr);
    auto gy        = static_cast<const int32_t *__restrict>(gy_ptr);
    auto magnitude = static_cast<uint32_t *__restrict>(magnitude_ptr);
    auto phase     = static_cast<uint8_t *__restrict>(phase_ptr);

    // Process low and high part
    for(size_t i = 0; i < 2; ++i, gx += 16, gy += 16, magnitude += 16, phase += 16)
    {
        const int32x4x2_t gx0 =
        {
            {
                vld1q_s32(gx + 0),
                vld1q_s32(gx + 4)
            }
        };

        const int32x4x2_t gx1 =
        {
            {
                vld1q_s32(gx + 8),
                vld1q_s32(gx + 12)
            }
        };

        const int32x4x2_t gy0 =
        {
            {
                vld1q_s32(gy + 0),
                vld1q_s32(gy + 4)
            }
        };

        const int32x4x2_t gy1 =
        {
            {
                vld1q_s32(gy + 8),
                vld1q_s32(gy + 12)
            }
        };

        // Compute and store phase
        vst1_u8(phase + 0, phase_quantization_S32_S32(gx0, gy0));
        vst1_u8(phase + 8, phase_quantization_S32_S32(gx1, gy1));

        // Compute magnitude using L2 normalization
        const uint32x4x2_t mag0 = mag_l2_S32_S32(gx0, gy0);
        const uint32x4x2_t mag1 = mag_l2_S32_S32(gx1, gy1);

        // Store magnitude
        vst1q_u32(magnitude + 0, mag0.val[0]);
        vst1q_u32(magnitude + 4, mag0.val[1]);
        vst1q_u32(magnitude + 8, mag1.val[0]);
        vst1q_u32(magnitude + 12, mag1.val[1]);
    }
}

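/* A scalar sketch of what the two non-maxima suppression kernels below do for each pixel
 * (illustration only; the NEON code is the actual implementation):
 *
 *   uint8_t suppress_and_threshold(uint32_t mag, uint32_t n0, uint32_t n1,
 *                                  uint32_t lower_thr, uint32_t upper_thr)
 *   {
 *       // n0 and n1 are the two neighbours along the quantized gradient direction
 *       const uint32_t kept = (mag > n0 && mag > n1) ? mag : 0;
 *
 *       if(kept > upper_thr)
 *       {
 *           return EDGE;    // strong edge
 *       }
 *       if(kept <= lower_thr)
 *       {
 *           return NO_EDGE; // rejected
 *       }
 *       return MAYBE;       // weak edge, resolved later by edge tracing
 *   }
 */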
/* Computes non-maxima suppression and hysteresis when the gradient size = 3 or 5
 *
 * @param[in]  magnitude_ptr Pointer to source image. Magnitude. Data type supported U16
 * @param[in]  phase_ptr     Pointer to source image. Quantized phase. Data type supported U8
 * @param[out] output_ptr    Pointer to output image. Data type supported U8
 * @param[in]  stride_mag    Stride of magnitude image
 * @param[in]  lower_thr     Lower threshold used for the hysteresis
 * @param[in]  upper_thr     Upper threshold used for the hysteresis
 */
void non_max_suppression_U16_U8_U8(const void *__restrict magnitude_ptr, const void *__restrict phase_ptr, void *__restrict output_ptr, const uint32_t stride_mag, const int32_t lower_thr,
                                   const int32_t upper_thr)
{
    const auto magnitude = static_cast<const uint16_t *__restrict>(magnitude_ptr);
    const auto phase     = static_cast<const uint8_t *__restrict>(phase_ptr);
    const auto output    = static_cast<uint8_t *__restrict>(output_ptr);

    // Get magnitude and phase of the centre pixels
    uint16x8_t mc = vld1q_u16(magnitude);

    // Angle_quantized: 0 = 0°, 1 = 45°, 2 = 90°, 3 = 135°
    const uint16x8_t pc16 = vmovl_u8(vld1_u8(phase));

    // 0 degree
    const uint16x8_t mk0_0 = vld1q_u16(magnitude - 1);
    const uint16x8_t mk0_1 = vld1q_u16(magnitude + 1);
    uint16x8_t       mask0 = vceqq_u16(pc16, vdupq_n_u16(0));
    mask0                  = vandq_u16(mask0, vcgtq_u16(mc, mk0_0));
    mask0                  = vandq_u16(mask0, vcgtq_u16(mc, mk0_1));

    // 45 degree
    const uint16x8_t mk45_0 = vld1q_u16(magnitude - stride_mag - 1);
    const uint16x8_t mk45_1 = vld1q_u16(magnitude + stride_mag + 1);
    uint16x8_t       mask1  = vceqq_u16(pc16, vdupq_n_u16(1));
    mask1                   = vandq_u16(mask1, vcgtq_u16(mc, mk45_0));
    mask1                   = vandq_u16(mask1, vcgtq_u16(mc, mk45_1));

    // 90 degree
    const uint16x8_t mk90_0 = vld1q_u16(magnitude - stride_mag);
    const uint16x8_t mk90_1 = vld1q_u16(magnitude + stride_mag);
    uint16x8_t       mask2  = vceqq_u16(pc16, vdupq_n_u16(2));
    mask2                   = vandq_u16(mask2, vcgtq_u16(mc, mk90_0));
    mask2                   = vandq_u16(mask2, vcgtq_u16(mc, mk90_1));

    // 135 degree
    const uint16x8_t mk135_0 = vld1q_u16(magnitude - stride_mag + 1);
    const uint16x8_t mk135_1 = vld1q_u16(magnitude + stride_mag - 1);
    uint16x8_t       mask3   = vceqq_u16(pc16, vdupq_n_u16(3));
    mask3                    = vandq_u16(mask3, vcgtq_u16(mc, mk135_0));
    mask3                    = vandq_u16(mask3, vcgtq_u16(mc, mk135_1));

    // Merge masks
    mask0 = vorrq_u16(mask0, mask1);
    mask2 = vorrq_u16(mask2, mask3);
    mask0 = vorrq_u16(mask0, mask2);

    mc = vbslq_u16(mask0, mc, vdupq_n_u16(0));

    // mc > upper_thr
    mask0 = vcgtq_u16(mc, vdupq_n_u16(upper_thr));

    // mc <= lower_thr
    mask1 = vcleq_u16(mc, vdupq_n_u16(lower_thr));

    // mc <= upper_thr && mc > lower_thr
    mask2 = vcleq_u16(mc, vdupq_n_u16(upper_thr));
    mask2 = vandq_u16(mask2, vcgtq_u16(mc, vdupq_n_u16(lower_thr)));

    mc = vbslq_u16(mask0, vdupq_n_u16(EDGE), mc);
    mc = vbslq_u16(mask1, vdupq_n_u16(NO_EDGE), mc);
    mc = vbslq_u16(mask2, vdupq_n_u16(MAYBE), mc);

    vst1_u8(output, vmovn_u16(mc));
}

inline uint16x4_t non_max_U32_helper(const uint32_t *input, const uint16x4_t pc, const uint32_t stride_mag, const int32_t lower_thr, const int32_t upper_thr)
{
    // Phase for 4 pixels
    const uint32x4_t pc32 = vmovl_u16(pc);

    // Get magnitude for 4 pixels
    uint32x4_t mc = vld1q_u32(input);

    // Angle_quantized: 0 = 0°, 1 = 45°, 2 = 90°, 3 = 135°
    // 0 degree
    const uint32x4_t mk0_0 = vld1q_u32(input - 1);
    const uint32x4_t mk0_1 = vld1q_u32(input + 1);
    uint32x4_t       mask0 = vceqq_u32(pc32, vdupq_n_u32(0));
    mask0                  = vandq_u32(mask0, vcgtq_u32(mc, mk0_0));
    mask0                  = vandq_u32(mask0, vcgtq_u32(mc, mk0_1));

    // 45 degree
    const uint32x4_t mk45_0 = vld1q_u32(input - stride_mag - 1);
    const uint32x4_t mk45_1 = vld1q_u32(input + stride_mag + 1);
    uint32x4_t       mask1  = vceqq_u32(pc32, vdupq_n_u32(1));
    mask1                   = vandq_u32(mask1, vcgtq_u32(mc, mk45_0));
    mask1                   = vandq_u32(mask1, vcgtq_u32(mc, mk45_1));

    // 90 degree
    const uint32x4_t mk90_0 = vld1q_u32(input - stride_mag);
    const uint32x4_t mk90_1 = vld1q_u32(input + stride_mag);
    uint32x4_t       mask2  = vceqq_u32(pc32, vdupq_n_u32(2));
    mask2                   = vandq_u32(mask2, vcgtq_u32(mc, mk90_0));
    mask2                   = vandq_u32(mask2, vcgtq_u32(mc, mk90_1));

    // 135 degree
    const uint32x4_t mk135_0 = vld1q_u32(input - stride_mag + 1);
    const uint32x4_t mk135_1 = vld1q_u32(input + stride_mag - 1);
    uint32x4_t       mask3   = vceqq_u32(pc32, vdupq_n_u32(3));
    mask3                    = vandq_u32(mask3, vcgtq_u32(mc, mk135_0));
    mask3                    = vandq_u32(mask3, vcgtq_u32(mc, mk135_1));

    // Merge masks
    mask0 = vorrq_u32(mask0, mask1);
    mask2 = vorrq_u32(mask2, mask3);
    mask0 = vorrq_u32(mask0, mask2);

    mc = vbslq_u32(mask0, mc, vdupq_n_u32(0));

    // mc > upper_thr
    mask0 = vcgtq_u32(mc, vdupq_n_u32(upper_thr));

    // mc <= lower_thr
    mask1 = vcleq_u32(mc, vdupq_n_u32(lower_thr));

    // mc <= upper_thr && mc > lower_thr
    mask2 = vcleq_u32(mc, vdupq_n_u32(upper_thr));
    mask2 = vandq_u32(mask2, vcgtq_u32(mc, vdupq_n_u32(lower_thr)));

    mc = vbslq_u32(mask0, vdupq_n_u32(EDGE), mc);
    mc = vbslq_u32(mask1, vdupq_n_u32(NO_EDGE), mc);
    mc = vbslq_u32(mask2, vdupq_n_u32(MAYBE), mc);

    return vmovn_u32(mc);
}

/* Computes non-maxima suppression and hysteresis when the gradient_size = 7
 *
 * @param[in]  magnitude_ptr Pointer to source image. Magnitude. Data type supported U32
 * @param[in]  phase_ptr     Pointer to source image. Quantized phase. Data type supported U8
 * @param[out] output_ptr    Pointer to destination image. Data type supported U8
 * @param[in]  stride_mag    Stride of magnitude image
 * @param[in]  lower_thr     Lower threshold used for the hysteresis
 * @param[in]  upper_thr     Upper threshold used for the hysteresis
 */
void non_max_suppression_U32_U8_U8(const void *__restrict magnitude_ptr, const void *__restrict phase_ptr, void *__restrict output_ptr, const uint32_t stride_mag, const int32_t lower_thr,
                                   const int32_t upper_thr)
{
    const auto magnitude = static_cast<const uint32_t *__restrict>(magnitude_ptr);
    const auto phase     = static_cast<const uint8_t *__restrict>(phase_ptr);
    const auto output    = static_cast<uint8_t *__restrict>(output_ptr);

    // Get phase for 8 pixels
    const uint16x8_t pc16 = vmovl_u8(vld1_u8(phase));

    // Compute non maxima suppression
    const uint16x4x2_t res =
    {
        {
            non_max_U32_helper(magnitude, vget_low_u16(pc16), stride_mag, lower_thr, upper_thr),
            non_max_U32_helper(magnitude + 4, vget_high_u16(pc16), stride_mag, lower_thr, upper_thr)
        }
    };

    // Store result
    vst1_u8(output, vmovn_u16(vcombine_u16(res.val[0], res.val[1])));
}

/* Traces edges; called recursively, starting from edge_trace_U8_U8
 *
 * @param[in]  input         Pointer to source image. Data type supported U8
 * @param[out] output        Pointer to destination image. Data type supported U8
 * @param[in]  input_stride  Stride of the input image
 * @param[in]  output_stride Stride of the output image
 */
void edge_trace_recursive_U8_U8(uint8_t *__restrict input, uint8_t *__restrict output, const int32_t input_stride, const int32_t output_stride)
{
    // Look for MAYBE pixels in 8 directions
    *output = EDGE;

    // (-1, 0)
    uint8_t pixel = *(input - 1);

    if(pixel == MAYBE)
    {
        // Touched a MAYBE point. MAYBE becomes EDGE
        *(input - 1) = EDGE;

        edge_trace_recursive_U8_U8(input - 1, output - 1, input_stride, output_stride);
    }

    // (+1, 0)
    pixel = *(input + 1);

    if(pixel == MAYBE)
    {
        // Touched a MAYBE point. MAYBE becomes EDGE
        *(input + 1) = EDGE;

        edge_trace_recursive_U8_U8(input + 1, output + 1, input_stride, output_stride);
    }

    input -= input_stride;
    output -= output_stride;

    // (-1, -1)
    pixel = *(input - 1);

    if(pixel == MAYBE)
    {
        // Touched a MAYBE point. MAYBE becomes EDGE
        *(input - 1) = EDGE;

        edge_trace_recursive_U8_U8(input - 1, output - 1, input_stride, output_stride);
    }

    // (0, -1)
    pixel = *input;

    if(pixel == MAYBE)
    {
        // Touched a MAYBE point. MAYBE becomes EDGE
        *input = EDGE;

        edge_trace_recursive_U8_U8(input, output, input_stride, output_stride);
    }

    // (+1, -1)
    pixel = *(input + 1);

    if(pixel == MAYBE)
    {
        // Touched a MAYBE point. MAYBE becomes EDGE
        *(input + 1) = EDGE;

        edge_trace_recursive_U8_U8(input + 1, output + 1, input_stride, output_stride);
    }

    input += input_stride * 2;
    output += output_stride * 2;

    // (-1, +1)
    pixel = *(input - 1);

    if(pixel == MAYBE)
    {
        // Touched a MAYBE point. MAYBE becomes EDGE
        *(input - 1) = EDGE;

        edge_trace_recursive_U8_U8(input - 1, output - 1, input_stride, output_stride);
    }

    // (0, +1)
    pixel = *input;

    if(pixel == MAYBE)
    {
        // Touched a MAYBE point. MAYBE becomes EDGE
        *input = EDGE;

        edge_trace_recursive_U8_U8(input, output, input_stride, output_stride);
    }

    // (+1, +1)
    pixel = *(input + 1);

    if(pixel == MAYBE)
    {
        // Touched a MAYBE point. MAYBE becomes EDGE
        *(input + 1) = EDGE;

        edge_trace_recursive_U8_U8(input + 1, output + 1, input_stride, output_stride);
    }
}
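// Note (added comment): the tracing above is a depth-first flood fill over 8-connected MAYBE
// pixels, so the recursion depth grows with the length of the connected weak-edge chain being
// promoted; stack usage therefore scales with edge length rather than being bounded per pixel.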

/* Computes edge tracing
 *
 * @param[in]  input         Pointer to source image. Data type supported U8
 * @param[out] output        Pointer to destination image. Data type supported U8
 * @param[in]  input_stride  Stride of the input image
 * @param[in]  output_stride Stride of the output image
 */
void edge_trace_U8_U8(uint8_t *__restrict input, uint8_t *__restrict output, const int32_t input_stride, const int32_t output_stride)
{
    if(*input == NO_EDGE)
    {
        *output = NO_EDGE;
    }
    // Check if EDGE and not yet touched
    else if((*input == EDGE) && (*output == NO_EDGE))
    {
        edge_trace_recursive_U8_U8(input, output, input_stride, output_stride);
    }
}
} // namespace
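/* The three kernels below form the Canny edge pipeline: NEGradientKernel turns the Sobel
 * responses into magnitude and quantized phase, NEEdgeNonMaxSuppressionKernel thins the edges
 * and applies the double threshold, and NEEdgeTraceKernel resolves the MAYBE pixels.
 * A minimal configuration sketch, assuming the tensors are already allocated and Gx/Gy have
 * been produced by a Sobel kernel elsewhere (illustration only, not the exact wiring used by
 * the NECannyEdge function):
 *
 *   NEGradientKernel              gradient;
 *   NEEdgeNonMaxSuppressionKernel suppression;
 *   NEEdgeTraceKernel             trace;
 *
 *   gradient.configure(&gx, &gy, &magnitude, &phase, norm_type);
 *   suppression.configure(&magnitude, &phase, &nonmax, upper_thr, lower_thr, false);
 *   trace.configure(&nonmax, &output);
 */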

NEGradientKernel::NEGradientKernel()
    : _func(nullptr), _gx(nullptr), _gy(nullptr), _magnitude(nullptr), _phase(nullptr)
{
}

void NEGradientKernel::configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase, int32_t norm_type)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(gx, gy, magnitude, phase);

    set_shape_if_empty(*magnitude->info(), gx->info()->tensor_shape());
    set_shape_if_empty(*phase->info(), gx->info()->tensor_shape());

    Format magnitude_format = gx->info()->data_type() == DataType::S16 ? Format::U16 : Format::U32;
    set_format_if_unknown(*magnitude->info(), magnitude_format);
    set_format_if_unknown(*phase->info(), Format::U8);

    ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(gx, gy, magnitude, phase);
    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(gx, 1, DataType::S16, DataType::S32);
    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(gy, 1, DataType::S16, DataType::S32);
    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(magnitude, 1, DataType::U16, DataType::U32);
    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(phase, 1, DataType::U8);
    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(gx, gy);
    ARM_COMPUTE_ERROR_ON_MSG(element_size_from_data_type(gx->info()->data_type()) != element_size_from_data_type(magnitude->info()->data_type()), "Magnitude must have the same element size as Gx and Gy");

    _gx        = gx;
    _gy        = gy;
    _magnitude = magnitude;
    _phase     = phase;

    if(_gx->info()->data_type() == DataType::S16)
    {
        if(norm_type == 1)
        {
            _func = &mag_phase_l1norm_S16_S16_U16_U8;
        }
        else
        {
            _func = &mag_phase_l2norm_S16_S16_U16_U8;
        }
    }
    else
    {
        if(norm_type == 1)
        {
            _func = &mag_phase_l1norm_S32_S32_U32_U8;
        }
        else
        {
            _func = &mag_phase_l2norm_S32_S32_U32_U8;
        }
    }

    constexpr unsigned int num_elems_processed_per_iteration = 32;

    // Configure kernel window
    Window win = calculate_max_window(*_gx->info(), Steps(num_elems_processed_per_iteration));

    AccessWindowHorizontal gx_access(_gx->info(), 0, num_elems_processed_per_iteration);
    AccessWindowHorizontal gy_access(_gy->info(), 0, num_elems_processed_per_iteration);
    AccessWindowHorizontal mag_access(_magnitude->info(), 0, num_elems_processed_per_iteration);
    AccessWindowHorizontal phase_access(_phase->info(), 0, num_elems_processed_per_iteration);

    update_window_and_padding(win, gx_access, gy_access, mag_access, phase_access);

    mag_access.set_valid_region(win, _gx->info()->valid_region());
    phase_access.set_valid_region(win, _gx->info()->valid_region());

    INEKernel::configure(win);
}

void NEGradientKernel::run(const Window &window, const ThreadInfo &info)
{
    ARM_COMPUTE_UNUSED(info);
    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
    ARM_COMPUTE_ERROR_ON(_func == nullptr);
    Iterator gx(_gx, window);
    Iterator gy(_gy, window);
    Iterator magnitude(_magnitude, window);
    Iterator phase(_phase, window);

    execute_window_loop(window, [&](const Coordinates &)
    {
        (*_func)(gx.ptr(), gy.ptr(), magnitude.ptr(), phase.ptr());
    },
    gx, gy, magnitude, phase);
}

NEEdgeNonMaxSuppressionKernel::NEEdgeNonMaxSuppressionKernel()
    : _func(nullptr), _magnitude(nullptr), _phase(nullptr), _output(nullptr), _lower_thr(0), _upper_thr(0)
{
}

BorderSize NEEdgeNonMaxSuppressionKernel::border_size() const
{
    return BorderSize(1);
}

void NEEdgeNonMaxSuppressionKernel::configure(const ITensor *magnitude, const ITensor *phase, ITensor *output,
                                              int32_t upper_thr, int32_t lower_thr, bool border_undefined)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(magnitude, phase, output);

    set_shape_if_empty(*output->info(), magnitude->info()->tensor_shape());

    set_format_if_unknown(*phase->info(), Format::U8);
    set_format_if_unknown(*output->info(), Format::U8);

    ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(magnitude, phase, output);
    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(magnitude, 1, DataType::U16, DataType::U32);
    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(phase, 1, DataType::U8);
    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(phase, output);

    _magnitude = magnitude;
    _phase     = phase;
    _output    = output;

    switch(_magnitude->info()->data_type())
    {
        case DataType::U16:
            _func = &non_max_suppression_U16_U8_U8;
            break;
        case DataType::U32:
            _func = &non_max_suppression_U32_U8_U8;
            break;
        default:
            ARM_COMPUTE_ERROR("Unsupported data type!");
    }

    // Set thresholds
    _lower_thr = lower_thr;
    _upper_thr = upper_thr;

    constexpr unsigned int num_elems_processed_per_iteration = 8;
    constexpr unsigned int num_elems_read_per_iteration      = 10;
    constexpr unsigned int num_rows_read_per_iteration       = 3;
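    // Note (added comment): 10 elements and 3 rows are read per iteration because each of the
    // 8 output pixels is compared against neighbours one pixel away in x and y, so the access
    // window needs one extra column on each side and the rows above and below the centre row.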

    // Configure kernel window
    Window win = calculate_max_window(*_magnitude->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size());

    AccessWindowRectangle  mag_access(_magnitude->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration);
    AccessWindowHorizontal phase_access(_phase->info(), 0, num_elems_processed_per_iteration);
    AccessWindowHorizontal output_access(_output->info(), 0, num_elems_processed_per_iteration);

    update_window_and_padding(win, mag_access, phase_access, output_access);

    output_access.set_valid_region(win, _magnitude->info()->valid_region(), border_undefined, border_size());

    INEKernel::configure(win);
}

void NEEdgeNonMaxSuppressionKernel::run(const Window &window, const ThreadInfo &info)
{
    ARM_COMPUTE_UNUSED(info);
    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
    ARM_COMPUTE_ERROR_ON(_func == nullptr);
    Iterator magnitude(_magnitude, window);
    Iterator phase(_phase, window);
    Iterator output(_output, window);

    const size_t input1_stride        = _magnitude->info()->strides_in_bytes()[1];
    const size_t input1_stride_ushort = input1_stride / data_size_from_type(_magnitude->info()->data_type());

    execute_window_loop(window, [&](const Coordinates &)
    {
        (*_func)(magnitude.ptr(), phase.ptr(), output.ptr(), input1_stride_ushort, _lower_thr, _upper_thr);
    },
    magnitude, phase, output);
}

NEEdgeTraceKernel::NEEdgeTraceKernel()
    : _input(nullptr), _output(nullptr)
{
}

BorderSize NEEdgeTraceKernel::border_size() const
{
    return BorderSize(1);
}

bool NEEdgeTraceKernel::is_parallelisable() const
{
    return false;
}
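// Note (added comment): returning false above keeps edge tracing on a single thread; the
// recursive trace writes to neighbouring pixels outside the element currently being visited,
// so splitting the window across threads could race on the input and output buffers.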

void NEEdgeTraceKernel::configure(ITensor *input, ITensor *output)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);

    set_shape_if_empty(*output->info(), input->info()->tensor_shape());

    set_format_if_unknown(*input->info(), Format::U8);
    set_format_if_unknown(*output->info(), Format::U8);

    ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);

    _input  = input;
    _output = output;

    constexpr unsigned int num_elems_processed_per_iteration = 1;

    // Configure kernel window
    Window win = calculate_max_window(*_input->info(), Steps(num_elems_processed_per_iteration));

    const ValidRegion &input_valid_region  = input->info()->valid_region();
    const ValidRegion &output_valid_region = output->info()->valid_region();

    // Reads can occur within the valid region of the input + border
    AccessWindowStatic input_access(input->info(),
                                    input_valid_region.anchor[0] - border_size().left,
                                    input_valid_region.anchor[1] - border_size().top,
                                    input_valid_region.anchor[0] + input_valid_region.shape[0] + border_size().right,
                                    input_valid_region.anchor[1] + input_valid_region.shape[1] + border_size().bottom);

    // Writes can occur within the valid region of the output + border
    AccessWindowStatic output_access(output->info(),
                                     output_valid_region.anchor[0] - border_size().left,
                                     output_valid_region.anchor[1] - border_size().top,
                                     output_valid_region.anchor[0] + output_valid_region.shape[0] + border_size().right,
                                     output_valid_region.anchor[1] + output_valid_region.shape[1] + border_size().bottom);

    update_window_and_padding(win, input_access, output_access);

    output_access.set_valid_region(win, _input->info()->valid_region());

    INEKernel::configure(win);
}

void NEEdgeTraceKernel::run(const Window &window, const ThreadInfo &info)
{
    ARM_COMPUTE_UNUSED(info);
    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
    Iterator input(_input, window);
    Iterator output(_output, window);

    const size_t input_stride  = _input->info()->strides_in_bytes()[1];
    const size_t output_stride = _output->info()->strides_in_bytes()[1];

    execute_window_loop(window, [&](const Coordinates &)
    {
        edge_trace_U8_U8(input.ptr(), output.ptr(), input_stride, output_stride);
    },
    input, output);
}