COMPMID-568: Implement Canny edge function for CL/NEON

Change-Id: Ic5f197463f962bac4b23663bcef7ac744be6fc2a
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/114250
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
diff --git a/src/runtime/CL/functions/CLCannyEdge.cpp b/src/runtime/CL/functions/CLCannyEdge.cpp
index 5acb8e7..ed58345 100644
--- a/src/runtime/CL/functions/CLCannyEdge.cpp
+++ b/src/runtime/CL/functions/CLCannyEdge.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -50,17 +50,23 @@
       _visited(),
       _recorded(),
       _l1_list_counter(),
-      _l1_stack()
+      _l1_stack(),
+      _output(nullptr)
 {
 }
 
-void CLCannyEdge::configure(ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, int32_t gradient_size, int32_t norm_type, BorderMode border_mode, uint8_t constant_border_value)
+void CLCannyEdge::configure(ICLTensor *input, ICLTensor *output, int32_t upper_thr, int32_t lower_thr, int32_t gradient_size, int32_t norm_type, BorderMode border_mode,
+                            uint8_t constant_border_value)
 {
+    ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
     ARM_COMPUTE_ERROR_ON((1 != norm_type) && (2 != norm_type));
+    ARM_COMPUTE_ERROR_ON((gradient_size != 3) && (gradient_size != 5) && (gradient_size != 7));
     ARM_COMPUTE_ERROR_ON(lower_thr > upper_thr);
 
+    _output = output;
+
     const unsigned int L1_hysteresis_stack_size = 8;
     const TensorShape  shape                    = input->info()->tensor_shape();
 
@@ -122,7 +128,7 @@
     }
     else
     {
-        ARM_COMPUTE_ERROR("Gradient %d size not supported", gradient_size);
+        ARM_COMPUTE_ERROR("Gradient size %d not supported", gradient_size);
     }
 
     // Manage intermediate buffers
@@ -187,6 +193,7 @@
     CLScheduler::get().enqueue(_non_max_suppr, false);
 
     // Clear temporary structures and run edge trace
+    _output->clear(CLScheduler::get().queue());
     _visited.clear(CLScheduler::get().queue());
     _recorded.clear(CLScheduler::get().queue());
     _l1_list_counter.clear(CLScheduler::get().queue());
diff --git a/src/runtime/NEON/functions/NECannyEdge.cpp b/src/runtime/NEON/functions/NECannyEdge.cpp
index c27ff2f..1d73148 100644
--- a/src/runtime/NEON/functions/NECannyEdge.cpp
+++ b/src/runtime/NEON/functions/NECannyEdge.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -61,12 +61,12 @@
 void NECannyEdge::configure(ITensor *input, ITensor *output, int32_t upper_thr, int32_t lower_thr, int32_t gradient_size, int32_t norm_type, BorderMode border_mode, uint8_t constant_border_value,
                             bool use_fp16)
 {
+    ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8);
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
-    ARM_COMPUTE_ERROR_ON(gradient_size < 3);
-    ARM_COMPUTE_ERROR_ON(gradient_size > 7);
-    ARM_COMPUTE_ERROR_ON(lower_thr > upper_thr);
     ARM_COMPUTE_ERROR_ON((1 != norm_type) && (2 != norm_type));
+    ARM_COMPUTE_ERROR_ON((gradient_size != 3) && (gradient_size != 5) && (gradient_size != 7));
+    ARM_COMPUTE_ERROR_ON(lower_thr > upper_thr);
 
     _output = output;
 
@@ -119,7 +119,7 @@
     }
     else
     {
-        ARM_COMPUTE_ERROR("Gradient size not supported\n");
+        ARM_COMPUTE_ERROR("Gradient size %d not supported\n", gradient_size);
     }
 
     // Manage intermediate buffers
@@ -171,24 +171,23 @@
 void NECannyEdge::run()
 {
     ARM_COMPUTE_ERROR_ON_MSG(_sobel == nullptr, "Unconfigured function");
-    ARM_COMPUTE_ERROR_ON(_output == nullptr);
 
     _memory_group.acquire();
 
     // Run sobelNxN
     _sobel->run();
 
-    // Fill border before non-maxima suppression. Nop for border mode undefined.
-    NEScheduler::get().schedule(&_border_mag_gradient, Window::DimZ);
-
     // Run gradient
     NEScheduler::get().schedule(_gradient.get(), Window::DimY);
 
+    // Fill border before non-maxima suppression. Nop for border mode undefined.
+    NEScheduler::get().schedule(&_border_mag_gradient, Window::DimZ);
+
     // Run non-maxima suppression
     NEScheduler::get().schedule(&_non_max_suppr, Window::DimY);
 
     ARM_COMPUTE_ERROR_ON(_output->buffer() == nullptr);
-    memset(_output->buffer(), 0, _output->info()->total_size());
+    std::fill_n(_output->buffer(), _output->info()->total_size(), 0);
 
     // Fill border before edge trace
     NEScheduler::get().schedule(&_border_edge_trace, Window::DimZ);