COMPMID-424 NEON/CL Harris Corners validation tests.

Change-Id: I82d2a73f515a8d45d16b9ddb702fea51ae05c82e
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/79687
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Moritz Pflanzer <moritz.pflanzer@arm.com>
diff --git a/arm_compute/runtime/CL/functions/CLHarrisCorners.h b/arm_compute/runtime/CL/functions/CLHarrisCorners.h
index 90da687..f9a1275 100644
--- a/arm_compute/runtime/CL/functions/CLHarrisCorners.h
+++ b/arm_compute/runtime/CL/functions/CLHarrisCorners.h
@@ -29,10 +29,10 @@
 #include "arm_compute/core/CL/ICLArray.h"
 #include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
 #include "arm_compute/core/CL/kernels/CLHarrisCornersKernel.h"
-#include "arm_compute/core/CL/kernels/CLNonMaximaSuppression3x3Kernel.h"
 #include "arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h"
 
 #include <cstdint>
 
@@ -87,7 +87,7 @@
 private:
     std::unique_ptr<IFunction>          _sobel;                 /**< Sobel function */
     CLHarrisScoreKernel                 _harris_score;          /**< Harris score kernel */
-    CLNonMaximaSuppression3x3Kernel     _non_max_suppr;         /**< Non-maxima suppression function */
+    CLNonMaximaSuppression3x3           _non_max_suppr;         /**< Non-maxima suppression function */
     CPPCornerCandidatesKernel           _candidates;            /**< Sort kernel */
     CPPSortEuclideanDistanceKernel      _sort_euclidean;        /**< Euclidean distance kernel */
     CLFillBorderKernel                  _border_gx;             /**< Border handler before running harris score */
diff --git a/src/core/CL/kernels/CLHarrisCornersKernel.cpp b/src/core/CL/kernels/CLHarrisCornersKernel.cpp
index 9fc34a7..1f757fe 100644
--- a/src/core/CL/kernels/CLHarrisCornersKernel.cpp
+++ b/src/core/CL/kernels/CLHarrisCornersKernel.cpp
@@ -23,6 +23,7 @@
  */
 #include "arm_compute/core/CL/kernels/CLHarrisCornersKernel.h"
 
+#include "arm_compute/core/AccessWindowStatic.h"
 #include "arm_compute/core/CL/CLHelpers.h"
 #include "arm_compute/core/CL/CLKernelLibrary.h"
 #include "arm_compute/core/CL/ICLTensor.h"
@@ -91,8 +92,8 @@
     // Configure kernel window
     constexpr unsigned int num_elems_processed_per_iteration = 4;
     constexpr unsigned int num_elems_written_per_iteration   = 4;
-    constexpr unsigned int num_elems_read_per_iteration      = 8;
-    constexpr unsigned int num_rows_read_per_iteration       = 3;
+    const unsigned int     num_elems_read_per_iteration      = block_size == 7 ? 10 : 8;
+    const unsigned int     num_rows_read_per_iteration       = block_size;
 
     Window win = calculate_max_window(*_input1->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size());
 
diff --git a/src/core/CL/kernels/CLMeanStdDevKernel.cpp b/src/core/CL/kernels/CLMeanStdDevKernel.cpp
index 1b70d75..1bf831b 100644
--- a/src/core/CL/kernels/CLMeanStdDevKernel.cpp
+++ b/src/core/CL/kernels/CLMeanStdDevKernel.cpp
@@ -88,8 +88,7 @@
     constexpr unsigned int num_elems_processed_per_iteration_x = 8;
     const unsigned int     num_elems_processed_per_iteration_y = input->info()->dimension(1);
 
-    _border_size = BorderSize(std::max(static_cast<int>(num_elems_processed_per_iteration_x) - static_cast<int>(input->info()->dimension(0)),
-                                       static_cast<int>(input->info()->dimension(0) % num_elems_processed_per_iteration_x)));
+    _border_size = BorderSize(ceil_to_multiple(input->info()->dimension(0), num_elems_processed_per_iteration_x) - input->info()->dimension(0));
 
     Window                win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y));
     AccessWindowRectangle input_access(input->info(), 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y);
diff --git a/src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp b/src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp
index 884da28..84e12d4 100644
--- a/src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp
+++ b/src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp
@@ -57,10 +57,7 @@
 
 inline void corner_candidates(const float *__restrict input, InternalKeypoint *__restrict output, int32_t x, int32_t y, int32_t *num_corner_candidates, std::mutex *corner_candidates_mutex)
 {
-    check_corner(x + 0, y, *(input + 0), output, num_corner_candidates, corner_candidates_mutex);
-    check_corner(x + 1, y, *(input + 1), output, num_corner_candidates, corner_candidates_mutex);
-    check_corner(x + 2, y, *(input + 2), output, num_corner_candidates, corner_candidates_mutex);
-    check_corner(x + 3, y, *(input + 3), output, num_corner_candidates, corner_candidates_mutex);
+    check_corner(x, y, *input, output, num_corner_candidates, corner_candidates_mutex);
 }
 } // namespace
 
@@ -86,7 +83,7 @@
     _output                = output;
     _num_corner_candidates = num_corner_candidates;
 
-    const unsigned int num_elems_processed_per_iteration = 4;
+    const unsigned int num_elems_processed_per_iteration = 1;
 
     // Configure kernel window
     Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
diff --git a/src/core/NEON/kernels/NEMeanStdDevKernel.cpp b/src/core/NEON/kernels/NEMeanStdDevKernel.cpp
index 1eb7e45..579c46f 100644
--- a/src/core/NEON/kernels/NEMeanStdDevKernel.cpp
+++ b/src/core/NEON/kernels/NEMeanStdDevKernel.cpp
@@ -110,8 +110,7 @@
 
     constexpr unsigned int num_elems_processed_per_iteration = 16;
 
-    _border_size = BorderSize(std::max(static_cast<int>(num_elems_processed_per_iteration) - static_cast<int>(input->info()->dimension(0)),
-                                       static_cast<int>(input->info()->dimension(0) % num_elems_processed_per_iteration)));
+    _border_size = BorderSize(ceil_to_multiple(input->info()->dimension(0), num_elems_processed_per_iteration) - input->info()->dimension(0));
 
     // Configure kernel window
     Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
diff --git a/src/runtime/CL/functions/CLHarrisCorners.cpp b/src/runtime/CL/functions/CLHarrisCorners.cpp
index 87d573a..8f9fcdc 100644
--- a/src/runtime/CL/functions/CLHarrisCorners.cpp
+++ b/src/runtime/CL/functions/CLHarrisCorners.cpp
@@ -42,8 +42,20 @@
 
 using namespace arm_compute;
 
-CLHarrisCorners::CLHarrisCorners()
-    : _sobel(), _harris_score(), _non_max_suppr(), _candidates(), _sort_euclidean(), _border_gx(), _border_gy(), _gx(), _gy(), _score(), _nonmax(), _corners_list(), _num_corner_candidates(0),
+CLHarrisCorners::CLHarrisCorners() // NOLINT
+    : _sobel(nullptr),
+      _harris_score(),
+      _non_max_suppr(),
+      _candidates(),
+      _sort_euclidean(),
+      _border_gx(),
+      _border_gy(),
+      _gx(),
+      _gy(),
+      _score(),
+      _nonmax(),
+      _corners_list(nullptr),
+      _num_corner_candidates(0),
       _corners(nullptr)
 {
 }
@@ -62,6 +74,7 @@
     const TensorShape shape = input->info()->tensor_shape();
     const DataType    dt    = (gradient_size < 7) ? DataType::S16 : DataType::S32;
     TensorInfo        tensor_info(shape, 1, dt);
+
     _gx.allocator()->init(tensor_info);
     _gy.allocator()->init(tensor_info);
 
@@ -99,10 +112,6 @@
             ARM_COMPUTE_ERROR("Gradient size not implemented");
     }
 
-    // Configure border filling before harris score
-    _border_gx.configure(&_gx, BorderSize(block_size / 2), border_mode, constant_border_value);
-    _border_gy.configure(&_gy, BorderSize(block_size / 2), border_mode, constant_border_value);
-
     // Normalization factor
     const float norm_factor               = 1.0f / (255.0f * pow(4.0f, gradient_size / 2) * block_size);
     const float pow4_normalization_factor = pow(norm_factor, 4);
@@ -110,8 +119,12 @@
     // Set/init Harris Score kernel accordingly with block_size
     _harris_score.configure(&_gx, &_gy, &_score, block_size, pow4_normalization_factor, threshold, sensitivity, border_mode == BorderMode::UNDEFINED);
 
+    // Configure border filling using harris score kernel's block size
+    _border_gx.configure(&_gx, _harris_score.border_size(), border_mode, PixelValue(constant_border_value));
+    _border_gy.configure(&_gy, _harris_score.border_size(), border_mode, PixelValue(constant_border_value));
+
     // Init non-maxima suppression function
-    _non_max_suppr.configure(&_score, &_nonmax, border_mode == BorderMode::UNDEFINED);
+    _non_max_suppr.configure(&_score, &_nonmax, border_mode);
 
     // Init corner candidates kernel
     _candidates.configure(&_nonmax, _corners_list.get(), &_num_corner_candidates);
@@ -144,7 +157,7 @@
     CLScheduler::get().enqueue(_harris_score, false);
 
     // Run non-maxima suppression
-    CLScheduler::get().enqueue(_non_max_suppr);
+    _non_max_suppr.run();
 
     // Run corner candidate kernel
     _nonmax.map(true);
diff --git a/tests/RawTensor.cpp b/tests/RawTensor.cpp
index 1d400a5..e6b320f 100644
--- a/tests/RawTensor.cpp
+++ b/tests/RawTensor.cpp
@@ -131,6 +131,7 @@
         case Format::U16:
         case Format::S32:
         case Format::U32:
+        case Format::F32:
             return 1;
         case Format::RGB888:
             return 3;
diff --git a/tests/dataset/ShapeDatasets.h b/tests/dataset/ShapeDatasets.h
index 71c786a..ccdfc9a 100644
--- a/tests/dataset/ShapeDatasets.h
+++ b/tests/dataset/ShapeDatasets.h
@@ -107,7 +107,7 @@
 {
 public:
     Small2DShapes()
-        : ShapeDataset(TensorShape(5U, 5U),
+        : ShapeDataset(TensorShape(17U, 17U),
                        TensorShape(640U, 480U))
     {
     }
diff --git a/tests/validation/CL/CMakeLists.txt b/tests/validation/CL/CMakeLists.txt
index e809377..67900b3 100644
--- a/tests/validation/CL/CMakeLists.txt
+++ b/tests/validation/CL/CMakeLists.txt
@@ -39,6 +39,7 @@
     ${CMAKE_CURRENT_SOURCE_DIR}/Threshold.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/DirectConvolutionLayer.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/MeanStdDev.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/HarrisCorners.cpp
 )
 
 add_library(arm_compute_test_validation_OPENCL OBJECT
diff --git a/tests/validation/CL/HarrisCorners.cpp b/tests/validation/CL/HarrisCorners.cpp
new file mode 100644
index 0000000..dca2c79
--- /dev/null
+++ b/tests/validation/CL/HarrisCorners.cpp
@@ -0,0 +1,224 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "AssetsLibrary.h"
+#include "CL/CLAccessor.h"
+#include "Globals.h"
+#include "TypePrinter.h"
+#include "Utils.h"
+#include "validation/Datasets.h"
+#include "validation/Reference.h"
+#include "validation/Validation.h"
+#include "validation/ValidationUserConfiguration.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLArray.h"
+#include "arm_compute/runtime/CL/functions/CLHarrisCorners.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+
+#include "PaddingCalculator.h"
+#include "boost_wrapper.h"
+
+#include <random>
+#include <string>
+
+using namespace arm_compute;
+using namespace arm_compute::test;
+using namespace arm_compute::test::validation;
+
+namespace
+{
+/** Compute CL Harris corners function.
+ *
+ * @param[in]  shape                 Shape of input tensor
+ * @param[out] corners               Keypoint array passed to CLHarrisCorners::configure() as the destination for the detected corners
+ * @param[in]  threshold             Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel).
+ * @param[in]  min_dist              Radial Euclidean distance for the euclidean distance stage
+ * @param[in]  sensitivity           Sensitivity threshold k from the Harris-Stephens equation
+ * @param[in]  gradient_size         The gradient window size to use on the input. The implementation supports 3, 5, and 7
+ * @param[in]  block_size            The block window size used to compute the Harris Corner score. The implementation supports 3, 5, and 7.
+ * @param[in]  border_mode           Border mode to use
+ * @param[in]  constant_border_value Constant value to use for borders if border_mode is set to CONSTANT.
+ * @note Nothing is returned: the keypoints are delivered through @p corners.
+ */
+void compute_harris_corners(const TensorShape &shape, CLKeyPointArray &corners, float threshold, float min_dist, float sensitivity,
+                            int32_t gradient_size, int32_t block_size, BorderMode border_mode, uint8_t constant_border_value)
+{
+    // Create the U8 source tensor
+    CLTensor src = create_tensor<CLTensor>(shape, DataType::U8);
+    src.info()->set_format(Format::U8);
+
+    // Create and configure the Harris corners function (done before the source is allocated)
+    CLHarrisCorners harris_corners;
+    harris_corners.configure(&src, threshold, min_dist, sensitivity, gradient_size, block_size, &corners, border_mode, constant_border_value);
+
+    // Allocate tensors
+    src.allocator()->allocate();
+
+    BOOST_TEST(!src.info()->is_resizable());
+
+    // Fill the source with uniformly distributed values
+    library->fill_tensor_uniform(CLAccessor(src), 0);
+
+    // Compute function
+    harris_corners.run();
+}
+} // namespace
+
+#ifndef DOXYGEN_SKIP_THIS
+BOOST_AUTO_TEST_SUITE(CL)
+BOOST_AUTO_TEST_SUITE(HarrisCorners)
+
+// Configures the CL function on an unallocated source and validates the resulting valid region and padding
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly"))
+BOOST_DATA_TEST_CASE(Configuration, (Small2DShapes() + Large2DShapes()) * BorderModes()
+                     * boost::unit_test::data::make({ 3, 5, 7 }) * boost::unit_test::data::make({ 3, 5, 7 }),
+                     shape, border_mode, gradient, block)
+{
+    // Create tensors
+    CLTensor src = create_tensor<CLTensor>(shape, DataType::U8);
+    src.info()->set_format(Format::U8);
+
+    CLKeyPointArray corners(shape.total_size());
+    uint8_t         constant_border_value = 0;
+
+    // Threshold and sensitivity are drawn from [0, 0.01); the distribution requires lower bound <= upper bound
+    std::mt19937                          gen(user_config.seed.get());
+    std::uniform_real_distribution<float> real_dist(0.f, 0.01f);
+
+    const float threshold              = real_dist(gen);
+    const float sensitivity            = real_dist(gen);
+    const float max_euclidean_distance = 30.f;
+
+    real_dist            = std::uniform_real_distribution<float>(0.f, max_euclidean_distance);
+    const float min_dist = real_dist(gen);
+
+    // Generate a random constant value if border_mode is constant (drawn as unsigned int because uniform_int_distribution is not defined for 8-bit types)
+    if(border_mode == BorderMode::CONSTANT)
+    {
+        std::uniform_int_distribution<unsigned int> int_dist(0, 255);
+        constant_border_value = static_cast<uint8_t>(int_dist(gen));
+    }
+
+    BOOST_TEST(src.info()->is_resizable());
+
+    // Create harris corners configure function
+    CLHarrisCorners harris_corners;
+    harris_corners.configure(&src, threshold, min_dist, sensitivity, gradient, block, &corners, border_mode, constant_border_value);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+
+    validate(src.info()->valid_region(), valid_region);
+
+    // Validate padding
+    PaddingCalculator calculator(shape.x(), 8);
+    calculator.set_border_mode(border_mode);
+    calculator.set_border_size(gradient / 2);
+    calculator.set_access_offset(-gradient / 2);
+    calculator.set_accessed_elements(16);
+
+    const PaddingSize padding = calculator.required_padding();
+
+    validate(src.info()->padding(), padding);
+}
+
+// Runs the CL function on the small shapes and compares the keypoints with the reference implementation
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
+BOOST_DATA_TEST_CASE(RunSmall, Small2DShapes() * BorderModes() * boost::unit_test::data::make({ 3, 5, 7 }) * boost::unit_test::data::make({ 3, 5, 7 }), shape, border_mode, gradient, block)
+{
+    uint8_t constant_border_value = 0;
+    // Threshold and sensitivity are drawn from [0, 0.01); the distribution requires lower bound <= upper bound
+    std::mt19937                          gen(user_config.seed.get());
+    std::uniform_real_distribution<float> real_dist(0.f, 0.01f);
+
+    const float threshold              = real_dist(gen);
+    const float sensitivity            = real_dist(gen);
+    const float max_euclidean_distance = 30.f;
+
+    real_dist            = std::uniform_real_distribution<float>(0.f, max_euclidean_distance);
+    const float min_dist = real_dist(gen);
+
+    // Generate a random constant value if border_mode is constant (drawn as unsigned int because uniform_int_distribution is not defined for 8-bit types)
+    if(border_mode == BorderMode::CONSTANT)
+    {
+        std::uniform_int_distribution<unsigned int> int_dist(0, 255);
+        constant_border_value = static_cast<uint8_t>(int_dist(gen));
+    }
+
+    // Create array of keypoints
+    CLKeyPointArray dst(shape.total_size());
+    // Compute function
+    compute_harris_corners(shape, dst, threshold, min_dist, sensitivity, gradient, block, border_mode, constant_border_value);
+
+    // Compute reference
+    KeyPointArray ref_dst = Reference::compute_reference_harris_corners(shape, threshold, min_dist, sensitivity, gradient, block, border_mode, constant_border_value);
+
+    // Validate output (the CL array is mapped while it is being compared)
+    dst.map();
+    validate(dst, ref_dst);
+    dst.unmap();
+}
+
+// Runs the CL function on the large shapes and compares the keypoints with the reference implementation
+BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly"))
+BOOST_DATA_TEST_CASE(RunLarge, Large2DShapes() * BorderModes() * boost::unit_test::data::make({ 3, 5, 7 }) * boost::unit_test::data::make({ 3, 5, 7 }), shape, border_mode, gradient, block)
+{
+    uint8_t constant_border_value = 0;
+    // Threshold and sensitivity are drawn from [0, 0.01); the distribution requires lower bound <= upper bound
+    std::mt19937                          gen(user_config.seed.get());
+    std::uniform_real_distribution<float> real_dist(0.f, 0.01f);
+
+    const float threshold              = real_dist(gen);
+    const float sensitivity            = real_dist(gen);
+    const float max_euclidean_distance = 30.f;
+
+    real_dist            = std::uniform_real_distribution<float>(0.f, max_euclidean_distance);
+    const float min_dist = real_dist(gen);
+
+    // Generate a random constant value if border_mode is constant (drawn as unsigned int because uniform_int_distribution is not defined for 8-bit types)
+    if(border_mode == BorderMode::CONSTANT)
+    {
+        std::uniform_int_distribution<unsigned int> int_dist(0, 255);
+        constant_border_value = static_cast<uint8_t>(int_dist(gen));
+    }
+
+    // Create array of keypoints
+    CLKeyPointArray dst(shape.total_size());
+    // Compute function
+    compute_harris_corners(shape, dst, threshold, min_dist, sensitivity, gradient, block, border_mode, constant_border_value);
+
+    // Compute reference
+    KeyPointArray ref_dst = Reference::compute_reference_harris_corners(shape, threshold, min_dist, sensitivity, gradient, block, border_mode, constant_border_value);
+
+    // Validate output (the CL array is mapped while it is being compared)
+    dst.map();
+    validate(dst, ref_dst);
+    dst.unmap();
+}
+
+BOOST_AUTO_TEST_SUITE_END()
+BOOST_AUTO_TEST_SUITE_END()
+#endif /* DOXYGEN_SKIP_THIS */
diff --git a/tests/validation/NEON/CMakeLists.txt b/tests/validation/NEON/CMakeLists.txt
index 9dda17d..bf07d27 100644
--- a/tests/validation/NEON/CMakeLists.txt
+++ b/tests/validation/NEON/CMakeLists.txt
@@ -47,6 +47,7 @@
     ${CMAKE_CURRENT_SOURCE_DIR}/FullyConnectedLayer.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/Gaussian3x3.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/GEMM.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/HarrisCorners.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/IntegralImage.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/NormalizationLayer.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/PixelWiseMultiplication.cpp
diff --git a/tests/validation/NEON/HarrisCorners.cpp b/tests/validation/NEON/HarrisCorners.cpp
new file mode 100644
index 0000000..6793e21
--- /dev/null
+++ b/tests/validation/NEON/HarrisCorners.cpp
@@ -0,0 +1,229 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "AssetsLibrary.h"
+#include "Globals.h"
+#include "NEON/Accessor.h"
+#include "NEON/Helper.h"
+#include "TypePrinter.h"
+#include "Utils.h"
+#include "validation/Datasets.h"
+#include "validation/Reference.h"
+#include "validation/Validation.h"
+#include "validation/ValidationUserConfiguration.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEHarrisCorners.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+
+#include "PaddingCalculator.h"
+#include "boost_wrapper.h"
+
+#include <random>
+#include <string>
+
+using namespace arm_compute;
+using namespace arm_compute::test;
+using namespace arm_compute::test::validation;
+
+namespace
+{
+/** Run NEHarrisCorners on a uniformly filled U8 tensor of the requested shape.
+ *
+ * @param[in] shape                 Shape of the source tensor
+ * @param[in] threshold             Minimum Harris Corner score to keep (scores are computed using the normalized Sobel kernel).
+ * @param[in] min_dist              Radial Euclidean distance used by the euclidean distance stage
+ * @param[in] sensitivity           Sensitivity threshold k of the Harris-Stephens equation
+ * @param[in] gradient_size         Gradient window size applied to the input. Supported values: 3, 5 and 7
+ * @param[in] block_size            Block window size used for the Harris Corner score. Supported values: 3, 5 and 7.
+ * @param[in] border_mode           Border mode to use
+ * @param[in] constant_border_value Border value used when border_mode is CONSTANT.
+ * @param[in] use_fp16              True to select the FP16 kernels, false to select the F32 kernels.
+ *
+ * @return Keypoints of the detected corners.
+ */
+KeyPointArray compute_harris_corners(const TensorShape &shape, float threshold, float min_dist, float sensitivity,
+                                     int32_t gradient_size, int32_t block_size, BorderMode border_mode, uint8_t constant_border_value, bool use_fp16)
+{
+    // Output keypoints, with capacity for one entry per element of the shape
+    KeyPointArray keypoints(shape.total_size());
+
+    // Source image
+    Tensor src = create_tensor<Tensor>(shape, DataType::U8);
+    src.info()->set_format(Format::U8);
+
+    // Configure the function while the source is still unallocated
+    NEHarrisCorners harris;
+    harris.configure(&src, threshold, min_dist, sensitivity, gradient_size, block_size, &keypoints, border_mode, constant_border_value, use_fp16);
+
+    // Back the source with memory
+    src.allocator()->allocate();
+
+    BOOST_TEST(!src.info()->is_resizable());
+
+    // Populate the source with uniformly distributed values
+    library->fill_tensor_uniform(Accessor(src), 0);
+
+    // Execute the function
+    harris.run();
+
+    return keypoints;
+}
+} // namespace
+
+#ifndef DOXYGEN_SKIP_THIS
+BOOST_AUTO_TEST_SUITE(NEON)
+BOOST_AUTO_TEST_SUITE(HarrisCorners)
+
+// Configures the NEON function on an unallocated source and validates the resulting valid region and padding
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly"))
+BOOST_DATA_TEST_CASE(Configuration, (Small2DShapes() + Large2DShapes()) * BorderModes()
+                     * boost::unit_test::data::make({ 3, 5, 7 }) * boost::unit_test::data::make({ 3, 5, 7 }),
+                     shape, border_mode, gradient, block)
+{
+    // Create tensors
+    Tensor src = create_tensor<Tensor>(shape, DataType::U8);
+    src.info()->set_format(Format::U8);
+
+    KeyPointArray corners;
+    uint8_t       constant_border_value = 0;
+
+    // Threshold and sensitivity are drawn from [0, 0.01); the distribution requires lower bound <= upper bound
+    std::mt19937                          gen(user_config.seed.get());
+    std::uniform_real_distribution<float> real_dist(0.f, 0.01f);
+
+    const float threshold              = real_dist(gen);
+    const float sensitivity            = real_dist(gen);
+    const float max_euclidean_distance = 30.f;
+
+    real_dist            = std::uniform_real_distribution<float>(0.f, max_euclidean_distance);
+    const float min_dist = real_dist(gen);
+
+    // 50% chance to use fp16
+    const bool use_fp16 = real_dist(gen) < max_euclidean_distance / 2;
+
+    // Generate a random constant value if border_mode is constant (drawn as unsigned int because uniform_int_distribution is not defined for 8-bit types)
+    if(border_mode == BorderMode::CONSTANT)
+    {
+        std::uniform_int_distribution<unsigned int> int_dist(0, 255);
+        constant_border_value = static_cast<uint8_t>(int_dist(gen));
+    }
+
+    BOOST_TEST(src.info()->is_resizable());
+
+    // Create harris corners configure function
+    NEHarrisCorners harris_corners;
+    harris_corners.configure(&src, threshold, min_dist, sensitivity, gradient, block, &corners, border_mode, constant_border_value, use_fp16);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+
+    validate(src.info()->valid_region(), valid_region);
+
+    // Validate padding
+    PaddingCalculator calculator(shape.x(), 8);
+    calculator.set_border_mode(border_mode);
+    calculator.set_border_size(gradient / 2);
+    calculator.set_access_offset(-gradient / 2);
+    calculator.set_accessed_elements(16);
+
+    const PaddingSize padding = calculator.required_padding();
+
+    validate(src.info()->padding(), padding);
+}
+
+// Runs the NEON function on the small shapes and compares the keypoints with the reference implementation
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
+BOOST_DATA_TEST_CASE(RunSmall, Small2DShapes() * BorderModes() * boost::unit_test::data::make({ 3, 5, 7 }) * boost::unit_test::data::make({ 3, 5, 7 }), shape, border_mode, gradient, block)
+{
+    uint8_t constant_border_value = 0;
+    // Threshold and sensitivity are drawn from [0, 0.01); the distribution requires lower bound <= upper bound
+    std::mt19937                          gen(user_config.seed.get());
+    std::uniform_real_distribution<float> real_dist(0.f, 0.01f);
+
+    const float threshold              = real_dist(gen);
+    const float sensitivity            = real_dist(gen);
+    const float max_euclidean_distance = 30.f;
+
+    real_dist            = std::uniform_real_distribution<float>(0.f, max_euclidean_distance);
+    const float min_dist = real_dist(gen);
+
+    // 50% chance to use fp16
+    const bool use_fp16 = real_dist(gen) < max_euclidean_distance / 2;
+
+    // Generate a random constant value if border_mode is constant (drawn as unsigned int because uniform_int_distribution is not defined for 8-bit types)
+    if(border_mode == BorderMode::CONSTANT)
+    {
+        std::uniform_int_distribution<unsigned int> int_dist(0, 255);
+        constant_border_value = static_cast<uint8_t>(int_dist(gen));
+    }
+
+    // Compute function
+    KeyPointArray dst = compute_harris_corners(shape, threshold, min_dist, sensitivity, gradient, block, border_mode, constant_border_value, use_fp16);
+    // Compute reference
+    KeyPointArray ref_dst = Reference::compute_reference_harris_corners(shape, threshold, min_dist, sensitivity, gradient, block, border_mode, constant_border_value);
+
+    // Validate output
+    validate(dst, ref_dst);
+}
+
+// Runs the NEON function on the large shapes and compares the keypoints with the reference implementation
+BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly"))
+BOOST_DATA_TEST_CASE(RunLarge, Large2DShapes() * BorderModes() * boost::unit_test::data::make({ 3, 5, 7 }) * boost::unit_test::data::make({ 3, 5, 7 }), shape, border_mode, gradient, block)
+{
+    uint8_t constant_border_value = 0;
+    // Threshold and sensitivity are drawn from [0, 0.01); the distribution requires lower bound <= upper bound
+    std::mt19937                          gen(user_config.seed.get());
+    std::uniform_real_distribution<float> real_dist(0.f, 0.01f);
+
+    const float threshold              = real_dist(gen);
+    const float sensitivity            = real_dist(gen);
+    const float max_euclidean_distance = 30.f;
+
+    real_dist            = std::uniform_real_distribution<float>(0.f, max_euclidean_distance);
+    const float min_dist = real_dist(gen);
+
+    // 50% chance to use fp16
+    const bool use_fp16 = real_dist(gen) < max_euclidean_distance / 2;
+
+    // Generate a random constant value if border_mode is constant (drawn as unsigned int because uniform_int_distribution is not defined for 8-bit types)
+    if(border_mode == BorderMode::CONSTANT)
+    {
+        std::uniform_int_distribution<unsigned int> int_dist(0, 255);
+        constant_border_value = static_cast<uint8_t>(int_dist(gen));
+    }
+
+    // Compute function
+    KeyPointArray dst = compute_harris_corners(shape, threshold, min_dist, sensitivity, gradient, block, border_mode, constant_border_value, use_fp16);
+    // Compute reference
+    KeyPointArray ref_dst = Reference::compute_reference_harris_corners(shape, threshold, min_dist, sensitivity, gradient, block, border_mode, constant_border_value);
+
+    // Validate output
+    validate(dst, ref_dst);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
+BOOST_AUTO_TEST_SUITE_END()
+#endif /* DOXYGEN_SKIP_THIS */
diff --git a/tests/validation/Reference.cpp b/tests/validation/Reference.cpp
index 9cdd2d7..fca3b9d 100644
--- a/tests/validation/Reference.cpp
+++ b/tests/validation/Reference.cpp
@@ -56,6 +56,7 @@
 
     return std::make_pair(ref_dst_x, ref_dst_y);
 }
+
 std::pair<RawTensor, RawTensor> Reference::compute_reference_sobel_5x5(const TensorShape &shape, BorderMode border_mode, uint8_t constant_border_value)
 {
     // Create reference
@@ -83,6 +84,28 @@
     // Compute reference
     ReferenceCPP::min_max_location(ref_src, min, max, min_loc, max_loc, min_count, max_count);
 }
+
+KeyPointArray Reference::compute_reference_harris_corners(const TensorShape &shape, float threshold, float min_dist, float sensitivity,
+                                                          int32_t gradient_size, int32_t block_size, BorderMode border_mode, uint8_t constant_border_value)
+{
+    // Both gradient tensors share one format: S32 for the 7x7 gradient, S16 for the others
+    const Format gradient_format = (gradient_size == 7) ? Format::S32 : Format::S16;
+
+    RawTensor     src(shape, Format::U8);
+    RawTensor     gx(shape, gradient_format);
+    RawTensor     gy(shape, gradient_format);
+    RawTensor     candidates(shape, Format::F32);
+    RawTensor     non_maxima(shape, Format::F32);
+    KeyPointArray keypoints(shape.total_size());
+
+    // Populate the source with uniformly distributed values
+    library->fill_tensor_uniform(src, 0);
+
+    // Run the C++ reference pipeline; the detected corners end up in keypoints
+    ReferenceCPP::harris_corners(src, gx, gy, candidates, non_maxima, threshold, min_dist, sensitivity, gradient_size, block_size, keypoints, border_mode, constant_border_value);
+    return keypoints;
+}
+
 std::pair<float, float> Reference::compute_reference_mean_and_standard_deviation(const TensorShape &shape)
 {
     // Create reference
@@ -100,6 +123,7 @@
 
     return std::make_pair(mean, std_dev);
 }
+
 RawTensor Reference::compute_reference_integral_image(const TensorShape &shape)
 {
     // Create reference
@@ -114,6 +138,7 @@
 
     return ref_dst;
 }
+
 RawTensor Reference::compute_reference_absolute_difference(const TensorShape &shape, DataType dt_in0, DataType dt_in1, DataType dt_out)
 {
     // Create reference
diff --git a/tests/validation/Reference.h b/tests/validation/Reference.h
index 13e43d3..276540f 100644
--- a/tests/validation/Reference.h
+++ b/tests/validation/Reference.h
@@ -59,6 +59,21 @@
      * @return Computed raw tensors along x and y axis.
      */
     static std::pair<RawTensor, RawTensor> compute_reference_sobel_5x5(const TensorShape &shape, BorderMode border_mode, uint8_t constant_border_value);
+    /** Compute reference Harris corners.
+     *
+     * @param[in] shape                 Shape of input tensor
+     * @param[in] threshold             Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel). Scores that are not strictly greater are discarded.
+     * @param[in] min_dist              Minimum Euclidean distance between two returned corners; a candidate closer than this to a stronger corner is discarded.
+     * @param[in] sensitivity           Sensitivity threshold k from the Harris-Stephens equation
+     * @param[in] gradient_size         The gradient window size to use on the input. The implementation supports 3, 5, and 7.
+     * @param[in] block_size            The block window size used to compute the Harris Corner score. The implementation supports 3, 5, and 7.
+     * @param[in] border_mode           Border mode to use
+     * @param[in] constant_border_value Constant value to use for borders if border_mode is set to CONSTANT.
+     *
+     * @return Keypoints of the computed corners, ordered by decreasing strength.
+     */
+    static KeyPointArray compute_reference_harris_corners(const TensorShape &shape, float threshold, float min_dist, float sensitivity,
+                                                          int32_t gradient_size, int32_t block_size, BorderMode border_mode, uint8_t constant_border_value);
     /** Compute min max location.
      *
      * @param[in]  shape     Shape of the input tensors.
diff --git a/tests/validation/ReferenceCPP.cpp b/tests/validation/ReferenceCPP.cpp
index 4a2d7be..cf0b978 100644
--- a/tests/validation/ReferenceCPP.cpp
+++ b/tests/validation/ReferenceCPP.cpp
@@ -70,6 +70,31 @@
     tensor_operations::sobel_5x5(s, dx, dy, border_mode, constant_border_value);
 }
 
+// Harris corners: view the raw buffers as typed tensors and dispatch on the gradient element type
+void ReferenceCPP::harris_corners(RawTensor &src, RawTensor &Gx, RawTensor &Gy, const RawTensor &candidates, const RawTensor &non_maxima, float threshold, float min_dist, float sensitivity,
+                                  int32_t gradient_size, int32_t block_size, KeyPointArray &corners, BorderMode border_mode, uint8_t constant_border_value)
+{
+    ARM_COMPUTE_ERROR_ON(src.data_type() != DataType::U8 || (Gx.data_type() != DataType::S16 && Gx.data_type() != DataType::S32) || (Gy.data_type() != DataType::S16 && Gy.data_type() != DataType::S32)
+                         || candidates.data_type() != DataType::F32 || non_maxima.data_type() != DataType::F32);
+
+    // candidates and non_maxima arrive as const references but are written by the reference implementation, hence the const_casts
+    Tensor<uint8_t> input(src.shape(), src.data_type(), src.fixed_point_position(), reinterpret_cast<const uint8_t *>(src.data()));
+    Tensor<float>   scores(candidates.shape(), candidates.data_type(), candidates.fixed_point_position(), const_cast<float *>(reinterpret_cast<const float *>(candidates.data())));  // NOLINT
+    Tensor<float>   suppressed(non_maxima.shape(), non_maxima.data_type(), non_maxima.fixed_point_position(), const_cast<float *>(reinterpret_cast<const float *>(non_maxima.data()))); // NOLINT
+    if(gradient_size != 7)
+    {
+        Tensor<int16_t> grad_x(Gx.shape(), Gx.data_type(), Gx.fixed_point_position(), reinterpret_cast<int16_t *>(Gx.data()));
+        Tensor<int16_t> grad_y(Gy.shape(), Gy.data_type(), Gy.fixed_point_position(), reinterpret_cast<int16_t *>(Gy.data()));
+        tensor_operations::harris_corners(input, grad_x, grad_y, scores, suppressed, threshold, min_dist, sensitivity, gradient_size, block_size, corners, border_mode, constant_border_value);
+    }
+    else
+    {
+        Tensor<int32_t> grad_x(Gx.shape(), Gx.data_type(), Gx.fixed_point_position(), reinterpret_cast<int32_t *>(Gx.data()));
+        Tensor<int32_t> grad_y(Gy.shape(), Gy.data_type(), Gy.fixed_point_position(), reinterpret_cast<int32_t *>(Gy.data()));
+        tensor_operations::harris_corners(input, grad_x, grad_y, scores, suppressed, threshold, min_dist, sensitivity, gradient_size, block_size, corners, border_mode, constant_border_value);
+    }
+}
+
 // Minimum maximum location
 void ReferenceCPP::min_max_location(const RawTensor &src, void *min, void *max, IArray<Coordinates2D> &min_loc, IArray<Coordinates2D> &max_loc, uint32_t &min_count, uint32_t &max_count)
 {
diff --git a/tests/validation/ReferenceCPP.h b/tests/validation/ReferenceCPP.h
index cc886ae..2e8f48f 100644
--- a/tests/validation/ReferenceCPP.h
+++ b/tests/validation/ReferenceCPP.h
@@ -64,6 +64,25 @@
      *
      */
     static void sobel_5x5(RawTensor &src, RawTensor &dst_x, RawTensor &dst_y, BorderMode border_mode, uint8_t constant_border_value);
+    /** Function to compute reference Harris corners.
+     *
+     * @param[in]  src                   Input tensor (U8)
+     * @param[out] Gx                    Tensor that receives the Sobel gradient along the x axis (accessed as S32 when gradient_size is 7, as S16 otherwise)
+     * @param[out] Gy                    Tensor that receives the Sobel gradient along the y axis (accessed as S32 when gradient_size is 7, as S16 otherwise)
+     * @param[out] candidates            Tensor (F32) that receives the thresholded candidate corner scores. Declared const but written to by the implementation
+     * @param[out] non_maxima            Tensor (F32) that receives the non_maxima suppressed candidate corner scores. Declared const but written to by the implementation
+     * @param[in]  threshold             Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel).
+     * @param[in]  min_dist              Minimum Euclidean distance between two returned corners; a candidate closer than this to a stronger corner is discarded.
+     * @param[in]  sensitivity           Sensitivity threshold k from the Harris-Stephens equation
+     * @param[in]  gradient_size         The gradient window size to use on the input. The implementation supports 3, 5, and 7.
+     * @param[in]  block_size            The block window size used to compute the Harris Corner score. The implementation supports 3, 5, and 7.
+     * @param[out] corners               Array of keypoints to store the results. Corners are appended in order of decreasing strength.
+     * @param[in]  border_mode           Border mode to use
+     * @param[in]  constant_border_value Constant value to use for borders if border_mode is set to CONSTANT.
+     *
+     */
+    static void harris_corners(RawTensor &src, RawTensor &Gx, RawTensor &Gy, const RawTensor &candidates, const RawTensor &non_maxima, float threshold, float min_dist, float sensitivity,
+                               int32_t gradient_size, int32_t block_size, KeyPointArray &corners, BorderMode border_mode, uint8_t constant_border_value);
     /** Function to compute the min max values and their location in a tensor.
      *
      * @param[in]  src       Input tensor.
diff --git a/tests/validation/Tensor.h b/tests/validation/Tensor.h
index 34d8cd7..84d76e7 100644
--- a/tests/validation/Tensor.h
+++ b/tests/validation/Tensor.h
@@ -66,7 +66,7 @@
 
         ARM_COMPUTE_ERROR_ON(ptr == nullptr);
 
-        return ptr[offset];
+        return ptr[offset]; // NOLINT
     }
 
     int num_elements() const
diff --git a/tests/validation/TensorOperations.h b/tests/validation/TensorOperations.h
index 887d528..104f077 100644
--- a/tests/validation/TensorOperations.h
+++ b/tests/validation/TensorOperations.h
@@ -105,8 +105,8 @@
 }
 
 // Return a tensor element at a specified coordinate with different border modes
-template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type = 0>
-T tensor_elem_at(const Tensor<T> &in, Coordinates &coord, BorderMode border_mode, T constant_border_value)
+template <typename T>
+T tensor_elem_at(const Tensor<T> &in, Coordinates coord, BorderMode border_mode, T constant_border_value)
 {
     const int x      = coord.x();
     const int y      = coord.y();
@@ -120,17 +120,14 @@
         {
             coord.set(0, std::max(0, std::min(x, width - 1)));
             coord.set(1, std::max(0, std::min(y, height - 1)));
-            return in[coord2index(in.shape(), coord)];
         }
         else
         {
             return constant_border_value;
         }
     }
-    else
-    {
-        return in[coord2index(in.shape(), coord)];
-    }
+
+    return in[coord2index(in.shape(), coord)];
 }
 
 /** Apply 2D spatial filter on a single element of @p in at coordinates @p coord
@@ -210,6 +207,209 @@
     }
 }
 
+// Sobel 7x7: filters @p in with the x and y kernels below and stores the results in @p out_x and @p out_y
+template <typename T1, typename T2>
+void sobel_7x7(Tensor<T1> &in, Tensor<T2> &out_x, Tensor<T2> &out_y, BorderMode border_mode, uint8_t constant_border_value)
+{
+    const std::array<int8_t, 49> coeffs_x{ {
+            -1, -4, -5, 0, 5, 4, 1,
+            -6, -24, -30, 0, 30, 24, 6,
+            -15, -60, -75, 0, 75, 60, 15,
+            -20, -80, -100, 0, 100, 80, 20,
+            -15, -60, -75, 0, 75, 60, 15,
+            -6, -24, -30, 0, 30, 24, 6,
+            -1, -4, -5, 0, 5, 4, 1
+        } };
+
+    const std::array<int8_t, 49> coeffs_y{ {
+            -1, -6, -15, -20, -15, -6, -1,
+            -4, -24, -60, -80, -60, -24, -4,
+            -5, -30, -75, -100, -75, -30, -5,
+            0, 0, 0, 0, 0, 0, 0,
+            5, 30, 75, 100, 75, 30, 5,
+            4, 24, 60, 80, 60, 24, 4,
+            1, 6, 15, 20, 15, 6, 1
+        } };
+
+    const int num_elements = in.num_elements();
+    for(int idx = 0; idx < num_elements; ++idx)
+    {
+        const Coordinates pos = index2coord(in.shape(), idx);
+        apply_2d_spatial_filter(pos, in, out_x, TensorShape(7U, 7U), coeffs_x.data(), 1.f, border_mode, constant_border_value);
+        apply_2d_spatial_filter(pos, in, out_y, TensorShape(7U, 7U), coeffs_y.data(), 1.f, border_mode, constant_border_value);
+    }
+}
+
+template <typename T>
+void non_maxima_suppression_3x3(Tensor<T> &in, Tensor<T> &out, BorderMode border_mode)
+{
+    // Neighbour lookup; positions outside the tensor are resolved by tensor_elem_at with a border value of 0
+    const auto neighbour = [&in, border_mode](const Coordinates & centre, int dx, int dy)
+    {
+        return tensor_elem_at(in, Coordinates(centre.x() + dx, centre.y() + dy), border_mode, 0.f);
+    };
+
+    for(int idx = 0; idx < in.num_elements(); ++idx)
+    {
+        const Coordinates centre = index2coord(in.shape(), idx);
+        const T           value  = in[idx];
+
+        // Ties are won against the four neighbours that come earlier in raster order (>=) and lost against the four later ones (>)
+        const bool is_maximum = value >= neighbour(centre, -1, -1) && value >= neighbour(centre, 0, -1) && value >= neighbour(centre, 1, -1)
+                                && value >= neighbour(centre, -1, 0) && value > neighbour(centre, 1, 0)
+                                && value > neighbour(centre, -1, 1) && value > neighbour(centre, 0, 1) && value > neighbour(centre, 1, 1);
+
+        out[idx] = is_maximum ? value : 0;
+    }
+}
+
+/** Reference Harris corner detector.
+ *
+ * Pipeline: Sobel gradients -> Harris response per element -> threshold -> 3x3 non-maxima suppression ->
+ * sort by strength -> drop every corner that lies closer than @p min_dist to a stronger accepted corner.
+ *
+ * @param[in]  in                    Input tensor
+ * @param[out] Gx                    Receives the Sobel gradient along the x axis
+ * @param[out] Gy                    Receives the Sobel gradient along the y axis
+ * @param[out] candidates            Receives the thresholded Harris responses
+ * @param[out] non_maxima            Receives the responses that survive non-maxima suppression
+ * @param[in]  threshold             Responses that are not strictly greater than this value are set to 0
+ * @param[in]  min_dist              Minimum Euclidean distance between two accepted corners
+ * @param[in]  sensitivity           Factor applied to the squared trace in the Harris response
+ * @param[in]  gradient_size         Sobel window size. Supported: 3, 5 and 7
+ * @param[in]  block_size            Window size used to accumulate the response. Supported: 3, 5 and 7
+ * @param[out] corners               Accepted corners are appended to this array, strongest first
+ * @param[in]  border_mode           Border mode to use
+ * @param[in]  constant_border_value Constant value to use for borders if border_mode is set to CONSTANT
+ */
+template <typename T1, typename T2, typename T3>
+void harris_corners(Tensor<T1> &in, Tensor<T2> &Gx, Tensor<T2> &Gy, Tensor<T3> &candidates, Tensor<T3> &non_maxima, float threshold, float min_dist, float sensitivity,
+                    int32_t gradient_size, int32_t block_size, KeyPointArray &corners, BorderMode border_mode, uint8_t constant_border_value)
+{
+    ARM_COMPUTE_ERROR_ON(block_size != 3 && block_size != 5 && block_size != 7);
+
+    ValidRegion valid_region = shape_to_valid_region(candidates.shape());
+    float       norm_factor  = 0.f;
+
+    // Sobel
+    switch(gradient_size)
+    {
+        case 3:
+            sobel_3x3(in, Gx, Gy, border_mode, constant_border_value);
+            norm_factor = 1.f / (4 * 255 * block_size);
+            break;
+        case 5:
+            sobel_5x5(in, Gx, Gy, border_mode, constant_border_value);
+            norm_factor = 1.f / (16 * 255 * block_size);
+            break;
+        case 7:
+            sobel_7x7(in, Gx, Gy, border_mode, constant_border_value);
+            norm_factor = 1.f / (64 * 255 * block_size);
+            break;
+        default:
+            ARM_COMPUTE_ERROR("Gradient size not supported.");
+    }
+
+    // Calculate scores
+    for(int i = 0; i < in.num_elements(); ++i)
+    {
+        Coordinates in_coord = index2coord(in.shape(), i);
+
+        float Gx2 = 0;
+        float Gy2 = 0;
+        float Gxy = 0;
+
+        // Calculate Gx^2, Gy^2 and Gxy within the given window
+        for(int y = in_coord.y() - block_size / 2; y <= in_coord.y() + block_size / 2; ++y)
+        {
+            for(int x = in_coord.x() - block_size / 2; x <= in_coord.x() + block_size / 2; ++x)
+            {
+                Coordinates block_coord(x, y);
+
+                float norm_gx = tensor_elem_at(Gx, block_coord, border_mode, static_cast<T2>(constant_border_value)) * norm_factor;
+                float norm_gy = tensor_elem_at(Gy, block_coord, border_mode, static_cast<T2>(constant_border_value)) * norm_factor;
+
+                Gx2 += std::pow(norm_gx, 2);
+                Gy2 += std::pow(norm_gy, 2);
+                Gxy += norm_gx * norm_gy;
+            }
+        }
+
+        float trace2   = std::pow(Gx2 + Gy2, 2);
+        float det      = Gx2 * Gy2 - std::pow(Gxy, 2);
+        float response = det - sensitivity * trace2;
+
+        // Keep only responses strictly above the threshold
+        candidates[i] = (response > threshold) ? response : 0.f;
+    }
+
+    // Update valid region and remove candidates on borders for border_mode == UNDEFINED
+    if(border_mode == BorderMode::UNDEFINED)
+    {
+        valid_region = shape_to_valid_region(candidates.shape(), true, BorderSize((gradient_size / 2) + (block_size / 2)));
+
+        for(int i = 0; i < candidates.num_elements(); ++i)
+        {
+            if(!is_in_valid_region(valid_region, index2coord(candidates.shape(), i)))
+            {
+                candidates[i] = 0.f;
+            }
+        }
+    }
+
+    // Suppress non-maxima candidates
+    non_maxima_suppression_3x3(candidates, non_maxima, border_mode != BorderMode::UNDEFINED ? BorderMode::CONSTANT : BorderMode::UNDEFINED);
+    if(border_mode == BorderMode::UNDEFINED)
+    {
+        valid_region = shape_to_valid_region(non_maxima.shape(), true, BorderSize((gradient_size / 2) + (block_size / 2) + 1));
+    }
+
+    // Create vector of candidate corners
+    KeyPointArray candidates_vector(corners.max_num_values());
+    for(int i = 0; i < non_maxima.num_elements(); ++i)
+    {
+        Coordinates coord = index2coord(non_maxima.shape(), i);
+
+        if(non_maxima[i] != 0.f && is_in_valid_region(valid_region, coord))
+        {
+            KeyPoint corner;
+            corner.x               = coord.x();
+            corner.y               = coord.y();
+            corner.tracking_status = 1;
+            corner.strength        = non_maxima[i];
+
+            corner.scale       = 0.f;
+            corner.orientation = 0.f;
+            corner.error       = 0.f;
+
+            candidates_vector.push_back(corner);
+        }
+    }
+
+    // Visit the candidates from strongest to weakest and accept one only if no already accepted corner lies within min_dist
+    if(candidates_vector.num_values() > 0)
+    {
+        std::sort(candidates_vector.buffer(), candidates_vector.buffer() + candidates_vector.num_values(), [](const KeyPoint & a, const KeyPoint & b)
+        {
+            return a.strength > b.strength;
+        });
+        // corners is not pre-seeded with the strongest candidate: doing so would add it twice whenever min_dist <= 0
+        for(size_t j = 0; j < candidates_vector.num_values(); ++j)
+        {
+            const KeyPoint &candidate = candidates_vector.at(j);
+            const bool      too_close = std::any_of(corners.buffer(), corners.buffer() + corners.num_values(), [&candidate, min_dist](const KeyPoint & accepted)
+            {
+                return std::sqrt((std::pow(candidate.x - accepted.x, 2) + std::pow(candidate.y - accepted.y, 2))) < min_dist;
+            });
+
+            if(!too_close)
+            {
+                corners.push_back(candidate);
+            }
+        }
+    }
+}
+
 template <typename T>
 void compute_min_max(const Tensor<T> &in, void *min, void *max)
 {
diff --git a/tests/validation/Validation.cpp b/tests/validation/Validation.cpp
index 868bbaa..61097cd 100644
--- a/tests/validation/Validation.cpp
+++ b/tests/validation/Validation.cpp
@@ -26,6 +26,7 @@
 #include "arm_compute/core/Coordinates.h"
 #include "arm_compute/core/Error.h"
 #include "arm_compute/core/FixedPoint.h"
+#include "arm_compute/core/IArray.h"
 #include "arm_compute/core/TensorShape.h"
 #include "arm_compute/runtime/Tensor.h"
 #include "tests/IAccessor.h"
@@ -39,6 +40,7 @@
 #include <cstddef>
 #include <cstdint>
 #include <iomanip>
+#include <vector>
 
 namespace arm_compute
 {
@@ -435,6 +437,27 @@
     BOOST_TEST_INFO("target = " << std::setprecision(5) << target);
     BOOST_TEST(equal);
 }
+
+// Validate KeyPoint arrays: equal counts, and every target keypoint must have a reference keypoint at the same (x, y) with matching fields
+void validate(IArray<KeyPoint> &target, IArray<KeyPoint> &ref)
+{
+    BOOST_TEST(target.num_values() == ref.num_values());
+
+    for(size_t i = 0; i < target.num_values(); ++i)
+    {
+        KeyPoint *ref_val = std::find_if(ref.buffer(), ref.buffer() + ref.num_values(), [&target, i](const KeyPoint & key)
+        {
+            return key.x == target.at(i).x && key.y == target.at(i).y;
+        });
+        // Fatal check: without a match ref_val points one past the end of ref and must not be dereferenced below
+        BOOST_TEST_REQUIRE(ref_val != ref.buffer() + ref.num_values());
+        BOOST_TEST(target.at(i).tracking_status == ref_val->tracking_status);
+        validate(target.at(i).strength, ref_val->strength);
+        validate(target.at(i).scale, ref_val->scale);
+        validate(target.at(i).orientation, ref_val->orientation);
+        validate(target.at(i).error, ref_val->error);
+    }
+}
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
diff --git a/tests/validation/Validation.h b/tests/validation/Validation.h
index 43a90f3..993a3c3 100644
--- a/tests/validation/Validation.h
+++ b/tests/validation/Validation.h
@@ -181,6 +181,12 @@
         BOOST_TEST(same_coords != ref_max_loc.buffer() + max_count);
     }
 }
+
+/** Validate KeyPoint arrays.
+ *
+ * - Both arrays must hold the same number of keypoints; each target keypoint is matched to a reference keypoint by (x, y), not by index, and its other fields must match
+ */
+void validate(IArray<KeyPoint> &target, IArray<KeyPoint> &ref);
 } // namespace validation
 } // namespace test
 } // namespace arm_compute