COMPMID-378 - Implemented reference implementation and tests (NEON and CL) for Non Linear Filter

Change-Id: I1b81e030a27cf01d098247a87d047099616e2a39
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/78531
Reviewed-by: Moritz Pflanzer <moritz.pflanzer@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
diff --git a/tests/TypePrinter.h b/tests/TypePrinter.h
index 3d5a199..d767544 100644
--- a/tests/TypePrinter.h
+++ b/tests/TypePrinter.h
@@ -86,6 +86,51 @@
     return os;
 }
 
+/** Formatted output of the NonLinearFilterFunction type: streams the enumerator name (MAX, MEDIAN or MIN) into @p os and returns @p os. */
+inline ::std::ostream &operator<<(::std::ostream &os, const NonLinearFilterFunction &function)
+{
+    switch(function)
+    {
+        case NonLinearFilterFunction::MAX:
+            os << "MAX";
+            break;
+        case NonLinearFilterFunction::MEDIAN:
+            os << "MEDIAN";
+            break;
+        case NonLinearFilterFunction::MIN:
+            os << "MIN";
+            break;
+        default: // Any other value is reported through ARM_COMPUTE_ERROR
+            ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
+    }
+
+    return os;
+}
+
+/** Formatted output of the MatrixPattern type: streams the enumerator name (BOX, CROSS, DISK or OTHER) into @p os and returns @p os. */
+inline ::std::ostream &operator<<(::std::ostream &os, const MatrixPattern &pattern)
+{
+    switch(pattern)
+    {
+        case MatrixPattern::BOX:
+            os << "BOX";
+            break;
+        case MatrixPattern::CROSS:
+            os << "CROSS";
+            break;
+        case MatrixPattern::DISK:
+            os << "DISK";
+            break;
+        case MatrixPattern::OTHER:
+            os << "OTHER";
+            break;
+        default: // Any other value is reported through ARM_COMPUTE_ERROR
+            ARM_COMPUTE_ERROR("NOT_SUPPORTED!");
+    }
+
+    return os;
+}
+
 /** Formatted output of the InterpolationPolicy type. */
 inline ::std::ostream &operator<<(::std::ostream &os, const InterpolationPolicy &policy)
 {
diff --git a/tests/Utils.h b/tests/Utils.h
index f067990..219cbd0 100644
--- a/tests/Utils.h
+++ b/tests/Utils.h
@@ -681,6 +681,7 @@
     }
     return true;
 }
+
 } // namespace test
 } // namespace arm_compute
 #endif
diff --git a/tests/dataset/MatrixPatternDataset.h b/tests/dataset/MatrixPatternDataset.h
new file mode 100644
index 0000000..d9f20c1
--- /dev/null
+++ b/tests/dataset/MatrixPatternDataset.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_MATRIX_PATTERN_DATASET_H__
+#define __ARM_COMPUTE_TEST_MATRIX_PATTERN_DATASET_H__
+
+#include "arm_compute/core/Types.h"
+
+#ifdef BOOST
+#include "boost_wrapper.h"
+#endif
+
+#include <array>
+
+namespace arm_compute
+{
+namespace test
+{
+/** Data set containing the matrix patterns BOX, CROSS, DISK and OTHER.
+ *
+ * Can be used as input for Boost data test cases to automatically run a test
+ * case on all of these matrix patterns.
+ */
+class MatrixPatterns
+{
+public:
+    /** Type of the samples in the data set. */
+    using sample = MatrixPattern;
+
+    /** Dimensionality of the data set. */
+    enum
+    {
+        arity = 1
+    };
+
+    /** Number of samples in the data set. */
+#ifdef BOOST
+    boost::unit_test::data::size_t size() const
+#else
+    unsigned int size() const
+#endif
+    {
+        return _patterns.size();
+    }
+
+    /** Type of the iterator used to step through all samples in the data set.
+     * Needs to support operator*() and operator++() which a pointer does.
+     */
+    using iterator = const MatrixPattern *;
+
+    /** Iterator to the first sample in the data set. */
+    iterator begin() const
+    {
+        return _patterns.data();
+    }
+
+private:
+    std::array<MatrixPattern, 4> _patterns{ { MatrixPattern::BOX, MatrixPattern::CROSS, MatrixPattern::DISK, MatrixPattern::OTHER } };
+};
+} // namespace test
+} // namespace arm_compute
+#endif // __ARM_COMPUTE_TEST_MATRIX_PATTERN_DATASET_H__
diff --git a/tests/dataset/NonLinearFilterFunctionDataset.h b/tests/dataset/NonLinearFilterFunctionDataset.h
new file mode 100644
index 0000000..1778873
--- /dev/null
+++ b/tests/dataset/NonLinearFilterFunctionDataset.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_NON_LINEAR_FILTER_FUNCTION_DATASET_H__
+#define __ARM_COMPUTE_TEST_NON_LINEAR_FILTER_FUNCTION_DATASET_H__
+
+#include "arm_compute/core/Types.h"
+
+#ifdef BOOST
+#include "boost_wrapper.h"
+#endif
+
+#include <array>
+
+namespace arm_compute
+{
+namespace test
+{
+/** Data set containing the non linear filter functions MAX, MEDIAN and MIN.
+ *
+ * Can be used as input for Boost data test cases to automatically run a test
+ * case on all of these non linear filter functions.
+ */
+class NonLinearFilterFunctions
+{
+public:
+    /** Type of the samples in the data set. */
+    using sample = NonLinearFilterFunction;
+
+    /** Dimensionality of the data set. */
+    enum
+    {
+        arity = 1
+    };
+
+    /** Number of samples in the data set. */
+#ifdef BOOST
+    boost::unit_test::data::size_t size() const
+#else
+    unsigned int size() const
+#endif
+    {
+        return _functions.size();
+    }
+
+    /** Type of the iterator used to step through all samples in the data set.
+     * Needs to support operator*() and operator++() which a pointer does.
+     */
+    using iterator = const NonLinearFilterFunction *;
+
+    /** Iterator to the first sample in the data set. */
+    iterator begin() const
+    {
+        return _functions.data();
+    }
+
+private:
+    std::array<NonLinearFilterFunction, 3> _functions{ { NonLinearFilterFunction::MAX, NonLinearFilterFunction::MEDIAN, NonLinearFilterFunction::MIN } };
+};
+} // namespace test
+} // namespace arm_compute
+#endif
diff --git a/tests/validation/CL/NonLinearFilter.cpp b/tests/validation/CL/NonLinearFilter.cpp
new file mode 100644
index 0000000..5288f95
--- /dev/null
+++ b/tests/validation/CL/NonLinearFilter.cpp
@@ -0,0 +1,217 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "CL/CLAccessor.h"
+#include "CL/Helper.h"
+#include "Globals.h"
+#include "PaddingCalculator.h"
+#include "TensorLibrary.h"
+#include "TypePrinter.h"
+#include "Utils.h"
+#include "validation/Datasets.h"
+#include "validation/Helpers.h"
+#include "validation/Reference.h"
+#include "validation/Validation.h"
+#include "validation/ValidationUserConfiguration.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/functions/CLNonLinearFilter.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+
+#include "boost_wrapper.h"
+
+#include <random>
+#include <string>
+
+using namespace arm_compute;
+using namespace arm_compute::test;
+using namespace arm_compute::test::cl;
+using namespace arm_compute::test::validation;
+
+namespace
+{
+/** Compute NonLinearFilter function.
+ *
+ * @param[in] shape                 Shape of the input and output tensors.
+ * @param[in] function              Non linear function to perform
+ * @param[in] mask_size             Mask size. Supported sizes: 3, 5
+ * @param[in] pattern               Mask pattern
+ * @param[in] mask                  The given mask. Will be used only if pattern is MatrixPattern::OTHER
+ * @param[in] border_mode           Strategy to use for borders.
+ * @param[in] constant_border_value Constant value to use for borders if border_mode is set to CONSTANT.
+ *
+ * @return Computed output CL tensor.
+ */
+CLTensor compute_non_linear_filter(const TensorShape &shape, NonLinearFilterFunction function, unsigned int mask_size,
+                                   MatrixPattern pattern, const uint8_t *mask, BorderMode border_mode,
+                                   uint8_t constant_border_value)
+{
+    // Create tensors
+    CLTensor src = create_tensor(shape, DataType::U8);
+    CLTensor dst = create_tensor(shape, DataType::U8);
+
+    // Create and configure function
+    CLNonLinearFilter filter;
+    filter.configure(&src, &dst, function, mask_size, pattern, mask, border_mode, constant_border_value);
+
+    // Allocate tensors (done after configure(); the checks below expect them to be no longer resizable)
+    src.allocator()->allocate();
+    dst.allocator()->allocate();
+
+    BOOST_TEST(!src.info()->is_resizable());
+    BOOST_TEST(!dst.info()->is_resizable());
+
+    // Fill the source tensor only; the destination is produced by the filter
+    library->fill_tensor_uniform(CLAccessor(src), 0);
+
+    // Compute function
+    filter.run();
+
+    return dst;
+}
+} // namespace
+
+#ifndef DOXYGEN_SKIP_THIS
+BOOST_AUTO_TEST_SUITE(CL)
+BOOST_AUTO_TEST_SUITE(NonLinearFilter)
+
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly"))
+BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes())
+                     * NonLinearFilterFunctions() * boost::unit_test::data::make({ 3U, 5U })
+                     * boost::unit_test::data::make({ MatrixPattern::BOX, MatrixPattern::CROSS, MatrixPattern::DISK }) * BorderModes(),
+                     shape, function, mask_size, pattern, border_mode)
+{
+    std::mt19937                                generator(user_config.seed.get());
+    std::uniform_int_distribution<unsigned int> border_distribution(0, 255); // uint8_t is not a valid IntType for std::uniform_int_distribution
+    const uint8_t                               constant_border_value = static_cast<uint8_t>(border_distribution(generator));
+
+    // Create the mask (NOTE(review): variable-length array is a compiler extension, not standard C++)
+    uint8_t mask[mask_size * mask_size];
+    fill_mask_from_pattern(mask, mask_size, mask_size, pattern);
+    const auto half_mask_size = static_cast<int>(mask_size / 2);
+
+    // Create tensors
+    CLTensor src = create_tensor(shape, DataType::U8);
+    CLTensor dst = create_tensor(shape, DataType::U8);
+
+    BOOST_TEST(src.info()->is_resizable());
+    BOOST_TEST(dst.info()->is_resizable());
+
+    // Create and configure function
+    CLNonLinearFilter filter;
+    filter.configure(&src, &dst, function, mask_size, pattern, mask, border_mode, constant_border_value);
+
+    // Validate valid region (the destination shrinks by half the mask size when the border is undefined)
+    const ValidRegion src_valid_region = shape_to_valid_region(shape);
+    ValidRegion       dst_valid_region = shape_to_valid_region(shape);
+    if(border_mode == BorderMode::UNDEFINED)
+    {
+        dst_valid_region = shape_to_valid_region_undefined_border(shape, BorderSize(half_mask_size));
+    }
+
+    validate(src.info()->valid_region(), src_valid_region);
+    validate(dst.info()->valid_region(), dst_valid_region);
+
+    // Validate padding
+    PaddingCalculator calculator(shape.x(), ((MatrixPattern::OTHER == pattern) ? 1 : 8));
+    calculator.set_border_mode(border_mode);
+    calculator.set_border_size(half_mask_size);
+
+    const PaddingSize write_padding = calculator.required_padding(PaddingCalculator::Option::EXCLUDE_BORDER);
+
+    calculator.set_accessed_elements(16);
+    calculator.set_access_offset(-half_mask_size);
+
+    const PaddingSize read_padding = calculator.required_padding(PaddingCalculator::Option::INCLUDE_BORDER);
+
+    validate(src.info()->padding(), read_padding);
+    validate(dst.info()->padding(), write_padding);
+}
+
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
+BOOST_DATA_TEST_CASE(RunSmall, SmallShapes()
+                     * NonLinearFilterFunctions() * boost::unit_test::data::make({ 3U, 5U })
+                     * boost::unit_test::data::make({ MatrixPattern::BOX, MatrixPattern::CROSS, MatrixPattern::DISK }) * BorderModes(),
+                     shape, function, mask_size, pattern, border_mode)
+{
+    std::mt19937                                generator(user_config.seed.get());
+    std::uniform_int_distribution<unsigned int> border_distribution(0, 255); // uint8_t is not a valid IntType for std::uniform_int_distribution
+    const uint8_t                               constant_border_value = static_cast<uint8_t>(border_distribution(generator));
+
+    // Create the mask (NOTE(review): variable-length array is a compiler extension, not standard C++)
+    uint8_t mask[mask_size * mask_size];
+    fill_mask_from_pattern(mask, mask_size, mask_size, pattern);
+
+    // Compute function
+    CLTensor dst = compute_non_linear_filter(shape, function, mask_size, pattern, mask, border_mode, constant_border_value);
+
+    // Compute reference with the same mask and border value
+    RawTensor ref_dst = Reference::compute_reference_non_linear_filter(shape, function, mask_size, pattern, mask, border_mode, constant_border_value);
+
+    // Calculate valid region (shrinks by half the mask size when the border is undefined)
+    ValidRegion valid_region = shape_to_valid_region(shape);
+    if(border_mode == BorderMode::UNDEFINED)
+    {
+        valid_region = shape_to_valid_region_undefined_border(shape, BorderSize(static_cast<int>(mask_size / 2)));
+    }
+
+    // Validate output
+    validate(CLAccessor(dst), ref_dst, valid_region);
+}
+
+BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly"))
+BOOST_DATA_TEST_CASE(RunLarge, LargeShapes()
+                     * NonLinearFilterFunctions() * boost::unit_test::data::make({ 3U, 5U })
+                     * boost::unit_test::data::make({ MatrixPattern::BOX, MatrixPattern::CROSS, MatrixPattern::DISK }) * BorderModes(),
+                     shape, function, mask_size, pattern, border_mode)
+{
+    std::mt19937                                generator(user_config.seed.get());
+    std::uniform_int_distribution<unsigned int> border_distribution(0, 255); // uint8_t is not a valid IntType for std::uniform_int_distribution
+    const uint8_t                               constant_border_value = static_cast<uint8_t>(border_distribution(generator));
+
+    // Create the mask (NOTE(review): variable-length array is a compiler extension, not standard C++)
+    uint8_t mask[mask_size * mask_size];
+    fill_mask_from_pattern(mask, mask_size, mask_size, pattern);
+
+    // Compute function
+    CLTensor dst = compute_non_linear_filter(shape, function, mask_size, pattern, mask, border_mode, constant_border_value);
+
+    // Compute reference with the same mask and border value
+    RawTensor ref_dst = Reference::compute_reference_non_linear_filter(shape, function, mask_size, pattern, mask, border_mode, constant_border_value);
+
+    // Calculate valid region (shrinks by half the mask size when the border is undefined)
+    ValidRegion valid_region = shape_to_valid_region(shape);
+    if(border_mode == BorderMode::UNDEFINED)
+    {
+        valid_region = shape_to_valid_region_undefined_border(shape, BorderSize(static_cast<int>(mask_size / 2)));
+    }
+
+    // Validate output
+    validate(CLAccessor(dst), ref_dst, valid_region);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
+BOOST_AUTO_TEST_SUITE_END()
+#endif
diff --git a/tests/validation/Datasets.h b/tests/validation/Datasets.h
index 33776d2..e33cc62 100644
--- a/tests/validation/Datasets.h
+++ b/tests/validation/Datasets.h
@@ -34,6 +34,8 @@
 #include "dataset/GEMMDataset.h"
 #include "dataset/ImageDatasets.h"
 #include "dataset/InterpolationPolicyDataset.h"
+#include "dataset/MatrixPatternDataset.h"
+#include "dataset/NonLinearFilterFunctionDataset.h"
 #include "dataset/NormalizationTypeDataset.h"
 #include "dataset/PoolingLayerDataset.h"
 #include "dataset/PoolingTypesDataset.h"
@@ -250,6 +252,18 @@
 struct is_dataset<arm_compute::test::ThresholdDataset> : boost::mpl::true_
 {
 };
+
+/// Register the NonLinearFilterFunctions data set with Boost
+template <>
+struct is_dataset<arm_compute::test::NonLinearFilterFunctions> : boost::mpl::true_
+{
+};
+
+/// Register the MatrixPatterns data set with Boost
+template <>
+struct is_dataset<arm_compute::test::MatrixPatterns> : boost::mpl::true_
+{
+};
 }
 }
 }
diff --git a/tests/validation/Helpers.h b/tests/validation/Helpers.h
index cbaea4b..c0c5865 100644
--- a/tests/validation/Helpers.h
+++ b/tests/validation/Helpers.h
@@ -25,6 +25,7 @@
 #define __ARM_COMPUTE_TEST_VALIDATION_HELPERS_H__
 
 #include "Types.h"
+#include "ValidationUserConfiguration.h"
 
 #include <type_traits>
 #include <utility>
@@ -117,6 +118,56 @@
 
     return bounds;
 }
+
+/** Fill mask with the corresponding given pattern.
+ *
+ * @param[in,out] mask    Mask to be filled according to pattern; must hold at least cols * rows elements
+ * @param[in]     cols    Columns (width) of mask
+ * @param[in]     rows    Rows (height) of mask
+ * @param[in]     pattern Pattern to fill the mask according to; an unknown pattern leaves the mask untouched
+ */
+inline void fill_mask_from_pattern(uint8_t *mask, int cols, int rows, MatrixPattern pattern)
+{
+    unsigned int                v = 0;
+    std::mt19937                gen(user_config.seed.get());
+    std::bernoulli_distribution dist(0.5);
+
+    for(int r = 0; r < rows; ++r)
+    {
+        for(int c = 0; c < cols; ++c, ++v)
+        {
+            uint8_t val = 0;
+
+            switch(pattern)
+            {
+                case MatrixPattern::BOX:
+                    val = 255;
+                    break;
+                case MatrixPattern::CROSS:
+                    val = ((r == (rows / 2)) || (c == (cols / 2))) ? 255 : 0;
+                    break;
+                case MatrixPattern::DISK:
+                    val = (((r - rows / 2.0f + 0.5f) * (r - rows / 2.0f + 0.5f)) / ((rows / 2.0f) * (rows / 2.0f)) + ((c - cols / 2.0f + 0.5f) * (c - cols / 2.0f + 0.5f)) / ((cols / 2.0f) *
+                            (cols / 2.0f))) <= 1.0f ? 255 : 0;
+                    break;
+                case MatrixPattern::OTHER:
+                    val = (dist(gen) ? 0 : 255);
+                    break;
+                default:
+                    return;
+            }
+
+            mask[v] = val;
+        }
+    }
+    // Force one randomly chosen element to 255 so a random mask is never all zeros; skipped for empty masks
+    if((pattern == MatrixPattern::OTHER) && (cols > 0) && (rows > 0))
+    {
+        std::uniform_int_distribution<unsigned int> index_distribution(0, static_cast<unsigned int>(cols * rows) - 1); // uint8_t is not a valid IntType
+        mask[index_distribution(gen)] = 255;
+    }
+}
+
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
diff --git a/tests/validation/NEON/NonLinearFilter.cpp b/tests/validation/NEON/NonLinearFilter.cpp
new file mode 100644
index 0000000..3b71eaa
--- /dev/null
+++ b/tests/validation/NEON/NonLinearFilter.cpp
@@ -0,0 +1,217 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "Globals.h"
+#include "NEON/Helper.h"
+#include "NEON/NEAccessor.h"
+#include "PaddingCalculator.h"
+#include "TensorLibrary.h"
+#include "TypePrinter.h"
+#include "Utils.h"
+#include "validation/Datasets.h"
+#include "validation/Helpers.h"
+#include "validation/Reference.h"
+#include "validation/Validation.h"
+#include "validation/ValidationUserConfiguration.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NENonLinearFilter.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+
+#include "boost_wrapper.h"
+
+#include <random>
+#include <string>
+
+using namespace arm_compute;
+using namespace arm_compute::test;
+using namespace arm_compute::test::neon;
+using namespace arm_compute::test::validation;
+
+namespace
+{
+/** Compute NonLinearFilter function.
+ *
+ * @param[in] shape                 Shape of the input and output tensors.
+ * @param[in] function              Non linear function to perform
+ * @param[in] mask_size             Mask size. Supported sizes: 3, 5
+ * @param[in] pattern               Mask pattern
+ * @param[in] mask                  The given mask. Will be used only if pattern is MatrixPattern::OTHER
+ * @param[in] border_mode           Strategy to use for borders.
+ * @param[in] constant_border_value Constant value to use for borders if border_mode is set to CONSTANT.
+ *
+ * @return Computed output tensor.
+ */
+Tensor compute_non_linear_filter(const TensorShape &shape, NonLinearFilterFunction function, unsigned int mask_size,
+                                 MatrixPattern pattern, const uint8_t *mask, BorderMode border_mode,
+                                 uint8_t constant_border_value)
+{
+    // Create tensors
+    Tensor src = create_tensor(shape, DataType::U8);
+    Tensor dst = create_tensor(shape, DataType::U8);
+
+    // Create and configure function
+    NENonLinearFilter filter;
+    filter.configure(&src, &dst, function, mask_size, pattern, mask, border_mode, constant_border_value);
+
+    // Allocate tensors (done after configure(); the checks below expect them to be no longer resizable)
+    src.allocator()->allocate();
+    dst.allocator()->allocate();
+
+    BOOST_TEST(!src.info()->is_resizable());
+    BOOST_TEST(!dst.info()->is_resizable());
+
+    // Fill the source tensor only; the destination is produced by the filter
+    library->fill_tensor_uniform(NEAccessor(src), 0);
+
+    // Compute function
+    filter.run();
+
+    return dst;
+}
+} // namespace
+
+#ifndef DOXYGEN_SKIP_THIS
+BOOST_AUTO_TEST_SUITE(NEON)
+BOOST_AUTO_TEST_SUITE(NonLinearFilter)
+
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly"))
+BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes())
+                     * NonLinearFilterFunctions() * boost::unit_test::data::make({ 3U, 5U })
+                     * MatrixPatterns() * BorderModes(),
+                     shape, function, mask_size, pattern, border_mode)
+{
+    std::mt19937                                generator(user_config.seed.get());
+    std::uniform_int_distribution<unsigned int> border_distribution(0, 255); // uint8_t is not a valid IntType for std::uniform_int_distribution
+    const uint8_t                               constant_border_value = static_cast<uint8_t>(border_distribution(generator));
+
+    // Create the mask (NOTE(review): variable-length array is a compiler extension, not standard C++)
+    uint8_t mask[mask_size * mask_size];
+    fill_mask_from_pattern(mask, mask_size, mask_size, pattern);
+    const auto half_mask_size = static_cast<int>(mask_size / 2);
+
+    // Create tensors
+    Tensor src = create_tensor(shape, DataType::U8);
+    Tensor dst = create_tensor(shape, DataType::U8);
+
+    BOOST_TEST(src.info()->is_resizable());
+    BOOST_TEST(dst.info()->is_resizable());
+
+    // Create and configure function
+    NENonLinearFilter filter;
+    filter.configure(&src, &dst, function, mask_size, pattern, mask, border_mode, constant_border_value);
+
+    // Validate valid region (the destination shrinks by half the mask size when the border is undefined)
+    const ValidRegion src_valid_region = shape_to_valid_region(shape);
+    ValidRegion       dst_valid_region = shape_to_valid_region(shape);
+    if(border_mode == BorderMode::UNDEFINED)
+    {
+        dst_valid_region = shape_to_valid_region_undefined_border(shape, BorderSize(half_mask_size));
+    }
+
+    validate(src.info()->valid_region(), src_valid_region);
+    validate(dst.info()->valid_region(), dst_valid_region);
+
+    // Validate padding
+    PaddingCalculator calculator(shape.x(), ((MatrixPattern::OTHER == pattern) ? 1 : 8));
+    calculator.set_border_mode(border_mode);
+    calculator.set_border_size(half_mask_size);
+
+    const PaddingSize write_padding = calculator.required_padding(PaddingCalculator::Option::EXCLUDE_BORDER);
+
+    calculator.set_accessed_elements(16);
+    calculator.set_access_offset(-half_mask_size);
+
+    const PaddingSize read_padding = calculator.required_padding(PaddingCalculator::Option::INCLUDE_BORDER);
+
+    validate(src.info()->padding(), read_padding);
+    validate(dst.info()->padding(), write_padding);
+}
+
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
+BOOST_DATA_TEST_CASE(RunSmall, SmallShapes()
+                     * NonLinearFilterFunctions() * boost::unit_test::data::make({ 3U, 5U })
+                     * MatrixPatterns() * BorderModes(),
+                     shape, function, mask_size, pattern, border_mode)
+{
+    std::mt19937                                generator(user_config.seed.get());
+    std::uniform_int_distribution<unsigned int> border_distribution(0, 255); // uint8_t is not a valid IntType for std::uniform_int_distribution
+    const uint8_t                               constant_border_value = static_cast<uint8_t>(border_distribution(generator));
+
+    // Create the mask (NOTE(review): variable-length array is a compiler extension, not standard C++)
+    uint8_t mask[mask_size * mask_size];
+    fill_mask_from_pattern(mask, mask_size, mask_size, pattern);
+
+    // Compute function
+    Tensor dst = compute_non_linear_filter(shape, function, mask_size, pattern, mask, border_mode, constant_border_value);
+
+    // Compute reference with the same mask and border value
+    RawTensor ref_dst = Reference::compute_reference_non_linear_filter(shape, function, mask_size, pattern, mask, border_mode, constant_border_value);
+
+    // Calculate valid region (shrinks by half the mask size when the border is undefined)
+    ValidRegion valid_region = shape_to_valid_region(shape);
+    if(border_mode == BorderMode::UNDEFINED)
+    {
+        valid_region = shape_to_valid_region_undefined_border(shape, BorderSize(static_cast<int>(mask_size / 2)));
+    }
+
+    // Validate output
+    validate(NEAccessor(dst), ref_dst, valid_region);
+}
+
+BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly"))
+BOOST_DATA_TEST_CASE(RunLarge, LargeShapes()
+                     * NonLinearFilterFunctions() * boost::unit_test::data::make({ 3U, 5U })
+                     * MatrixPatterns() * BorderModes(),
+                     shape, function, mask_size, pattern, border_mode)
+{
+    std::mt19937                                generator(user_config.seed.get());
+    std::uniform_int_distribution<unsigned int> border_distribution(0, 255); // uint8_t is not a valid IntType for std::uniform_int_distribution
+    const uint8_t                               constant_border_value = static_cast<uint8_t>(border_distribution(generator));
+
+    // Create the mask (NOTE(review): variable-length array is a compiler extension, not standard C++)
+    uint8_t mask[mask_size * mask_size];
+    fill_mask_from_pattern(mask, mask_size, mask_size, pattern);
+
+    // Compute function
+    Tensor dst = compute_non_linear_filter(shape, function, mask_size, pattern, mask, border_mode, constant_border_value);
+
+    // Compute reference with the same mask and border value
+    RawTensor ref_dst = Reference::compute_reference_non_linear_filter(shape, function, mask_size, pattern, mask, border_mode, constant_border_value);
+
+    // Calculate valid region (shrinks by half the mask size when the border is undefined)
+    ValidRegion valid_region = shape_to_valid_region(shape);
+    if(border_mode == BorderMode::UNDEFINED)
+    {
+        valid_region = shape_to_valid_region_undefined_border(shape, BorderSize(static_cast<int>(mask_size / 2)));
+    }
+
+    // Validate output
+    validate(NEAccessor(dst), ref_dst, valid_region);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
+BOOST_AUTO_TEST_SUITE_END()
+#endif
diff --git a/tests/validation/Reference.cpp b/tests/validation/Reference.cpp
index f6f3cb8..8a2b73e 100644
--- a/tests/validation/Reference.cpp
+++ b/tests/validation/Reference.cpp
@@ -354,6 +354,22 @@
     return dst;
 }
 
+RawTensor Reference::compute_reference_non_linear_filter(const TensorShape &shape, NonLinearFilterFunction function, unsigned int mask_size,
+                                                         MatrixPattern pattern, const uint8_t *mask, BorderMode border_mode, uint8_t constant_border_value)
+{
+    // Create reference source and destination tensors, both U8 with the given shape
+    RawTensor ref_src = library->get(shape, DataType::U8);
+    RawTensor ref_dst = library->get(shape, DataType::U8);
+
+    // Fill the reference source only; the destination is written by the reference filter below
+    library->fill_tensor_uniform(ref_src, 0);
+
+    // Compute reference by forwarding all filter arguments unchanged to ReferenceCPP
+    ReferenceCPP::non_linear_filter(ref_src, ref_dst, function, mask_size, pattern, mask, border_mode, constant_border_value);
+
+    return ref_dst;
+}
+
 RawTensor Reference::compute_reference_pixel_wise_multiplication(const TensorShape &shape, DataType dt_in0, DataType dt_in1, DataType dt_out, float scale, ConvertPolicy convert_policy,
                                                                  RoundingPolicy rounding_policy)
 {
diff --git a/tests/validation/Reference.h b/tests/validation/Reference.h
index 22aa939..41d6a60 100644
--- a/tests/validation/Reference.h
+++ b/tests/validation/Reference.h
@@ -206,6 +206,20 @@
      */
     static RawTensor compute_reference_gemm(const TensorShape &src_shape1, const TensorShape &src_shape2, const TensorShape &src_shape3,
                                             const TensorShape &dst_shape, float alpha, float beta, DataType dt, int fixed_point_position = 0);
+    /** Compute reference non linear filter function
+     *
+     * @param[in] shape                 Shape of the input and output tensors. Data type supported: U8
+     * @param[in] function              Non linear function to perform
+     * @param[in] mask_size             Mask size. Supported sizes: 3, 5
+     * @param[in] pattern               Matrix pattern
+     * @param[in] mask                  The given mask. Will be used only if pattern is MatrixPattern::OTHER
+     * @param[in] border_mode           Strategy to use for borders.
+     * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. Defaults to 0.
+     *
+     * @return Computed raw tensor.
+     */
+    static RawTensor compute_reference_non_linear_filter(const TensorShape &shape, NonLinearFilterFunction function, unsigned int mask_size,
+                                                         MatrixPattern pattern, const uint8_t *mask, BorderMode border_mode, uint8_t constant_border_value = 0);
     /** Compute reference pixel-wise multiplication
      *
      * @param[in] shape           Shape of the input and output tensors.
diff --git a/tests/validation/ReferenceCPP.cpp b/tests/validation/ReferenceCPP.cpp
index 6264695..7c50f50 100644
--- a/tests/validation/ReferenceCPP.cpp
+++ b/tests/validation/ReferenceCPP.cpp
@@ -224,6 +224,15 @@
 
     boost::apply_visitor(tensor_visitors::gemm_visitor(s1, s2, s3, alpha, beta), d);
 }
+// Non linear filter: checks that both raw tensors are U8 and forwards to tensor_operations::non_linear_filter
+void ReferenceCPP::non_linear_filter(const RawTensor &src, RawTensor &dst, NonLinearFilterFunction function, unsigned int mask_size,
+                                     MatrixPattern pattern, const uint8_t *mask, BorderMode border_mode, uint8_t constant_border_value)
+{
+    ARM_COMPUTE_ERROR_ON(src.data_type() != DataType::U8 || dst.data_type() != DataType::U8); // Only U8 source and destination are accepted
+    const Tensor<uint8_t> s(src.shape(), src.data_type(), src.fixed_point_position(), reinterpret_cast<const uint8_t *>(src.data())); // Typed tensor built from the source's shape, type and data pointer
+    Tensor<uint8_t>       d(dst.shape(), dst.data_type(), dst.fixed_point_position(), reinterpret_cast<uint8_t *>(dst.data())); // Typed tensor built from the destination's shape, type and data pointer
+    tensor_operations::non_linear_filter(s, d, function, mask_size, pattern, mask, border_mode, constant_border_value); // All filter parameters are passed through unchanged
+}
 
 // Pixel-wise multiplication
 void ReferenceCPP::pixel_wise_multiplication(const RawTensor &src1, const RawTensor &src2, RawTensor &dst, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy)
diff --git a/tests/validation/ReferenceCPP.h b/tests/validation/ReferenceCPP.h
index e1b71c5..8ee8f31 100644
--- a/tests/validation/ReferenceCPP.h
+++ b/tests/validation/ReferenceCPP.h
@@ -188,6 +188,19 @@
      */
     static void gemm(const RawTensor &src1, const RawTensor &src2, const RawTensor &src3,
                      RawTensor &dst, float alpha, float beta);
+    /** Compute non linear filter function.
+     *
+     * @param[in]  src                   Input tensor. Data type supported: U8
+     * @param[out] dst                   Output tensor. Data type supported: U8
+     * @param[in]  function              Non linear function to perform (MAX, MEDIAN or MIN)
+     * @param[in]  mask_size             Mask size. Supported sizes: 3, 5
+     * @param[in]  pattern               Matrix pattern (BOX, CROSS, DISK or OTHER)
+     * @param[in]  mask                  The given mask. Forwarded unchanged to the tensor operation.
+     * @param[in]  border_mode           Strategy to use for borders.
+     * @param[in]  constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    static void non_linear_filter(const RawTensor &src, RawTensor &dst, NonLinearFilterFunction function, unsigned int mask_size,
+                                  MatrixPattern pattern, const uint8_t *mask, BorderMode border_mode, uint8_t constant_border_value = 0);
     /** Element-wise multiplication of @p src1, @p src2 and @p scale
      *
      * @param[in]  src1            First tensor.
diff --git a/tests/validation/TensorOperations.h b/tests/validation/TensorOperations.h
index 0430d59..843c52f 100644
--- a/tests/validation/TensorOperations.h
+++ b/tests/validation/TensorOperations.h
@@ -711,6 +711,67 @@
     }
 }
 
+// Non linear filter: for each element of the valid region, writes the MIN, MAX or MEDIAN of the neighbours whose mask entry equals 255
+template <typename T>
+void non_linear_filter(const Tensor<T> &in, Tensor<T> &out, NonLinearFilterFunction function, unsigned int mask_size,
+                       MatrixPattern pattern, const uint8_t *mask, BorderMode border_mode, uint8_t constant_border_value)
+{
+    ARM_COMPUTE_ERROR_ON(MatrixPattern::OTHER == pattern && nullptr == mask); // A mask is mandatory for MatrixPattern::OTHER
+
+    using intermediate_type = typename common_promoted_signed_type<T>::intermediate_type;
+
+    const int                      sq_mask_size   = mask_size * mask_size; // Capacity of the scratch buffer of selected values
+    const int                      half_mask_size = mask_size / 2; // Window reach on each side of the centre element
+    std::vector<intermediate_type> vals(sq_mask_size); // Scratch buffer, reused for every element
+    intermediate_type              current_value = 0;
+
+    ValidRegion valid_region = shape_to_valid_region(in.shape());
+    if(border_mode == BorderMode::UNDEFINED)
+    {
+        valid_region = shape_to_valid_region_undefined_border(in.shape(), BorderSize(half_mask_size)); // Presumably shrinks the region by half_mask_size on each side - TODO confirm in helper
+    }
+
+    for(int element_idx = 0, count = 0, index = 0; element_idx < in.num_elements(); ++element_idx, count = 0, index = 0) // count and index restart at 0 for every element
+    {
+        Coordinates id = index2coord(in.shape(), element_idx);
+        if(is_in_valid_region(valid_region, id)) // Elements outside the valid region are never written to out
+        {
+            int idx = id.x();
+            int idy = id.y();
+            for(int y = idy - half_mask_size; y <= idy + half_mask_size; ++y) // Rows of the window around the element
+            {
+                for(int x = idx - half_mask_size; x <= idx + half_mask_size; ++x, ++index) // index advances through the mask once per window position, row by row
+                {
+                    id.set(0, x); // Only dimensions 0 and 1 are moved; the remaining coordinates of id stay as decoded
+                    id.set(1, y);
+                    current_value = tensor_elem_at(in, id, border_mode, constant_border_value);
+
+                    if(mask[index] == 255) // NOTE(review): mask is read for every pattern but only null-checked for OTHER - confirm callers always pass a filled mask
+                    {
+                        vals[count] = static_cast<intermediate_type>(current_value);
+                        ++count;
+                    }
+                }
+            }
+            std::sort(vals.begin(), vals.begin() + count); // Sort ascending only the count values selected for this element
+            switch(function)
+            {
+                case NonLinearFilterFunction::MIN:
+                    out[element_idx] = saturate_cast<T>(vals[0]); // Smallest selected value
+                    break;
+                case NonLinearFilterFunction::MAX:
+                    out[element_idx] = saturate_cast<T>(vals[count - 1]); // Largest selected value. NOTE(review): count == 0 would index vals[-1] - confirm masks always select at least one entry
+                    break;
+                case NonLinearFilterFunction::MEDIAN:
+                    out[element_idx] = saturate_cast<T>(vals[count / 2]); // Middle value; the upper of the two middle values when count is even
+                    break;
+                default:
+                    ARM_COMPUTE_ERROR("Unsupported NonLinearFilter function.");
+            }
+        }
+    }
+}
+
 // Pixel-wise multiplication
 template <typename T1, typename T2, typename T3>
 void pixel_wise_multiplication(const Tensor<T1> &in1, const Tensor<T2> &in2, Tensor<T3> &out, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy)