COMPMID-424 Add CL validation tests for Box3x3

* Add tests for different border modes
* Add padding calculator

Change-Id: Ic4708faddfb1c8e6b59d349cf9cb48c9a181d717
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/78105
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Moritz Pflanzer <moritz.pflanzer@arm.com>
diff --git a/tests/validation/CL/BitwiseAnd.cpp b/tests/validation/CL/BitwiseAnd.cpp
index bbafa87..8b40aff 100644
--- a/tests/validation/CL/BitwiseAnd.cpp
+++ b/tests/validation/CL/BitwiseAnd.cpp
@@ -51,7 +51,7 @@
 
 namespace
 {
-/** Compute Neon bitwise and function.
+/** Compute CL bitwise and function.
  *
  * @param[in] shape Shape of the input and output tensors.
  *
diff --git a/tests/validation/CL/Box3x3.cpp b/tests/validation/CL/Box3x3.cpp
new file mode 100644
index 0000000..fa60453
--- /dev/null
+++ b/tests/validation/CL/Box3x3.cpp
@@ -0,0 +1,165 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "CL/CLAccessor.h"
+#include "CL/Helper.h"
+#include "Globals.h"
+#include "PaddingCalculator.h"
+#include "TensorLibrary.h"
+#include "TypePrinter.h"
+#include "Utils.h"
+#include "validation/Datasets.h"
+#include "validation/Reference.h"
+#include "validation/Validation.h"
+#include "validation/ValidationUserConfiguration.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLBox3x3.h"
+
+#include "boost_wrapper.h"
+
+#include <random>
+#include <string>
+
+using namespace arm_compute;
+using namespace arm_compute::test;
+using namespace arm_compute::test::cl;
+using namespace arm_compute::test::validation;
+
+namespace
+{
+/** Run the CL box3x3 filter on a uniformly filled U8 tensor and return its output.
+ *
+ * @param[in] shape                 Shape of the input and output tensors.
+ * @param[in] border_mode           Border mode forwarded to CLBox3x3::configure().
+ * @param[in] constant_border_value Border value forwarded to configure(); used if @p border_mode == CONSTANT.
+ *
+ * @return Destination tensor after the function has been run.
+ */
+CLTensor compute_box3x3(const TensorShape &shape, BorderMode border_mode, uint8_t constant_border_value)
+{
+    // Create U8 source and destination tensors of the same shape
+    CLTensor src = create_tensor(shape, DataType::U8);
+    CLTensor dst = create_tensor(shape, DataType::U8);
+
+    // Create the function and configure it with the requested border handling
+    CLBox3x3 box3x3;
+    box3x3.configure(&src, &dst, border_mode, constant_border_value);
+
+    // Allocate tensors after configuration; both must report non-resizable afterwards
+    src.allocator()->allocate();
+    dst.allocator()->allocate();
+
+    BOOST_TEST(!src.info()->is_resizable());
+    BOOST_TEST(!dst.info()->is_resizable());
+
+    // Fill the source tensor with the same fill call and argument (0) that the reference computation uses
+    library->fill_tensor_uniform(CLAccessor(src), 0);
+
+    // Run the configured function
+    box3x3.run();
+
+    return dst;
+}
+} // namespace
+
+#ifndef DOXYGEN_SKIP_THIS
+BOOST_AUTO_TEST_SUITE(CL)
+BOOST_AUTO_TEST_SUITE(Box3x3)
+
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly"))
+BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * BorderModes(), shape, border_mode)
+{
+    // Create U8 source and destination tensors; both must still be resizable before configuration
+    CLTensor src = create_tensor(shape, DataType::U8);
+    CLTensor dst = create_tensor(shape, DataType::U8);
+
+    BOOST_TEST(src.info()->is_resizable());
+    BOOST_TEST(dst.info()->is_resizable());
+
+    // Create and configure function (no explicit constant border value is passed here)
+    CLBox3x3 box3x3;
+    box3x3.configure(&src, &dst, border_mode);
+
+    // Validate valid region: full shape for src; for dst it depends on whether border_mode is UNDEFINED (border size 1)
+    const ValidRegion src_valid_region = shape_to_valid_region(shape);
+    const ValidRegion dst_valid_region = shape_to_valid_region(shape, border_mode == BorderMode::UNDEFINED, BorderSize(1));
+    validate(src.info()->valid_region(), src_valid_region);
+    validate(dst.info()->valid_region(), dst_valid_region);
+
+    // Validate padding: dst padding is read first, src padding after also setting accessed elements (16) and access offset (-1)
+    PaddingCalculator calculator(shape.x(), 8);
+    calculator.set_border_size(1);
+    calculator.set_border_mode(border_mode);
+
+    const PaddingSize dst_padding = calculator.required_padding();
+
+    calculator.set_accessed_elements(16);
+    calculator.set_access_offset(-1);
+
+    const PaddingSize src_padding = calculator.required_padding();
+
+    validate(src.info()->padding(), src_padding);
+    validate(dst.info()->padding(), dst_padding);
+}
+
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
+BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * BorderModes(), shape, border_mode)
+{
+    std::mt19937                       gen(user_config.seed.get());
+    std::uniform_int_distribution<int> distribution(0, 255); // 8-bit types are not valid IntType arguments (undefined behaviour)
+    const uint8_t                      border_value = static_cast<uint8_t>(distribution(gen));
+
+    // Run the CL function with a border value drawn from the user-configured seed
+    CLTensor dst = compute_box3x3(shape, border_mode, border_value);
+
+    // Compute the reference with the same border mode and border value
+    RawTensor ref_dst = Reference::compute_reference_box3x3(shape, border_mode, border_value);
+
+    // Validate output within the region derived from the shape, the UNDEFINED-border flag and a border size of 1
+    validate(CLAccessor(dst), ref_dst, shape_to_valid_region(shape, border_mode == BorderMode::UNDEFINED, BorderSize(1)));
+}
+
+BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly"))
+BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * BorderModes(), shape, border_mode)
+{
+    std::mt19937                       gen(user_config.seed.get());
+    std::uniform_int_distribution<int> distribution(0, 255); // 8-bit types are not valid IntType arguments (undefined behaviour)
+    const uint8_t                      border_value = static_cast<uint8_t>(distribution(gen));
+
+    // Run the CL function with a border value drawn from the user-configured seed
+    CLTensor dst = compute_box3x3(shape, border_mode, border_value);
+
+    // Compute the reference with the same border mode and border value
+    RawTensor ref_dst = Reference::compute_reference_box3x3(shape, border_mode, border_value);
+
+    // Validate output within the region derived from the shape, the UNDEFINED-border flag and a border size of 1
+    validate(CLAccessor(dst), ref_dst, shape_to_valid_region(shape, border_mode == BorderMode::UNDEFINED, BorderSize(1)));
+}
+
+BOOST_AUTO_TEST_SUITE_END()
+BOOST_AUTO_TEST_SUITE_END()
+#endif
diff --git a/tests/validation/CL/CMakeLists.txt b/tests/validation/CL/CMakeLists.txt
index 9b67d1d..44fde2f 100644
--- a/tests/validation/CL/CMakeLists.txt
+++ b/tests/validation/CL/CMakeLists.txt
@@ -29,9 +29,13 @@
     ${CMAKE_CURRENT_SOURCE_DIR}/CLFixture.h
     ${CMAKE_CURRENT_SOURCE_DIR}/CLFixture.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/BitwiseAnd.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/IntegralImage.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/Box3x3.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/DepthConvert.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/FillBorder.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/IntegralImage.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/Sobel3x3.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/Sobel5x5.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/Threshold.cpp
 )
 
 add_library(arm_compute_test_validation_OPENCL OBJECT
diff --git a/tests/validation/NEON/Box3x3.cpp b/tests/validation/NEON/Box3x3.cpp
index b2a66cd..c051676 100644
--- a/tests/validation/NEON/Box3x3.cpp
+++ b/tests/validation/NEON/Box3x3.cpp
@@ -31,6 +31,7 @@
 #include "validation/Datasets.h"
 #include "validation/Reference.h"
 #include "validation/Validation.h"
+#include "validation/ValidationUserConfiguration.h"
 
 #include "arm_compute/core/Helpers.h"
 #include "arm_compute/core/Types.h"
@@ -51,21 +52,23 @@
 
 namespace
 {
-/** Compute Neon 3-by-3 box filter.
+/** Compute Neon box3x3 filter.
  *
- * @param[in] shape Shape of the input and output tensors.
+ * @param[in] shape                 Shape of the input and output tensors.
+ * @param[in] border_mode           BorderMode used by the input tensor.
+ * @param[in] constant_border_value Constant to use if @p border_mode == CONSTANT.
  *
  * @return Computed output tensor.
  */
-Tensor compute_box3x3(const TensorShape &shape)
+Tensor compute_box3x3(const TensorShape &shape, BorderMode border_mode, uint8_t constant_border_value)
 {
     // Create tensors
     Tensor src = create_tensor(shape, DataType::U8);
     Tensor dst = create_tensor(shape, DataType::U8);
 
     // Create and configure function
-    NEBox3x3 band;
-    band.configure(&src, &dst, BorderMode::UNDEFINED);
+    NEBox3x3 box3x3;
+    box3x3.configure(&src, &dst, border_mode, constant_border_value);
 
     // Allocate tensors
     src.allocator()->allocate();
@@ -78,7 +81,7 @@
     library->fill_tensor_uniform(NEAccessor(src), 0);
 
     // Compute function
-    band.run();
+    box3x3.run();
 
     return dst;
 }
@@ -89,7 +92,7 @@
 BOOST_AUTO_TEST_SUITE(Box3x3)
 
 BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly"))
-BOOST_DATA_TEST_CASE(Configuration, SmallShapes() + LargeShapes(), shape)
+BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * BorderModes(), shape, border_mode)
 {
     // Create tensors
     Tensor src = create_tensor(shape, DataType::U8);
@@ -99,18 +102,19 @@
     BOOST_TEST(dst.info()->is_resizable());
 
     // Create and configure function
-    NEBox3x3 band;
-    band.configure(&src, &dst, BorderMode::UNDEFINED);
+    NEBox3x3 box3x3;
+    box3x3.configure(&src, &dst, border_mode);
 
     // Validate valid region
     const ValidRegion src_valid_region = shape_to_valid_region(shape);
-    const ValidRegion dst_valid_region = shape_to_valid_region_undefined_border(shape, BorderSize(1));
+    const ValidRegion dst_valid_region = shape_to_valid_region(shape, border_mode == BorderMode::UNDEFINED, BorderSize(1));
     validate(src.info()->valid_region(), src_valid_region);
     validate(dst.info()->valid_region(), dst_valid_region);
 
     // Validate padding
     PaddingCalculator calculator(shape.x(), 8);
     calculator.set_border_size(1);
+    calculator.set_border_mode(border_mode);
 
     const PaddingSize dst_padding = calculator.required_padding();
 
@@ -124,29 +128,37 @@
 }
 
 BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
-BOOST_DATA_TEST_CASE(RunSmall, SmallShapes(), shape)
+BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * BorderModes(), shape, border_mode)
 {
+    std::mt19937                       gen(user_config.seed.get());
+    std::uniform_int_distribution<int> distribution(0, 255); // 8-bit types are not valid IntType arguments (undefined behaviour)
+    const uint8_t                      border_value = static_cast<uint8_t>(distribution(gen));
+
     // Compute function
-    Tensor dst = compute_box3x3(shape);
+    Tensor dst = compute_box3x3(shape, border_mode, border_value);
 
     // Compute reference
-    RawTensor ref_dst = Reference::compute_reference_box3x3(shape);
+    RawTensor ref_dst = Reference::compute_reference_box3x3(shape, border_mode, border_value);
 
     // Validate output
-    validate(NEAccessor(dst), ref_dst, shape_to_valid_region_undefined_border(shape, BorderSize(1)));
+    validate(NEAccessor(dst), ref_dst, shape_to_valid_region(shape, border_mode == BorderMode::UNDEFINED, BorderSize(1)));
 }
 
 BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly"))
-BOOST_DATA_TEST_CASE(RunLarge, LargeShapes(), shape)
+BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * BorderModes(), shape, border_mode)
 {
+    std::mt19937                       gen(user_config.seed.get());
+    std::uniform_int_distribution<int> distribution(0, 255); // 8-bit types are not valid IntType arguments (undefined behaviour)
+    const uint8_t                      border_value = static_cast<uint8_t>(distribution(gen));
+
     // Compute function
-    Tensor dst = compute_box3x3(shape);
+    Tensor dst = compute_box3x3(shape, border_mode, border_value);
 
     // Compute reference
-    RawTensor ref_dst = Reference::compute_reference_box3x3(shape);
+    RawTensor ref_dst = Reference::compute_reference_box3x3(shape, border_mode, border_value);
 
     // Validate output
-    validate(NEAccessor(dst), ref_dst, shape_to_valid_region_undefined_border(shape, BorderSize(1)));
+    validate(NEAccessor(dst), ref_dst, shape_to_valid_region(shape, border_mode == BorderMode::UNDEFINED, BorderSize(1)));
 }
 
 BOOST_AUTO_TEST_SUITE_END()
diff --git a/tests/validation/NEON/CMakeLists.txt b/tests/validation/NEON/CMakeLists.txt
index 86bcce7..d928362 100644
--- a/tests/validation/NEON/CMakeLists.txt
+++ b/tests/validation/NEON/CMakeLists.txt
@@ -30,21 +30,31 @@
     ${CMAKE_CURRENT_SOURCE_DIR}/AccumulateWeighted.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/ArithmeticAddition.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/ArithmeticSubtraction.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/BatchNormalizationLayer.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/BitwiseAnd.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/BitwiseNot.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/BitwiseOr.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/BitwiseXor.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/Box3x3.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/ConvolutionLayer.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/ConvolutionLayerDirect.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/DepthConvert.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/FillBorder.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/Fixedpoint/Exp_QS8.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/Fixedpoint/Invsqrt_QS8.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/Fixedpoint/Log_QS8.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/Fixedpoint/Reciprocal_QS8.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/FullyConnectedLayer.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/GEMM.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/IntegralImage.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/NormalizationLayer.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/PixelWiseMultiplication.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/IntegralImage.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/MeanStdDev.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/Pooling/PoolingLayer.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/Sobel3x3.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/Sobel5x5.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/SoftmaxLayer.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/Threshold.cpp
 )
 
 add_library(arm_compute_test_validation_NEON OBJECT
diff --git a/tests/validation/Reference.cpp b/tests/validation/Reference.cpp
index be3f28b..cab9d51 100644
--- a/tests/validation/Reference.cpp
+++ b/tests/validation/Reference.cpp
@@ -266,7 +266,7 @@
     return ref_dst;
 }
 
-RawTensor Reference::compute_reference_box3x3(const TensorShape &shape)
+RawTensor Reference::compute_reference_box3x3(const TensorShape &shape, BorderMode border_mode, uint8_t constant_border_value)
 {
     // Create reference
     RawTensor ref_src = library->get(shape, DataType::U8);
@@ -276,7 +276,7 @@
     library->fill_tensor_uniform(ref_src, 0);
 
     // Compute reference
-    ReferenceCPP::box3x3(ref_src, ref_dst);
+    ReferenceCPP::box3x3(ref_src, ref_dst, border_mode, constant_border_value);
 
     return ref_dst;
 }
diff --git a/tests/validation/Reference.h b/tests/validation/Reference.h
index 4cfd3c2..973322d 100644
--- a/tests/validation/Reference.h
+++ b/tests/validation/Reference.h
@@ -152,13 +152,15 @@
      * @return Computed raw tensor.
      */
     static RawTensor compute_reference_bitwise_not(const TensorShape &shape);
-    /** Compute reference 3-by-3 box filter.
+    /** Compute reference box3x3 filter.
      *
-     * @param[in] shape Shape of the input and output tensors.
+     * @param[in] shape                 Shape of the input and output tensors.
+     * @param[in] border_mode           BorderMode used by the input tensor.
+     * @param[in] constant_border_value Constant to use if @p border_mode == CONSTANT.
      *
      * @return Computed raw tensor.
      */
-    static RawTensor compute_reference_box3x3(const TensorShape &shape);
+    static RawTensor compute_reference_box3x3(const TensorShape &shape, BorderMode border_mode, uint8_t constant_border_value);
     /** Compute reference depth convert.
      *
      * @param[in] shape                Shape of the input and output tensors.
diff --git a/tests/validation/ReferenceCPP.cpp b/tests/validation/ReferenceCPP.cpp
index ca6bb6c..15e6bac 100644
--- a/tests/validation/ReferenceCPP.cpp
+++ b/tests/validation/ReferenceCPP.cpp
@@ -178,13 +178,13 @@
     tensor_operations::bitwise_not(s, d);
 }
 
-// 3-by-3 box filter
-void ReferenceCPP::box3x3(const RawTensor &src, RawTensor &dst)
+// Box3x3 filter
+void ReferenceCPP::box3x3(const RawTensor &src, RawTensor &dst, BorderMode border_mode, uint8_t constant_border_value)
 {
     ARM_COMPUTE_ERROR_ON(src.data_type() != DataType::U8 || dst.data_type() != DataType::U8);
     const Tensor<uint8_t> s(src.shape(), src.data_type(), src.fixed_point_position(), reinterpret_cast<const uint8_t *>(src.data()));
     Tensor<uint8_t>       d(dst.shape(), dst.data_type(), dst.fixed_point_position(), reinterpret_cast<uint8_t *>(dst.data()));
-    tensor_operations::box3x3(s, d);
+    tensor_operations::box3x3(s, d, border_mode, constant_border_value);
 }
 
 // Depth conversion
diff --git a/tests/validation/ReferenceCPP.h b/tests/validation/ReferenceCPP.h
index b5e3fa0..aeafa76 100644
--- a/tests/validation/ReferenceCPP.h
+++ b/tests/validation/ReferenceCPP.h
@@ -145,12 +145,14 @@
      * @param[out] dst Result tensor.
      */
     static void bitwise_not(const RawTensor &src, RawTensor &dst);
-    /** Function to compute 3-by-3 box filtered result tensor.
+    /** Function to compute box3x3 filtered result tensor.
      *
-     * @param[in]  src Input tensor.
-     * @param[out] dst Result tensor.
+     * @param[in]  src                   Input tensor.
+     * @param[out] dst                   Result tensor.
+     * @param[in]  border_mode           Border mode.
+     * @param[in]  constant_border_value Constant border value if @p border_mode is BorderMode::CONSTANT.
      */
-    static void box3x3(const RawTensor &src, RawTensor &dst);
+    static void box3x3(const RawTensor &src, RawTensor &dst, BorderMode border_mode, uint8_t constant_border_value);
     /** Depth conversion from @p src to @p dst
      *
      * @param[in]  src    First tensor.
diff --git a/tests/validation/TensorOperations.h b/tests/validation/TensorOperations.h
index fce2575..5695593 100644
--- a/tests/validation/TensorOperations.h
+++ b/tests/validation/TensorOperations.h
@@ -201,6 +201,7 @@
     }
 }
 
+// Return a tensor element at a specified coordinate with different border modes
 template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type = 0>
 T tensor_elem_at(const Tensor<T> &in, Coordinates &coord, BorderMode border_mode, T constant_border_value)
 {
@@ -209,14 +210,10 @@
     const int width  = static_cast<int>(in.shape().x());
     const int height = static_cast<int>(in.shape().y());
 
-    // If on border
+    // If coordinates beyond range of tensor's width or height
     if(x < 0 || y < 0 || x >= width || y >= height)
     {
-        if(border_mode == BorderMode::CONSTANT)
-        {
-            return constant_border_value;
-        }
-        else if(border_mode == BorderMode::REPLICATE)
+        if(border_mode == BorderMode::REPLICATE)
         {
             coord.set(0, std::max(0, std::min(x, width - 1)));
             coord.set(1, std::max(0, std::min(y, height - 1)));
@@ -224,10 +221,7 @@
         }
         else
         {
-            // Return a random value if on border and border_mode == UNDEFINED
-            std::mt19937                     gen(user_config.seed.get());
-            std::uniform_int_distribution<T> distribution(0, 255);
-            return distribution(gen);
+            return constant_border_value;
         }
     }
     else
@@ -257,8 +251,7 @@
         {
             coord.set(0, i);
             coord.set(1, j);
-            double pixel_to_multiply = tensor_elem_at(in, coord, border_mode, constant_border_value);
-            val += static_cast<double>(*filter_itr) * pixel_to_multiply;
+            val += static_cast<double>(*filter_itr) * tensor_elem_at(in, coord, border_mode, constant_border_value);
             ++filter_itr;
         }
     }
@@ -508,20 +501,16 @@
     }
 }
 
-// 3-by-3 box filter
+// Box3x3 filter
 template <typename T, typename = typename std::enable_if<std::is_integral<T>::value>::type>
-void box3x3(const Tensor<T> &in, Tensor<T> &out)
+void box3x3(const Tensor<T> &in, Tensor<T> &out, BorderMode border_mode, T constant_border_value)
 {
     const std::array<T, 9> filter{ { 1, 1, 1, 1, 1, 1, 1, 1, 1 } };
-    float             scale        = 1.f / static_cast<float>(filter.size());
-    const ValidRegion valid_region = shape_to_valid_region_undefined_border(in.shape(), BorderSize(1));
+    float scale = 1.f / static_cast<float>(filter.size());
     for(int element_idx = 0; element_idx < in.num_elements(); ++element_idx)
     {
         const Coordinates id = index2coord(in.shape(), element_idx);
-        if(is_in_valid_region(valid_region, id))
-        {
-            apply_2d_spatial_filter(id, in, out, TensorShape(3U, 3U), filter.data(), scale, BorderMode::UNDEFINED);
-        }
+        apply_2d_spatial_filter(id, in, out, TensorShape(3U, 3U), filter.data(), scale, border_mode, constant_border_value);
     }
 }