COMPMID-523: Fix CLDepthwiseConvolution test.

The specified output size of the failing test case was invalid.
Additionally, the kernel has been cleaned up and asserts have been added
to catch invalid configurations.

Change-Id: I198f3574f003b71968e4081a54cf102d748af5c1
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/88821
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Steven Niu <steven.niu@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
diff --git a/src/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.cpp
index c10e6be..6e56835 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.cpp
@@ -53,34 +53,38 @@
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::F32);
     ARM_COMPUTE_ERROR_ON(weights->info()->dimension(0) != 3 || weights->info()->dimension(1) != 3);
 
+    std::pair<unsigned int, unsigned int> expected_output = scaled_dimensions(input->info()->tensor_shape().x(), input->info()->tensor_shape().y(),
+                                                                              weights->info()->tensor_shape().x(), weights->info()->tensor_shape().y(),
+                                                                              conv_info);
+
+    ARM_COMPUTE_UNUSED(expected_output);
+    ARM_COMPUTE_ERROR_ON(expected_output.first != output->info()->tensor_shape().x());
+    ARM_COMPUTE_ERROR_ON(expected_output.second != output->info()->tensor_shape().y());
+
     _input         = input;
     _output        = output;
     _weights       = weights;
     _conv_stride_x = conv_info.stride().first;
     _conv_stride_y = conv_info.stride().second;
-    _border_size   = BorderSize(weights->info()->dimension(1) / 2, weights->info()->dimension(0) / 2);
-    _conv_pad_x    = std::min(border_size().right, conv_info.pad().first);
-    _conv_pad_y    = std::min(border_size().bottom, conv_info.pad().second);
+    _conv_pad_x    = conv_info.pad().first;
+    _conv_pad_y    = conv_info.pad().second;
+    _border_size   = BorderSize(_conv_pad_y, _conv_pad_x);
 
     // Set build options
-    std::set<std::string> options;
-
-    options.emplace("-DCONV_STRIDE_X=" + support::cpp11::to_string(_conv_stride_x));
+    ARM_COMPUTE_ERROR_ON(_conv_stride_x < 1 || _conv_stride_x > 3);
+    std::set<std::string> options{ "-DCONV_STRIDE_X=" + support::cpp11::to_string(_conv_stride_x) };
 
     _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("depthwise_convolution_3x3", options));
 
     // Configure kernel window
     const unsigned int num_elems_processed_per_iteration = 2;
     const unsigned int num_elems_written_per_iteration   = 2;
-    const unsigned int num_elems_read_per_iteration      = (_conv_stride_x == 1) ? 4 : (_conv_stride_x == 2) ? 5 : 6;
+    const unsigned int num_elems_read_per_iteration      = 3 + _conv_stride_x;
     const unsigned int num_rows_read_per_iteration       = 3;
 
     Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration));
 
-    const int access_right  = border_size().left + ceil_to_multiple(border_size().left + input->info()->dimension(0), num_elems_read_per_iteration);
-    const int access_bottom = border_size().bottom + ceil_to_multiple(border_size().bottom + input->info()->dimension(1), num_rows_read_per_iteration);
-
-    AccessWindowStatic     input_access(input->info(), -border_size().left, -border_size().bottom, access_right, access_bottom);
+    AccessWindowRectangle  input_access(input->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration, _conv_stride_x, _conv_stride_y);
     AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration);
     AccessWindowStatic     weights_access(weights->info(), 0, 0, weights->info()->dimension(0), weights->info()->dimension(1));
 
diff --git a/tests/datasets/DepthwiseConvolutionDataset.h b/tests/datasets/DepthwiseConvolutionDataset.h
index 6a7af63..593b823 100644
--- a/tests/datasets/DepthwiseConvolutionDataset.h
+++ b/tests/datasets/DepthwiseConvolutionDataset.h
@@ -121,6 +121,7 @@
         add_config(TensorShape(7U, 7U, 3U), TensorShape(3U, 3U, 3U), TensorShape(5U, 5U, 3U), PadStrideInfo(1, 1, 0, 0));
         add_config(TensorShape(23U, 27U, 5U), TensorShape(3U, 5U, 5U), TensorShape(11U, 23U, 5U), PadStrideInfo(2, 1, 0, 0));
         add_config(TensorShape(33U, 27U, 7U), TensorShape(7U, 3U, 7U), TensorShape(10U, 13U, 7U), PadStrideInfo(3, 2, 1, 0));
+        add_config(TensorShape(33U, 27U, 11U), TensorShape(3U, 3U, 11U), TensorShape(31U, 14U, 11U), PadStrideInfo(1, 2, 0, 1));
         add_config(TensorShape(17U, 31U, 2U), TensorShape(5U, 9U, 2U), TensorShape(15U, 13U, 2U), PadStrideInfo(1, 2, 1, 1));
         add_config(TensorShape(23U, 27U, 5U), TensorShape(11U, 3U, 5U), TensorShape(13U, 13U, 5U), PadStrideInfo(1, 2, 0, 0));
         add_config(TensorShape(17U, 31U, 2U, 3U), TensorShape(5U, 9U, 2U), TensorShape(15U, 13U, 2U, 3U), PadStrideInfo(1, 2, 1, 1));
@@ -141,6 +142,31 @@
     }
 };
 
+class SmallDepthwiseConvolutionDataset3x3 final : public DepthwiseConvolutionDataset
+{
+public:
+    SmallDepthwiseConvolutionDataset3x3()
+    {
+        add_config(TensorShape(7U, 7U, 3U), TensorShape(3U, 3U, 3U), TensorShape(5U, 5U, 3U), PadStrideInfo(1, 1, 0, 0));
+        add_config(TensorShape(33U, 27U, 11U), TensorShape(3U, 3U, 11U), TensorShape(11U, 14U, 11U), PadStrideInfo(3, 2, 1, 1));
+        add_config(TensorShape(21U, 31U, 9U), TensorShape(3U, 3U, 9U), TensorShape(21U, 15U, 9U), PadStrideInfo(1, 2, 1, 0));
+        add_config(TensorShape(33U, 27U, 11U), TensorShape(3U, 3U, 11U), TensorShape(31U, 14U, 11U), PadStrideInfo(1, 2, 0, 1));
+    }
+};
+
+class LargeDepthwiseConvolutionDataset3x3 final : public DepthwiseConvolutionDataset
+{
+public:
+    LargeDepthwiseConvolutionDataset3x3()
+    {
+        add_config(TensorShape(233U, 277U, 55U), TensorShape(3U, 3U, 55U), TensorShape(116U, 275U, 55U), PadStrideInfo(2, 1, 0, 0));
+        add_config(TensorShape(333U, 277U, 77U), TensorShape(3U, 3U, 77U), TensorShape(111U, 138U, 77U), PadStrideInfo(3, 2, 1, 0));
+        add_config(TensorShape(177U, 311U, 22U), TensorShape(3U, 3U, 22U), TensorShape(177U, 156U, 22U), PadStrideInfo(1, 2, 1, 1));
+        add_config(TensorShape(233U, 277U, 55U), TensorShape(3U, 3U, 55U), TensorShape(231U, 138U, 55U), PadStrideInfo(1, 2, 0, 0));
+        add_config(TensorShape(333U, 277U, 77U), TensorShape(3U, 3U, 77U), TensorShape(166U, 93U, 77U), PadStrideInfo(2, 3, 0, 1));
+        add_config(TensorShape(177U, 311U, 22U), TensorShape(3U, 3U, 22U), TensorShape(89U, 311U, 22U), PadStrideInfo(2, 1, 1, 1));
+    }
+};
 } // namespace datasets
 } // namespace test
 } // namespace arm_compute
diff --git a/tests/datasets/MobileNetDepthwiseSeparableConvolutionLayerDataset.h b/tests/datasets/MobileNetDepthwiseSeparableConvolutionLayerDataset.h
index f6137ee..e615788 100644
--- a/tests/datasets/MobileNetDepthwiseSeparableConvolutionLayerDataset.h
+++ b/tests/datasets/MobileNetDepthwiseSeparableConvolutionLayerDataset.h
@@ -46,22 +46,6 @@
                    1,
                    DimensionRoundingType::FLOOR),
                    PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::FLOOR));
-        add_config(TensorShape(14U, 14U, 512U), TensorShape(3U, 3U, 512U), TensorShape(14U, 14U, 512U), TensorShape(1U, 1U, 512U, 512U), TensorShape(512U), TensorShape(14U, 14U, 512U), PadStrideInfo(1, 1, 1,
-                   1,
-                   DimensionRoundingType::FLOOR),
-                   PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::FLOOR));
-        add_config(TensorShape(14U, 14U, 512U), TensorShape(3U, 3U, 512U), TensorShape(14U, 14U, 512U), TensorShape(1U, 1U, 512U, 512U), TensorShape(512U), TensorShape(14U, 14U, 512U), PadStrideInfo(1, 1, 1,
-                   1,
-                   DimensionRoundingType::FLOOR),
-                   PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::FLOOR));
-        add_config(TensorShape(14U, 14U, 512U), TensorShape(3U, 3U, 512U), TensorShape(14U, 14U, 512U), TensorShape(1U, 1U, 512U, 512U), TensorShape(512U), TensorShape(14U, 14U, 512U), PadStrideInfo(1, 1, 1,
-                   1,
-                   DimensionRoundingType::FLOOR),
-                   PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::FLOOR));
-        add_config(TensorShape(14U, 14U, 512U), TensorShape(3U, 3U, 512U), TensorShape(14U, 14U, 512U), TensorShape(1U, 1U, 512U, 512U), TensorShape(512U), TensorShape(14U, 14U, 512U), PadStrideInfo(1, 1, 1,
-                   1,
-                   DimensionRoundingType::FLOOR),
-                   PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::FLOOR));
     }
 };
 } // namespace datasets
diff --git a/tests/validation/CL/DepthwiseConvolution.cpp b/tests/validation/CL/DepthwiseConvolution.cpp
index 1646ab6..5f1bde8 100644
--- a/tests/validation/CL/DepthwiseConvolution.cpp
+++ b/tests/validation/CL/DepthwiseConvolution.cpp
@@ -51,16 +51,30 @@
 template <typename T>
 using CLDepthwiseConvolutionFixture = DepthwiseConvolutionValidationFixture<CLTensor, CLAccessor, CLDepthwiseConvolution, T>;
 
-// FIXME: COMPMID-523 fix the bug in depthwise convolution
+TEST_SUITE(Generic)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionFixture<float>, framework::DatasetMode::PRECOMMIT, datasets::SmallDepthwiseConvolutionDataset())
 {
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
-
 FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionFixture<float>, framework::DatasetMode::NIGHTLY, datasets::LargeDepthwiseConvolutionDataset())
 {
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
+TEST_SUITE_END()
+
+template <typename T>
+using CLDepthwiseConvolutionFixture3x3 = DepthwiseConvolutionValidationFixture<CLTensor, CLAccessor, CLDepthwiseConvolution3x3, T>;
+
+TEST_SUITE(W3x3)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionFixture3x3<float>, framework::DatasetMode::PRECOMMIT, datasets::SmallDepthwiseConvolutionDataset3x3())
+{
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionFixture3x3<float>, framework::DatasetMode::NIGHTLY, datasets::LargeDepthwiseConvolutionDataset3x3())
+{
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+TEST_SUITE_END()
 
 TEST_SUITE_END()
 TEST_SUITE_END()
diff --git a/tests/validation/CL/DepthwiseSeparableConvolutionLayer.cpp b/tests/validation/CL/DepthwiseSeparableConvolutionLayer.cpp
index f0e30cd..4d9f6b8 100644
--- a/tests/validation/CL/DepthwiseSeparableConvolutionLayer.cpp
+++ b/tests/validation/CL/DepthwiseSeparableConvolutionLayer.cpp
@@ -53,9 +53,8 @@
 
 template <typename T>
 using CLDepthwiseSeparableConvolutionLayerFixture = DepthwiseSeparableConvolutionValidationFixture<CLTensor, CLAccessor, CLDepthwiseSeparableConvolutionLayer, T>;
-//
-// FIXME: COMPMID-523 fix the bug in depthwise convolution
-DISABLED_FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseSeparableConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, datasets::MobileNetDepthwiseSeparableConvolutionLayerDataset())
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseSeparableConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, datasets::MobileNetDepthwiseSeparableConvolutionLayerDataset())
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f32, tolerance_num);
diff --git a/tests/validation/fixtures/DepthwiseConvolutionFixture.h b/tests/validation/fixtures/DepthwiseConvolutionFixture.h
index 462c0f8..4a890f6 100644
--- a/tests/validation/fixtures/DepthwiseConvolutionFixture.h
+++ b/tests/validation/fixtures/DepthwiseConvolutionFixture.h
@@ -78,7 +78,7 @@
         TensorType dst     = create_tensor<TensorType>(output_shape, DataType::F32);
 
         // Create Depthwise Convolution configure function
-        CLDepthwiseConvolution depthwise_convolution;
+        FunctionType depthwise_convolution;
         depthwise_convolution.configure(&src, &dst, &weights, pad_stride_info);
 
         // Allocate tensors