COMPMID-2600: Implement a new and generic depthwise convolution for CL QASYMM8 NHWC

The NCHW case is supported at function level by permuting the
inputs/outputs to NHWC.

This patch also removes CLDirectConvolutionLayerOutputStageKernel which
is deprecated and some kernels which were only used in the generic case
of depthwise convolution.

Change-Id: I91e0f02d0a2f4a4a352e08c248e648944137fe68
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2056
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
diff --git a/tests/datasets/DepthwiseConvolutionLayerDataset.h b/tests/datasets/DepthwiseConvolutionLayerDataset.h
index d061727..2990b13 100644
--- a/tests/datasets/DepthwiseConvolutionLayerDataset.h
+++ b/tests/datasets/DepthwiseConvolutionLayerDataset.h
@@ -120,7 +120,7 @@
 public:
     SmallDepthwiseConvolutionLayerDataset()
     {
-        add_config(TensorShape(7U, 7U, 1U), Size2D(3U, 3U), PadStrideInfo(1, 1, 0, 0));
+        add_config(TensorShape(7U, 7U, 1U), Size2D(1U, 1U), PadStrideInfo(1, 1, 0, 0));
         add_config(TensorShape(23U, 27U, 5U), Size2D(3U, 5U), PadStrideInfo(2, 1, 0, 0));
         add_config(TensorShape(33U, 27U, 7U), Size2D(7U, 3U), PadStrideInfo(3, 2, 1, 0));
         // Asymmetric padding
@@ -135,16 +135,16 @@
 public:
     LargeDepthwiseConvolutionLayerDataset()
     {
-        add_config(TensorShape(33U, 27U, 11U), Size2D(3U, 3U), PadStrideInfo(1, 2, 0, 1));
+        add_config(TensorShape(33U, 27U, 11U), Size2D(3U, 4U), PadStrideInfo(1, 2, 0, 1));
         add_config(TensorShape(17U, 31U, 2U), Size2D(5U, 9U), PadStrideInfo(1, 2, 1, 1));
         add_config(TensorShape(23U, 27U, 5U), Size2D(11U, 3U), PadStrideInfo(1, 2, 0, 0));
         add_config(TensorShape(17U, 31U, 2U, 3U), Size2D(5U, 9U), PadStrideInfo(1, 2, 1, 1));
-        add_config(TensorShape(233U, 277U, 55U), Size2D(3U, 3U), PadStrideInfo(2, 1, 0, 0));
-        add_config(TensorShape(333U, 277U, 77U), Size2D(3U, 3U), PadStrideInfo(3, 2, 1, 0));
-        add_config(TensorShape(177U, 311U, 22U), Size2D(3U, 3U), PadStrideInfo(1, 2, 1, 1));
-        add_config(TensorShape(233U, 277U, 55U), Size2D(3U, 3U), PadStrideInfo(1, 2, 0, 0));
-        add_config(TensorShape(333U, 277U, 77U), Size2D(3U, 3U), PadStrideInfo(2, 3, 0, 1));
-        add_config(TensorShape(177U, 311U, 22U), Size2D(3U, 3U), PadStrideInfo(2, 1, 1, 1));
+        add_config(TensorShape(233U, 277U, 55U), Size2D(1U, 1U), PadStrideInfo(2, 1, 0, 0));
+        add_config(TensorShape(333U, 277U, 77U), Size2D(1U, 1U), PadStrideInfo(3, 2, 1, 0));
+        add_config(TensorShape(177U, 311U, 22U), Size2D(3U, 4U), PadStrideInfo(1, 2, 1, 1));
+        add_config(TensorShape(233U, 277U, 55U), Size2D(3U, 4U), PadStrideInfo(1, 2, 0, 0));
+        add_config(TensorShape(333U, 277U, 77U), Size2D(3U, 4U), PadStrideInfo(2, 3, 0, 1));
+        add_config(TensorShape(177U, 311U, 22U), Size2D(3U, 4U), PadStrideInfo(2, 1, 1, 1));
         // Asymmetric padding
         add_config(TensorShape(33U, 27U, 7U), Size2D(5U, 7U), PadStrideInfo(3, 2, 2, 1, 2, 0, DimensionRoundingType::FLOOR));
         add_config(TensorShape(33U, 27U, 7U), Size2D(5U, 7U), PadStrideInfo(3, 2, 1, 3, 0, 2, DimensionRoundingType::FLOOR));