COMPMID-2600: Implement a new and generic depthwise convolution for CL QASYMM8 NHWC

The NCHW case is supported at function level by permuting the
inputs/outputs to NHWC.

This patch also removes CLDirectConvolutionLayerOutputStageKernel, which
is deprecated, as well as some kernels that were only used in the generic
case of depthwise convolution.

Change-Id: I91e0f02d0a2f4a4a352e08c248e648944137fe68
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2056
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
diff --git a/src/runtime/CL/tuners/BifrostTuner.cpp b/src/runtime/CL/tuners/BifrostTuner.cpp
index 187f52f..5b23baa 100644
--- a/src/runtime/CL/tuners/BifrostTuner.cpp
+++ b/src/runtime/CL/tuners/BifrostTuner.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -171,24 +171,6 @@
     k.set_lws_hint(lws_hint);
 }
 
-void tune_depthwise_im2col_kernel(CLDepthwiseIm2ColKernel &k)
-{
-    cl::NDRange     lws_hint   = k.lws_hint();
-    const GPUTarget gpu_target = k.get_target();
-
-    // Configure the local work size for Bifrost with a value obtained
-    // via exhaustive autotuning for the MobileNets tensor shapes.
-    if(gpu_target_is_in(gpu_target,
-                        GPUTarget::G71, GPUTarget::G72, GPUTarget::G76,
-                        GPUTarget::G51, GPUTarget::G51BIG, GPUTarget::G51LIT,
-                        GPUTarget::G52, GPUTarget::G52LIT))
-    {
-        lws_hint = cl::NDRange(1, 2, 1);
-    }
-
-    k.set_lws_hint(lws_hint);
-}
-
 void tune_gemv_kernel(CLGEMMMatrixVectorMultiplyKernel &k)
 {
     cl::NDRange     lws_hint   = k.lws_hint();
@@ -311,10 +293,6 @@
     {
         tune_im2col_kernel(*utils::cast::polymorphic_downcast<CLIm2ColKernel *>(&kernel));
     }
-    else if(dynamic_cast<CLDepthwiseIm2ColKernel *>(&kernel) != nullptr)
-    {
-        tune_depthwise_im2col_kernel(*utils::cast::polymorphic_downcast<CLDepthwiseIm2ColKernel *>(&kernel));
-    }
     else if(dynamic_cast<CLGEMMMatrixVectorMultiplyKernel *>(&kernel) != nullptr)
     {
         tune_gemv_kernel(*utils::cast::polymorphic_downcast<CLGEMMMatrixVectorMultiplyKernel *>(&kernel));