Port DepthConvert to new API

- Renames DepthConvert to Cast
- Ports both NEDepthConvertLayer and CLDepthConvertLayer variants
- Removes the legacy shift capability from DepthConvert; only a shift
  of 0 is now allowed

Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: I806a0f8eb23d23502b632c529fda7edde19c8176
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5565
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
index e5de45c..3d2dbdb 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
@@ -34,7 +34,6 @@
 class IMemoryManager;
 class ICLTensor;
 class ITensorInfo;
-class CLDepthConvertLayerKernel;
 class CLGEMMLowpMatrixMultiplyNativeKernel;
 class CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel;
 class CLGEMMLowpOffsetContributionKernel;
@@ -49,6 +48,14 @@
 } // namespace kernels
 } // namespace opencl
 
+namespace opencl
+{
+namespace kernels
+{
+class ClCastKernel;
+} // namespace kernels
+} // namespace opencl
+
 /** Basic function to execute GEMMLowpMatrixMultiplyCore on OpenCL. */
 class CLGEMMLowpMatrixMultiplyCore : public IFunction
 {
@@ -143,7 +150,7 @@
     MemoryGroup _memory_group;
 
     // Kernels used
-    std::unique_ptr<CLDepthConvertLayerKernel>                     _weights_to_qasymm8;
+    std::unique_ptr<opencl::kernels::ClCastKernel>                 _weights_to_qasymm8;
     std::unique_ptr<CLGEMMLowpMatrixMultiplyNativeKernel>          _mm_native_kernel;
     std::unique_ptr<CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel> _mm_reshaped_only_rhs_kernel;
     std::unique_ptr<opencl::kernels::ClGemmReshapeRhsMatrixKernel> _mtx_b_reshape_kernel;