Port DepthConvert to new API

- Renames DepthConvert to Cast
- Ports both NEDepthConvertLayer and CLDepthConvertLayer variants
- Removes the legacy shift capability from DepthConvert; only a shift
  of 0 is now accepted

Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: I806a0f8eb23d23502b632c529fda7edde19c8176
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5565
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/arm_compute/runtime/CL/functions/CLCast.h b/arm_compute/runtime/CL/functions/CLCast.h
index 6e4cf62..d2cea7a 100644
--- a/arm_compute/runtime/CL/functions/CLCast.h
+++ b/arm_compute/runtime/CL/functions/CLCast.h
@@ -24,10 +24,11 @@
 #ifndef ARM_COMPUTE_CLCAST_H
 #define ARM_COMPUTE_CLCAST_H
 
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+#include "arm_compute/runtime/IFunction.h"
 
-#include <cstdint>
+#include "arm_compute/core/Types.h"
+
+#include <memory>
 
 namespace arm_compute
 {
@@ -35,10 +36,22 @@
 class ICLTensor;
 class ITensorInfo;
 
-/** Basic function to run @ref CLDepthConvertLayerKernel. */
-class CLCast : public ICLSimpleFunction
+/** Basic function to run @ref opencl::kernels::ClCastKernel */
+class CLCast : public IFunction
 {
 public:
+    /** Constructor */
+    CLCast();
+    /** Destructor */
+    ~CLCast();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLCast(const CLCast &) = delete;
+    /** Default move constructor */
+    CLCast(CLCast &&);
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLCast &operator=(const CLCast &) = delete;
+    /** Default move assignment operator */
+    CLCast &operator=(CLCast &&);
     /** Initialize the function's source, destination
      *
      * Valid data layouts:
@@ -91,6 +104,13 @@
      * @return a status
      */
     static Status validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    struct Impl;
+    std::unique_ptr<Impl> _impl;
 };
 } // namespace arm_compute
 #endif /*ARM_COMPUTE_CLCAST_H*/
diff --git a/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h b/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h
index 34dfdd7..58deb7e 100644
--- a/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h
@@ -24,10 +24,11 @@
 #ifndef ARM_COMPUTE_CLDEPTHCONVERT_H
 #define ARM_COMPUTE_CLDEPTHCONVERT_H
 
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+#include "arm_compute/runtime/IFunction.h"
 
-#include <cstdint>
+#include "arm_compute/core/Types.h"
+
+#include <memory>
 
 namespace arm_compute
 {
@@ -35,10 +36,22 @@
 class ICLTensor;
 class ITensorInfo;
 
-/** Basic function to run @ref CLDepthConvertLayerKernel. */
-class CLDepthConvertLayer : public ICLSimpleFunction
+/** Basic function to run @ref opencl::kernels::ClCastKernel */
+class CLDepthConvertLayer : public IFunction
 {
 public:
+    /** Constructor */
+    CLDepthConvertLayer();
+    /** Destructor */
+    ~CLDepthConvertLayer();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLDepthConvertLayer(const CLDepthConvertLayer &) = delete;
+    /** Default move constructor */
+    CLDepthConvertLayer(CLDepthConvertLayer &&);
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLDepthConvertLayer &operator=(const CLDepthConvertLayer &) = delete;
+    /** Default move assignment operator */
+    CLDepthConvertLayer &operator=(CLDepthConvertLayer &&);
     /** Initialize the function's source, destination
      *
      * Valid data layouts:
@@ -94,6 +107,13 @@
      * @return a status
      */
     static Status validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    struct Impl;
+    std::unique_ptr<Impl> _impl;
 };
 } // namespace arm_compute
 #endif /*ARM_COMPUTE_CLDEPTHCONVERT_H*/
diff --git a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
index e5de45c..3d2dbdb 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
@@ -34,7 +34,6 @@
 class IMemoryManager;
 class ICLTensor;
 class ITensorInfo;
-class CLDepthConvertLayerKernel;
 class CLGEMMLowpMatrixMultiplyNativeKernel;
 class CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel;
 class CLGEMMLowpOffsetContributionKernel;
@@ -49,6 +48,14 @@
 } // namespace kernels
 } // namespace opencl
 
+namespace opencl
+{
+namespace kernels
+{
+class ClCastKernel;
+} // namespace kernels
+} // namespace opencl
+
 /** Basic function to execute GEMMLowpMatrixMultiplyCore on OpenCL. */
 class CLGEMMLowpMatrixMultiplyCore : public IFunction
 {
@@ -143,7 +150,7 @@
     MemoryGroup _memory_group;
 
     // Kernels used
-    std::unique_ptr<CLDepthConvertLayerKernel>                     _weights_to_qasymm8;
+    std::unique_ptr<opencl::kernels::ClCastKernel>                 _weights_to_qasymm8;
     std::unique_ptr<CLGEMMLowpMatrixMultiplyNativeKernel>          _mm_native_kernel;
     std::unique_ptr<CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel> _mm_reshaped_only_rhs_kernel;
     std::unique_ptr<opencl::kernels::ClGemmReshapeRhsMatrixKernel> _mtx_b_reshape_kernel;
diff --git a/arm_compute/runtime/NEON/functions/NECast.h b/arm_compute/runtime/NEON/functions/NECast.h
index eb7de1f..30499f5 100644
--- a/arm_compute/runtime/NEON/functions/NECast.h
+++ b/arm_compute/runtime/NEON/functions/NECast.h
@@ -24,20 +24,35 @@
 #ifndef ARM_COMPUTE_NECAST_H
 #define ARM_COMPUTE_NECAST_H
 
+#include "arm_compute/runtime/IFunction.h"
+
 #include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+
+#include <memory>
 
 namespace arm_compute
 {
 class ITensor;
 class ITensorInfo;
 
-/** Basic function to run @ref NEDepthConvertLayerKernel.
+/** Basic function to run @ref cpu::kernels::CpuCastKernel.
  * This function ignores the scale and zeroPoint of quanized tensors,so QASYMM8 input is treated as uint8 values.
  */
-class NECast : public INESimpleFunctionNoBorder
+class NECast : public IFunction
 {
 public:
+    /** Constructor */
+    NECast();
+    /** Destructor */
+    ~NECast();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NECast(const NECast &) = delete;
+    /** Default move constructor */
+    NECast(NECast &&);
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NECast &operator=(const NECast &) = delete;
+    /** Default move assignment operator */
+    NECast &operator=(NECast &&);
     /** Initialize the function's source, destination
      *
      * Valid data layouts:
@@ -71,6 +86,13 @@
      * @return a status
      */
     static Status validate(ITensorInfo *input, ITensorInfo *output, ConvertPolicy policy);
+
+    // Inherited methods overridden
+    void run() override;
+
+private:
+    struct Impl;
+    std::unique_ptr<Impl> _impl;
 };
 } // namespace arm_compute
 #endif /*ARM_COMPUTE_NECAST_H*/
diff --git a/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h b/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h
index 17cf539..eb0724a 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h
@@ -24,28 +24,33 @@
 #ifndef ARM_COMPUTE_NEDEPTHCONVERT_H
 #define ARM_COMPUTE_NEDEPTHCONVERT_H
 
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+#include "arm_compute/runtime/IFunction.h"
 
-#include <cstdint>
+#include "arm_compute/core/Types.h"
+
+#include <memory>
 
 namespace arm_compute
 {
 class ITensor;
 class ITensorInfo;
 
-/**Basic function to run @ref NEDepthConvertLayerKernel */
-class NEDepthConvertLayer : public INESimpleFunctionNoBorder
+/** Basic function to run @ref cpu::kernels::CpuCastKernel */
+class NEDepthConvertLayer : public IFunction
 {
 public:
-    /* Contructor */
-    NEDepthConvertLayer() = default;
-    /** Prevent instances of this class from being copied (As this class contains pointers)*/
+    /** Constructor */
+    NEDepthConvertLayer();
+    /** Destructor */
+    ~NEDepthConvertLayer();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
     NEDepthConvertLayer(const NEDepthConvertLayer &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers)*/
-    const NEDepthConvertLayer &operator=(const NEDepthConvertLayer &) = delete;
-    /** Default destructor */
-    ~NEDepthConvertLayer() = default;
+    /** Default move constructor */
+    NEDepthConvertLayer(NEDepthConvertLayer &&);
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEDepthConvertLayer &operator=(const NEDepthConvertLayer &) = delete;
+    /** Default move assignment operator */
+    NEDepthConvertLayer &operator=(NEDepthConvertLayer &&);
     /** Initialize the function's source, destination
      *
      * Valid data layouts:
@@ -80,6 +85,13 @@
      * @return a status
      */
     static Status validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift = 0);
+
+    // Inherited methods overridden
+    void run() override;
+
+private:
+    struct Impl;
+    std::unique_ptr<Impl> _impl;
 };
 } // namespace arm_compute
 #endif /*ARM_COMPUTE_NEDEPTHCONVERT_H*/