Use the stable CKW API in the GPU dynamic fusion backend

- Refactor all kernels to work with the CKW stable API
- Add support for sub-tile in the op_load/op_store CKW operator
- Fix mismatch in resize
- Add comments in all kernels written with CKW to help developers
understand the structure of the code
- Add texture image support in depthwise convolution written with CKW
- Add support for different block sizes in depthwise convolution
- Remove the use of the dynamic fusion helper functions.
- Add support for floor in the op_unary() of CKW

Resolves: COMPMID-6708, COMPMID-6743, COMPMID-6530

Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Signed-off-by: Jakub Sujak <jakub.sujak@arm.com>

Change-Id: I8104ce4d04a3138a1aeb0b84940e1f1c89e76069
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10914
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
diff --git a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h
index 5b6c1b9..0030528 100644
--- a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h
+++ b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUADD
-#define ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUADD
+#ifndef ACL_ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUADD_H
+#define ACL_ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUADD_H
 
 #include "arm_compute/core/Error.h"
 
@@ -52,9 +52,6 @@
      * |:--------------|:--------------|:-------------|
      * |F16            |F16            |F16           |
      * |F32            |F32            |F32           |
-     * |S32            |S32            |S32           |
-     * |S16            |S16            |S16           |
-     * |U8             |U8             |U8            |
      *
      * Valid data layouts:
      * - Any
@@ -86,4 +83,4 @@
 } // namespace dynamic_fusion
 } // namespace experimental
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUADD */
+#endif // ACL_ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUADD_H
diff --git a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuCast.h b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuCast.h
index 1593cec..9735dcf 100644
--- a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuCast.h
+++ b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuCast.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUCAST
-#define ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUCAST
+#ifndef ACL_ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUCAST_H
+#define ACL_ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUCAST_H
 
 #include "arm_compute/dynamic_fusion/sketch/attributes/CastAttributes.h"
 
@@ -49,13 +49,8 @@
      * Valid data type configurations:
      * |src            |dst                                    |
      * |:--------------|:--------------------------------------|
-     * |U8             | S8, U16, S16, U32, S32, F16, F32      |
-     * |U16            | U8, S8, S16, U32, S32, F16, F32       |
-     * |S16            | U8, S8, U16, U32, S32, F16, F32       |
-     * |U32            | U8, S8, U16, S16, S32, F16, F32       |
-     * |S32            | U8, S8, U16, S16, U32, F16, F32       |
-     * |F16            | U8, S8, U16, S16, U32, S32, F32       |
-     * |F32            | U8, S8, U16, S16, U32, S32, F16       |
+     * |F16            | F32                                   |
+     * |F32            | F16                                   |
      *
      * Input data type must be different than output data type.
      *
@@ -90,4 +85,4 @@
 } // namespace dynamic_fusion
 } // namespace experimental
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUCAST */
+#endif // ACL_ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUCAST_H
diff --git a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h
index 4d2db0e..a1b0c33 100644
--- a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h
+++ b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023 Arm Limited.
+ * Copyright (c) 2023-2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -42,12 +42,6 @@
 class GpuPool2dSettings
 {
 public:
-    /* Get mixed_precision*/
-    bool mixed_precision() const;
-
-    /* Set mixed_precision */
-    GpuPool2dSettings &mixed_precision(bool mixed_precision);
-
     /* Get using -infinity as limit flag */
     bool use_inf_as_limit() const;
 
@@ -55,7 +49,6 @@
     GpuPool2dSettings use_inf_as_limit(bool use_inf_as_limit);
 
 private:
-    bool _mixed_precision{false};
     bool _use_inf_as_limit{true};
 };
 
diff --git a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuResize.h b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuResize.h
index e2ece80..e02f9da 100644
--- a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuResize.h
+++ b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuResize.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -22,8 +22,8 @@
  * SOFTWARE.
  */
 
-#ifndef ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPURESIZE
-#define ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPURESIZE
+#ifndef ACL_ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPURESIZE_H
+#define ACL_ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPURESIZE_H
 
 #include "arm_compute/dynamic_fusion/sketch/attributes/ResizeAttributes.h"
 
@@ -51,12 +51,8 @@
      * Valid data type configurations:
      * |src            |dst            |
      * |:--------------|:--------------|
-     * |QASYMM8        |QASYMM8        |
-     * |QASYMM8_SIGNED |QASYMM8_SIGNED |
      * |F16            |F16            |
      * |F32            |F32            |
-     * |U8             |U8             |
-     * |S16            |S16            |
      *
      * Valid data layouts:
      * - NHWC
@@ -90,4 +86,4 @@
 } // namespace dynamic_fusion
 } // namespace experimental
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPURESIZE */
+#endif // ACL_ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPURESIZE_H
diff --git a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSub.h b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSub.h
index 2d9255f..8c44ec5 100644
--- a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSub.h
+++ b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSub.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023 Arm Limited.
+ * Copyright (c) 2023-2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUSUB
-#define ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUSUB
+#ifndef ACL_ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUSUB_H
+#define ACL_ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUSUB_H
 
 #include "arm_compute/core/Error.h"
 
@@ -52,9 +52,6 @@
      * |:--------------|:--------------|:-------------|
      * |F16            |F16            |F16           |
      * |F32            |F32            |F32           |
-     * |S32            |S32            |S32           |
-     * |S16            |S16            |S16           |
-     * |U8             |U8             |U8            |
      *
      * Valid data layouts:
      * - Any
@@ -88,4 +85,4 @@
 } // namespace dynamic_fusion
 } // namespace experimental
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUSUB */
+#endif // ACL_ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUSUB_H