Use the stable CKW API in the GPU dynamic fusion backend

- Refactor all kernels to work with the CKW stable API
- Add sub-tile support to the op_load/op_store CKW operators
- Fix mismatch in resize
- Add comments in all kernels written with CKW to help developers
  understand the structure of the code
- Add texture image support in depthwise convolution written with CKW
- Add support for different block sizes in depthwise convolution
- Remove the use of the dynamic fusion helper functions
- Add support for floor in CKW's op_unary()

Resolves: COMPMID-6708, COMPMID-6743, COMPMID-6530

Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Signed-off-by: Jakub Sujak <jakub.sujak@arm.com>

Change-Id: I8104ce4d04a3138a1aeb0b84940e1f1c89e76069
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10914
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
diff --git a/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.cpp b/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.cpp
index b7d146b..f392cd8 100644
--- a/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.cpp
+++ b/compute_kernel_writer/src/cl/helpers/CLMemoryOpImage2dHelper.cpp
@@ -33,18 +33,15 @@
 #include "src/cl/CLTile.h"
 #include "src/ITensor.h"
 #include "src/Tensor3dMapper.h"
+#include "src/TileView.h"
 
 namespace ckw
 {
-void CLMemoryOpImage2dHelper::initialize(const CLTile *dst, const CLTile *x, const CLTile *z, const CLTile *b)
+void CLMemoryOpImage2dHelper::initialize(const CLTile *x, const CLTile *z, const CLTile *b)
 {
-    CKW_ASSERT(validate(_writer, _tensor, _sampler, _mapper.get(), _op, dst));
-
-    _dst           = dst;
-    _ls_width_full = dst->info().width();
-    _coord_x       = x->scalar(0, 0).str;
-    _coord_z       = z->scalar(0, 0).str;
-    _coord_b       = b->scalar(0, 0).str;
+    _coord_x = x->scalar(0, 0).str;
+    _coord_z = z->scalar(0, 0).str;
+    _coord_b = b->scalar(0, 0).str;
 }
 
 void CLMemoryOpImage2dHelper::write_row(int32_t row_id, const std::string &coord_y)
@@ -52,7 +49,7 @@
     // The only check required is on Y.
     out_of_bound_initialize_y(coord_y);
 
-    const std::string dst     = _dst->vector(row_id).str;
+    const std::string dst     = _dst.vector(row_id).str;
     const std::string sampler = to_ls_image2d_sampler();
     const std::string coord   = to_ls_image2d_address(_coord_x, coord_y, _coord_z, _coord_b);
     const std::string ls_buf  = to_ls_image2d(_op, _ls_width_full, dst, sampler, coord);
@@ -66,16 +63,16 @@
 {
 }
 
-bool CLMemoryOpImage2dHelper::validate(const CLKernelWriter *writer,
-                                       const ITensor        *tensor,
-                                       const TensorSampler  *sampler,
-                                       const Tensor3dMapper *mapper,
-                                       MemoryOperation       op,
-                                       const CLTile         *dst)
+bool CLMemoryOpImage2dHelper::validate(const CLKernelWriter   *writer,
+                                       const ITensor          *tensor,
+                                       const TensorSampler    *sampler,
+                                       const Tensor3dMapper   *mapper,
+                                       MemoryOperation         op,
+                                       const TileView<CLTile> &dst)
 {
     CKW_UNUSED(writer, tensor, mapper);
 
-    if (dst->info().width() != 4)
+    if (dst.width() != 4)
     {
         return false;
     }
@@ -95,7 +92,7 @@
     {
         return false;
     }
-    if ((dst->info().data_type() != DataType::Fp32) && (dst->info().data_type() != DataType::Fp16))
+    if ((dst.data_type() != DataType::Fp32) && (dst.data_type() != DataType::Fp16))
     {
         return false;
     }
@@ -143,10 +140,12 @@
                                                    const std::string &address) const
 {
     CKW_UNUSED(vector_width);
+    CKW_ASSERT_MSG(_dst.data_type() == DataType::Fp32 || _dst.data_type() == DataType::Fp16,
+                   "Image2d only supports floating-point data type");
 
     const TensorStorageType tensor_storage = _sampler->storage();
     const std::string       image2d_obj    = _tensor->storage(tensor_storage).val;
-    const std::string       post_fix       = _dst->info().data_type() == DataType::Fp32 ? "f" : "h";
+    const std::string       post_fix       = _dst.data_type() == DataType::Fp32 ? "f" : "h";
 
     switch (op)
     {