Use the stable CKW API in the GPU dynamic fusion backend

- Refactor all kernels to work with the CKW stable API
- Add support for sub-tile in the op_load/op_store CKW operator
- Fix mismatch in resize
- Add comments in all kernels written with CKW to help developers
understand the structure of the code
- Add texture image support in depthwise convolution written with CKW
- Add support for different block sizes in depthwise convolution
- Remove the use of the dynamic fusion helper functions
- Add support for floor in the op_unary() of CKW

Resolves: COMPMID-6708, COMPMID-6743, COMPMID-6530

Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Signed-off-by: Jakub Sujak <jakub.sujak@arm.com>

Change-Id: I8104ce4d04a3138a1aeb0b84940e1f1c89e76069
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10914
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
diff --git a/compute_kernel_writer/include/ckw/KernelWriter.h b/compute_kernel_writer/include/ckw/KernelWriter.h
index 0d739e8..da41b94 100644
--- a/compute_kernel_writer/include/ckw/KernelWriter.h
+++ b/compute_kernel_writer/include/ckw/KernelWriter.h
@@ -25,11 +25,22 @@
 #ifndef CKW_INCLUDE_CKW_KERNELWRITER_H
 #define CKW_INCLUDE_CKW_KERNELWRITER_H
 
+#include "ckw/Kernel.h"
+#include "ckw/TensorInfo.h"
 #include "ckw/TensorOperand.h"
+#include "ckw/TensorSampler.h"
+#include "ckw/TileInfo.h"
 #include "ckw/TileOperand.h"
 #include "ckw/types/ConstantData.h"
 #include "ckw/types/ConvertPolicy.h"
+#include "ckw/types/DataType.h"
 #include "ckw/types/Operators.h"
+#include "ckw/types/TargetArchitecture.h"
+#include "ckw/types/TargetLanguage.h"
+#include "ckw/types/TensorComponentType.h"
+#include "ckw/types/TensorDataLayout.h"
+#include "ckw/types/TensorSamplerTypes.h"
+#include "ckw/types/TensorStorageType.h"
 
 #include <functional>
 #include <memory>
@@ -39,16 +50,8 @@
 namespace ckw
 {
 
-/** Forward Declerations */
-class Kernel;
-class TensorInfo;
-class TensorSampler;
+/** Forward Declarations */
 class TileArea;
-class TileInfo;
-
-enum class DataType;
-enum class TargetArchitecture;
-enum class TargetLanguage;
 
 /** A kernel writer.
  *
@@ -350,7 +353,6 @@
                                   const TileOperand   &z,
                                   const TileOperand   &batch_op) = 0;
 
-protected:
     // =============================================================================================
     // ID space management
     // =============================================================================================
@@ -367,6 +369,7 @@
     /** Get the current ID space. */
     int32_t id_space() const;
 
+protected:
     /** Set the current ID space.
      *
      * @param[in] value The ID space to be used.
diff --git a/compute_kernel_writer/include/ckw/TensorOperand.h b/compute_kernel_writer/include/ckw/TensorOperand.h
index 2672cd5..a3e53d1 100644
--- a/compute_kernel_writer/include/ckw/TensorOperand.h
+++ b/compute_kernel_writer/include/ckw/TensorOperand.h
@@ -43,6 +43,15 @@
     // Only kernel writer class interacts with tensor operand hence we allow it to access this field.
     friend class KernelWriter;
 
+    /** Create an empty tensor operand.
+     *
+     * The new tensor operand doesn't refer to any tensor; therefore, it is not useable.
+     */
+    TensorOperand();
+
+    /** Check if the tensor operand contains a tensor and is therefore useable. */
+    bool is_valid() const;
+
     /** Get the tensor info. */
     const TensorInfo &info() const;
 
@@ -92,7 +101,7 @@
     /** Initialize a new instance of @ref TensorOperand class for a tensor. */
     TensorOperand(ITensor &tensor);
 
-    ITensor &_tensor;
+    ITensor *_tensor;
 };
 
 } // namespace ckw
diff --git a/compute_kernel_writer/include/ckw/TileOperand.h b/compute_kernel_writer/include/ckw/TileOperand.h
index 56dc5e7..556d589 100644
--- a/compute_kernel_writer/include/ckw/TileOperand.h
+++ b/compute_kernel_writer/include/ckw/TileOperand.h
@@ -33,6 +33,7 @@
 class KernelWriter;
 class TensorOperand;
 class ITile;
+class TileInfo;
 
 /** A tile operand refers to a tile object that can be used for kernel writing. */
 class TileOperand
@@ -43,6 +44,18 @@
     friend class KernelWriter;
     friend class TensorOperand;
 
+    /** Create an empty tile operand.
+     *
+     * The new tile operand doesn't refer to any tile; therefore, it is not useable.
+     */
+    TileOperand();
+
+    /** Check if the tile operand contains a tile and is therefore useable. */
+    bool is_valid() const;
+
+    /** Get the tile info. */
+    const TileInfo &tile_info() const;
+
     /** Get a row vector of the current tile operand.
      *
      * @param[in] row The index of the row to be accessed in the current tile operand.
diff --git a/compute_kernel_writer/include/ckw/types/ConstantData.h b/compute_kernel_writer/include/ckw/types/ConstantData.h
index 7708818..ea95049 100644
--- a/compute_kernel_writer/include/ckw/types/ConstantData.h
+++ b/compute_kernel_writer/include/ckw/types/ConstantData.h
@@ -53,6 +53,10 @@
     template <typename T>
     ConstantData(std::initializer_list<std::initializer_list<T>> values, DataType data_type);
 
+    /** Templated constructor */
+    template <typename T>
+    ConstantData(const std::vector<std::vector<T>> &values, DataType data_type);
+
 private:
     /** Validate the given data type and the template type
      *
diff --git a/compute_kernel_writer/include/ckw/types/Operators.h b/compute_kernel_writer/include/ckw/types/Operators.h
index 1e5f9bd..77b0519 100644
--- a/compute_kernel_writer/include/ckw/types/Operators.h
+++ b/compute_kernel_writer/include/ckw/types/Operators.h
@@ -43,6 +43,7 @@
     Fabs  = 0x0014,
     Log   = 0x0015,
     Round = 0x0016,
+    Floor = 0x0017,
 };
 
 /** Assignment operators. */