Fix direct conv2d in dynamic fusion

* Put input and output tensor shape values directly into the CL code.
* Use texture for weights when it is possible.

Resolves: COMPMID-5938
Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Change-Id: Ib53b310a80ce857eac36564b352136fdde55b131
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9249
Reviewed-by: SiCong Li <sicong.li@arm.com>
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp
index e69103e..ca531fe 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp
@@ -91,12 +91,12 @@
 {
 #define _IWEI_WIDTH {{WEI_WIDTH}}
 #define _IWEI_HEIGHT {{WEI_HEIGHT}}
-#define _ISRC_WIDTH {{src}}_w
-#define _ISRC_HEIGHT {{src}}_h
-#define _ISRC_CHANNELS {{src}}_c
-#define _IDST_WIDTH {{arg_dst}}_w
-#define _IDST_HEIGHT {{arg_dst}}_h
-#define _IDST_CHANNELS {{arg_dst}}_c
+#define _ISRC_WIDTH {{SRC_WIDTH}}
+#define _ISRC_HEIGHT {{SRC_HEIGHT}}
+#define _ISRC_CHANNELS {{SRC_CHANNELS}}
+#define _IDST_WIDTH {{DST_WIDTH}}
+#define _IDST_HEIGHT {{DST_HEIGHT}}
+#define _IDST_CHANNELS {{DST_CHANNELS}}
 #define _IY_MULTIPLIER (_IWEI_WIDTH * _IWEI_HEIGHT)
 
     TILE(int, M0, 1, xi);
@@ -214,8 +214,8 @@
 code += R"_(
     LOOP_UNROLLING(int, i, 0, 1, M0,
     {
-        g_dst_indirect_y[i].v = (uint)min(g_ind_1 + i, (int)({{arg_dst}}_w * {{arg_dst}}_h) - 1);
-        g_dst_indirect_y[i].v += g_ind_2 * (int)({{arg_dst}}_w * {{arg_dst}}_h);
+        g_dst_indirect_y[i].v = (uint)min(g_ind_1 + i, (int)({{DST_WIDTH}} * {{DST_HEIGHT}}) - 1);
+        g_dst_indirect_y[i].v += g_ind_2 * (int)({{DST_WIDTH}} * {{DST_HEIGHT}});
     })
 }
 //------------------ END KERNEL {{meta_kernel_id}} ---------------------
@@ -294,9 +294,19 @@
     }
     const auto width_idx  = 1;
     const auto height_idx = 2;
+    const auto channel_idx = 0;
+
+    lut["SRC_WIDTH"] = _src->dimension(width_idx);
+    lut["SRC_HEIGHT"] = _src->dimension(height_idx);
+    lut["SRC_CHANNELS"] = _src->dimension(channel_idx);
+
     lut["WEI_WIDTH"]      = _weight->dimension(width_idx);
     lut["WEI_HEIGHT"]     = _weight->dimension(height_idx);
 
+    lut["DST_WIDTH"] = _dst->dimension(width_idx);
+    lut["DST_HEIGHT"] = _dst->dimension(height_idx);
+    lut["DST_CHANNELS"] = _dst->dimension(channel_idx);
+
     lut["STRIDE_X"] = _attributes.stride().x();
     lut["STRIDE_Y"] = _attributes.stride().y();