Fix direct conv2d in dynamic fusion

* Put the input and output tensor shape values directly into the CL code.
* Use a texture for the weights whenever possible.

Resolves: COMPMID-5938
Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Change-Id: Ib53b310a80ce857eac36564b352136fdde55b131
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9249
Reviewed-by: SiCong Li <sicong.li@arm.com>
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp
index 690371f..e00f095 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp
@@ -45,49 +45,6 @@
 {
 namespace
 {
-bool export_to_cl_image_support(const ITensorInfo *tensor, GPUTarget gpu_target, const cl::Device &device, DataLayout data_layout)
-{
-    if(tensor->tensor_shape()[0] % 4 || (data_layout != DataLayout::NHWC))
-    {
-        return false;
-    }
-
-    // If not floating point
-    if(!is_data_type_float(tensor->data_type()))
-    {
-        return false;
-    }
-
-    if(gpu_target == GPUTarget::G71 || get_arch_from_target(gpu_target) == GPUTarget::MIDGARD)
-    {
-        return false;
-    }
-
-    // Check if the cl_khr_image2d_from_buffer extension is supported on the target platform
-    if(!image2d_from_buffer_supported(device))
-    {
-        return false;
-    }
-
-    // Check cl image pitch alignment
-    if(get_cl_image_pitch_alignment(device) == 0)
-    {
-        return false;
-    }
-
-    const size_t image_w     = tensor->tensor_shape()[0] / 4;
-    const size_t image_h     = tensor->tensor_shape()[1] * tensor->tensor_shape()[2] * tensor->tensor_shape()[3];
-    const size_t max_image_w = device.getInfo<CL_DEVICE_IMAGE2D_MAX_WIDTH>();
-    const size_t max_image_h = device.getInfo<CL_DEVICE_IMAGE2D_MAX_HEIGHT>();
-
-    if(image_w > max_image_w || image_h > max_image_h)
-    {
-        return false;
-    }
-
-    return true;
-}
-
 DirectConvComputeKernelInfo config_direct_convolution_nhwc(const ITensorInfo *src, const ITensorInfo *weights, const PadStrideInfo &conv_info)
 {
     // Get GPU target
@@ -126,7 +83,6 @@
     TensorInfo         dst_info_to_validate;
     const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate;
 
-    const DataLayout data_layout = src->data_layout();
     if(dst != nullptr)
     {
         dst_info_to_validate_ptr = dst;
@@ -151,9 +107,6 @@
             const auto properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
             auto       settings   = ClComponentDirectConv2d::Settings();
 
-            settings.export_to_cl_image(
-                export_to_cl_image_support(src, gpu_target, cl_compile_ctx->get_device(), data_layout));
-
             settings.fast_relaxed_math(
                 (gpu_target != GPUTarget::G71 && (gpu_target & GPUTarget::GPU_ARCH_MASK) == GPUTarget::BIFROST)
                 && (dst_info_to_validate_ptr->data_type() == DataType::F32 || dst_info_to_validate_ptr->data_type() == DataType::F16));
@@ -251,7 +204,6 @@
 
     const auto sketch_ctx = sketch.implementation().context();
 
-    const auto data_layout = src->data_layout();
     const auto gpu_target  = sketch_ctx->gpu_target();
 
     if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
@@ -266,20 +218,17 @@
 
             auto settings = ClComponentDirectConv2d::Settings();
 
-            settings.export_to_cl_image(
-                export_to_cl_image_support(src, gpu_target, cl_compile_ctx->get_device(), data_layout));
-
             settings.fast_relaxed_math(
                 (gpu_target != GPUTarget::G71 && (gpu_target & GPUTarget::GPU_ARCH_MASK) == GPUTarget::BIFROST)
                 && (dst->data_type() == DataType::F32 || dst->data_type() == DataType::F16));
 
+            settings.direct_conv_descriptor(desc);
+
             if(settings.export_to_cl_image())
             {
                 arm_compute::opencl::kernels::gemm::update_padding_for_cl_image(wei);
             }
 
-            settings.direct_conv_descriptor(desc);
-
             ArgumentPack<ITensorInfo> arguments;
             arguments.add_const_tensor(ACL_SRC_0, src);
             arguments.add_const_tensor(ACL_SRC_1, wei);