Add GpuKernelArgumentBinding for runtime argument setting

* Add flexible runtime argument setting that accepts argument bindings
exported from CKW.

* Introduce internal build flag ACL_INTERNAL_TEST_CKW_IN_DF. If set to
true, CKW will be tested in the dynamic fusion validation tests.
Otherwise, it will not be tested and dynamic fusion will keep using
ClTemplateWriter instead.

* Fix CKW sampler for elementwise binary to deal with tile sizes > 1
in both dimensions

Resolves: COMPMID-6282
Partially resolves: COMPMID-6260

Signed-off-by: SiCong Li <sicong.li@arm.com>
Change-Id: I0ab225a4484eb2119643d900a4e72806558626ee
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9917
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Reviewed-by: Anitha Raj <Anitha.Raj@arm.com>
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
diff --git a/filelist.json b/filelist.json
index 18fcaa8..b7845a7 100644
--- a/filelist.json
+++ b/filelist.json
@@ -2286,74 +2286,80 @@
     }
   },
   "experimental": {
-    "dynamic_fusion": [
-      "src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp",
-      "src/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.cpp",
-      "src/dynamic_fusion/sketch/attributes/CastAttributes.cpp",
-      "src/dynamic_fusion/sketch/attributes/ClampAttributes.cpp",
-      "src/dynamic_fusion/sketch/attributes/Conv2dAttributes.cpp",
-      "src/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.cpp",
-      "src/dynamic_fusion/sketch/attributes/Pool2dAttributes.cpp",
-      "src/dynamic_fusion/sketch/attributes/ResizeAttributes.cpp",
-      "src/dynamic_fusion/sketch/attributes/SoftmaxAttributes.cpp",
-      "src/dynamic_fusion/sketch/attributes/ReshapeAttributes.cpp",
-      "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.cpp",
-      "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp",
-      "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp",
-      "src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp",
-      "src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp",
-      "src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.cpp",
-      "src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp",
-      "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp",
-      "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp",
-      "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp",
-      "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.cpp",
-      "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp",
-      "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp",
-      "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp",
-      "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.cpp",
-      "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.cpp",
-      "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp",
-      "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.cpp",
-      "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp",
-      "src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp",
-      "src/dynamic_fusion/sketch/gpu/operators/GpuCast.cpp",
-      "src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp",
-      "src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp",
-      "src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp",
-      "src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp",
-      "src/dynamic_fusion/sketch/gpu/operators/GpuReshape.cpp",
-      "src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp",
-      "src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp",
-      "src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp",
-      "src/dynamic_fusion/sketch/gpu/operators/GpuSigmoid.cpp",
-      "src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp",
-      "src/dynamic_fusion/sketch/gpu/operators/GpuSub.cpp",
-      "src/dynamic_fusion/sketch/gpu/operators/GpuTanh.cpp",
-      "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp",
-      "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp",
-      "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp",
-      "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp",
-      "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp",
-      "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp",
-      "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp",
-      "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.cpp",
-      "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.cpp",
-      "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.cpp",
-      "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp",
-      "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp",
-      "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp",
-      "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp",
-      "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.cpp",
-      "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp",
-      "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp",
-      "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp",
-      "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp",
-      "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp",
-      "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.cpp",
-      "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.cpp",
-      "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp",
-      "src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.cpp"
-    ]
+    "dynamic_fusion": {
+      "common": [
+        "src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp",
+        "src/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.cpp",
+        "src/dynamic_fusion/sketch/attributes/CastAttributes.cpp",
+        "src/dynamic_fusion/sketch/attributes/ClampAttributes.cpp",
+        "src/dynamic_fusion/sketch/attributes/Conv2dAttributes.cpp",
+        "src/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.cpp",
+        "src/dynamic_fusion/sketch/attributes/Pool2dAttributes.cpp",
+        "src/dynamic_fusion/sketch/attributes/ResizeAttributes.cpp",
+        "src/dynamic_fusion/sketch/attributes/SoftmaxAttributes.cpp",
+        "src/dynamic_fusion/sketch/attributes/ReshapeAttributes.cpp",
+        "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.cpp",
+        "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp",
+        "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp",
+        "src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp",
+        "src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp",
+        "src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.cpp",
+        "src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp",
+        "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp",
+        "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp",
+        "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp",
+        "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.cpp",
+        "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp",
+        "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp",
+        "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp",
+        "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.cpp",
+        "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.cpp",
+        "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp",
+        "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.cpp",
+        "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp",
+        "src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp",
+        "src/dynamic_fusion/sketch/gpu/operators/GpuCast.cpp",
+        "src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp",
+        "src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp",
+        "src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp",
+        "src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp",
+        "src/dynamic_fusion/sketch/gpu/operators/GpuReshape.cpp",
+        "src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp",
+        "src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp",
+        "src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp",
+        "src/dynamic_fusion/sketch/gpu/operators/GpuSigmoid.cpp",
+        "src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp",
+        "src/dynamic_fusion/sketch/gpu/operators/GpuSub.cpp",
+        "src/dynamic_fusion/sketch/gpu/operators/GpuTanh.cpp",
+        "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp"
+      ],
+      "template_writer": [
+        "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp",
+        "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp",
+        "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp",
+        "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp",
+        "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp",
+        "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp",
+        "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.cpp",
+        "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.cpp",
+        "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.cpp",
+        "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp",
+        "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp",
+        "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp",
+        "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp"
+      ],
+      "ckw_driver": [
+        "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.cpp",
+        "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp",
+        "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp",
+        "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp",
+        "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp",
+        "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp",
+        "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.cpp",
+        "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.cpp",
+        "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp",
+        "src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.cpp"
+      ]
+    }
   }
 }