Add WBSM tuning to CLTuner

Add WBSM as possible parameter to be tuned
Add helper functions to check WBSM support and setting the value in the kernel
Update tuning parameter lists to use WBSM
Update CLTuner to use WBSM
The WBSM tuning is exposed as a parameter to be set at compile time by setting the CLTuningInfo
CLTuningInfo contains information about the tuning mode and if wbsm tuning enabled

Resolves: COMPMID-3936

Change-Id: Id53697c9c6d2cef41c049f368002f6197351b3ed
Signed-off-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4914
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
diff --git a/arm_compute/core/CL/CLCompileContext.h b/arm_compute/core/CL/CLCompileContext.h
index 6f6dc18..46a8c9b 100644
--- a/arm_compute/core/CL/CLCompileContext.h
+++ b/arm_compute/core/CL/CLCompileContext.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020 Arm Limited.
+ * Copyright (c) 2020-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -296,6 +296,12 @@
      */
     bool int64_base_atomics_supported() const;
 
+    /* Returns true if the workgroup batch size modifier parameter is supported on the cl device
+    *
+    * @return true if the workgroup batch size modifier parameter is supported, false otherwise
+    */
+    bool is_wbsm_supported() const;
+
 private:
     /** Load program and its dependencies.
      *
@@ -327,6 +333,7 @@
     CLDevice    _device;                                              /**< Underlying CL device. */
     mutable std::map<std::string, const Program> _programs_map;       /**< Map with all already loaded program data. */
     mutable std::map<std::string, cl::Program>   _built_programs_map; /**< Map with all already built program data. */
+    bool _is_wbsm_supported;                                          /**< Support of worksize batch size modifier support boolean*/
 };
 } // namespace arm_compute
 #endif /* ARM_COMPUTE_CLCOMPILECONTEXT_H */
diff --git a/arm_compute/core/CL/CLHelpers.h b/arm_compute/core/CL/CLHelpers.h
index cf18e16..0e9aa5d 100644
--- a/arm_compute/core/CL/CLHelpers.h
+++ b/arm_compute/core/CL/CLHelpers.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -30,6 +30,11 @@
 #include <set>
 #include <string>
 
+/* CL Device capabilities */
+#define ARM_COMPUTE_LIBRARY_OPENCL_DEVICE_CAPABILITIES_ARM 0x41E4
+/* Workgroup Batch Size Modifier */
+#define ARM_COMPUTE_LIBRARY_OPENCL_EXEC_WBSM_ARM 0x41E6
+
 namespace arm_compute
 {
 class CLCoreRuntimeContext;
@@ -226,5 +231,20 @@
  */
 cl::NDRange create_lws_hint_parallel_implementations(unsigned int input_dimension, unsigned int vector_size);
 
+/* Helper function to check if the workgroup batch size modifier parameter is supported on the cl device
+ *
+ * @param[in] device cl device to check for support
+ *
+ * @return true if the workgroup batch size modifier parameter is supported, false otherwise
+ */
+bool get_wbsm_support_info(const cl::Device &device);
+
+/* Helper function to set the workgroup batch size modifier parameter in the kernel
+ *
+ * @param[in] kernel    cl kernel to set the workgroup batch size modifier parameter
+ * @param[in] wbsm_hint workgroup batch size modifier to use
+ */
+void set_wbsm(cl::Kernel &kernel, cl_int wbsm_hint);
+
 } // namespace arm_compute
 #endif /* ARM_COMPUTE_CLHELPERS_H */
diff --git a/arm_compute/core/CL/CLKernelLibrary.h b/arm_compute/core/CL/CLKernelLibrary.h
index 1933893..0d8e4a6 100644
--- a/arm_compute/core/CL/CLKernelLibrary.h
+++ b/arm_compute/core/CL/CLKernelLibrary.h
@@ -148,6 +148,12 @@
      */
     std::string get_program_name(const std::string &kernel_name) const;
 
+    /* Returns true if the workgroup batch size modifier parameter is supported on the cl device
+    *
+    * @return true if the workgroup batch size modifier parameter is supported, false otherwise
+    */
+    bool is_wbsm_supported();
+
     /** Sets the CL context used to create programs.
      *
      * @note Setting the context also resets the device to the
diff --git a/arm_compute/core/CL/OpenCL.h b/arm_compute/core/CL/OpenCL.h
index f9796d7..155c3e4 100644
--- a/arm_compute/core/CL/OpenCL.h
+++ b/arm_compute/core/CL/OpenCL.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -135,6 +135,7 @@
     DECLARE_FUNCTION_PTR(clEnqueueMarker);
     DECLARE_FUNCTION_PTR(clWaitForEvents);
     DECLARE_FUNCTION_PTR(clCreateImage);
+    DECLARE_FUNCTION_PTR(clSetKernelExecInfo);
 
     // Third-party extensions
     DECLARE_FUNCTION_PTR(clImportMemoryARM);