diff --git a/src/core/CL/CLCompileContext.cpp b/src/core/CL/CLCompileContext.cpp
index 0afb7e5..3db0fe5 100644
--- a/src/core/CL/CLCompileContext.cpp
+++ b/src/core/CL/CLCompileContext.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020 Arm Limited.
+ * Copyright (c) 2020-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -137,15 +137,16 @@
 {
 }
 CLCompileContext::CLCompileContext()
-    : _context(), _device(), _programs_map(), _built_programs_map()
+    : _context(), _device(), _programs_map(), _built_programs_map(), _is_wbsm_supported()
 {
 }
 
 CLCompileContext::CLCompileContext(cl::Context context, const cl::Device &device)
-    : _context(), _device(), _programs_map(), _built_programs_map()
+    : _context(), _device(), _programs_map(), _built_programs_map(), _is_wbsm_supported()
 {
-    _context = std::move(context);
-    _device  = CLDevice(device);
+    _context           = std::move(context);
+    _device            = CLDevice(device);
+    _is_wbsm_supported = get_wbsm_support_info(device);
 }
 
 Kernel CLCompileContext::create_kernel(const std::string &kernel_name, const std::string &program_name, const std::string &program_source,
@@ -318,7 +319,8 @@
 
 void CLCompileContext::set_device(cl::Device device)
 {
-    _device = std::move(device);
+    _device            = std::move(device);
+    _is_wbsm_supported = get_wbsm_support_info(device);
 }
 
 cl::NDRange CLCompileContext::default_ndrange() const
@@ -346,6 +348,11 @@
     return _device.supported("cl_khr_int64_base_atomics");
 }
 
+bool CLCompileContext::is_wbsm_supported() const
+{
+    return _is_wbsm_supported;
+}
+
 size_t CLCompileContext::max_local_workgroup_size(const cl::Kernel &kernel) const
 {
     size_t result;
diff --git a/src/core/CL/CLHelpers.cpp b/src/core/CL/CLHelpers.cpp
index 895bb72..aff8977 100644
--- a/src/core/CL/CLHelpers.cpp
+++ b/src/core/CL/CLHelpers.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -415,4 +415,26 @@
     const unsigned int num_of_threads = ((input_dimension + border_width) / 16);
     return cl::NDRange(std::min(8U, num_of_threads));
 }
+
+bool get_wbsm_support_info(const cl::Device &device)
+{
+    cl_bitfield capabilities = 0;
+    cl_int      err          = clGetDeviceInfo(device.get(), ARM_COMPUTE_LIBRARY_OPENCL_DEVICE_CAPABILITIES_ARM, sizeof(cl_bitfield), &capabilities, nullptr);
+    if((err == CL_SUCCESS) && (capabilities & ARM_COMPUTE_LIBRARY_OPENCL_EXEC_WBSM_ARM))
+    {
+        return true;
+    }
+    return false;
+}
+
+void set_wbsm(cl::Kernel &kernel, cl_int wbsm_hint)
+{
+    cl_int err = clSetKernelExecInfo(kernel.get(),
+                                     ARM_COMPUTE_LIBRARY_OPENCL_EXEC_WBSM_ARM,
+                                     sizeof(cl_int),
+                                     &wbsm_hint);
+    ARM_COMPUTE_UNUSED(err);
+    ARM_COMPUTE_ERROR_ON(err != CL_SUCCESS);
+}
+
 } // namespace arm_compute
diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp
index cf1c52e..75f76ea 100644
--- a/src/core/CL/CLKernelLibrary.cpp
+++ b/src/core/CL/CLKernelLibrary.cpp
@@ -1206,6 +1206,11 @@
     return _compile_context.int64_base_atomics_supported();
 }
 
+bool CLKernelLibrary::is_wbsm_supported()
+{
+    return _compile_context.is_wbsm_supported();
+}
+
 std::pair<std::string, bool> CLKernelLibrary::get_program(const std::string &program_name) const
 {
 #ifdef EMBEDDED_KERNELS
diff --git a/src/core/CL/ICLKernel.cpp b/src/core/CL/ICLKernel.cpp
index 2b259bf..1c6963f 100644
--- a/src/core/CL/ICLKernel.cpp
+++ b/src/core/CL/ICLKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -29,8 +29,6 @@
 
 #include <cstddef>
 
-using namespace arm_compute;
-
 void arm_compute::enqueue(cl::CommandQueue &queue, ICLKernel &kernel, const Window &window, const cl::NDRange &lws_hint, bool use_dummy_work_items)
 {
     if(kernel.kernel()() == nullptr)
@@ -77,9 +75,15 @@
         lws = valid_lws;
     }
 
+    if(CLKernelLibrary::get().is_wbsm_supported())
+    {
+        set_wbsm(kernel.kernel(), kernel.wbsm_hint());
+    }
     queue.enqueueNDRangeKernel(kernel.kernel(), cl::NullRange, gws, lws);
 }
 
+namespace arm_compute
+{
 template <unsigned int dimension_size>
 void ICLKernel::add_tensor_argument(unsigned &idx, const ICLTensor *tensor, const Window &window)
 {
@@ -146,3 +150,4 @@
 
     return gws;
 }
+} // namespace arm_compute
\ No newline at end of file
diff --git a/src/core/CL/ICLKernel.h b/src/core/CL/ICLKernel.h
index a24cd8c..6737109 100644
--- a/src/core/CL/ICLKernel.h
+++ b/src/core/CL/ICLKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -31,6 +31,7 @@
 #include "arm_compute/core/IKernel.h"
 #include "arm_compute/core/Validate.h"
 #include "arm_compute/core/experimental/Types.h"
+#include "arm_compute/runtime/CL/CLTuningParams.h"
 
 #include <string>
 
@@ -67,19 +68,30 @@
 protected:
     /** Configure the kernel's window and local workgroup size hint.
      *
-     * @param[in] window   The maximum window which will be returned by window()
-     * @param[in] lws_hint (Optional) Local-Workgroup-Size to use.
+     * @param[in] window    The maximum window which will be returned by window()
+     * @param[in] lws_hint  Local-Workgroup-Size to use.
+     * @param[in] wbsm_hint (Optional) Workgroup-Batch-Size-Modifier to use.
      */
-    void configure_internal(const Window &window, cl::NDRange lws_hint = CLKernelLibrary::get().default_ndrange())
+    void configure_internal(const Window &window, cl::NDRange lws_hint, cl_int wbsm_hint = 0)
     {
-        _lws_hint = lws_hint;
+        configure_internal(window, CLTuningParams(lws_hint, wbsm_hint));
+    }
+
+    /** Configure the kernel's window and tuning parameters hints.
+     *
+     * @param[in] window             The maximum window which will be returned by window()
+     * @param[in] tuning_params_hint (Optional) Tuning parameters to use.
+     */
+    void configure_internal(const Window &window, CLTuningParams tuning_params_hint = CLTuningParams(CLKernelLibrary::get().default_ndrange(), 0))
+    {
+        _tuning_params_hint = tuning_params_hint;
         IKernel::configure(window);
     }
 
 public:
     /** Constructor */
     ICLKernel()
-        : _kernel(nullptr), _target(GPUTarget::MIDGARD), _config_id(arm_compute::default_config_id), _max_workgroup_size(0), _lws_hint()
+        : _kernel(nullptr), _target(GPUTarget::MIDGARD), _config_id(arm_compute::default_config_id), _max_workgroup_size(0), _tuning_params_hint()
     {
     }
     /** Returns a reference to the OpenCL kernel of this object.
@@ -254,7 +266,7 @@
     void set_lws_hint(const cl::NDRange &lws_hint)
     {
         ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); // lws_hint will be overwritten by configure()
-        _lws_hint = lws_hint;
+        _tuning_params_hint.set_lws(lws_hint);
     }
 
     /** Return the Local-Workgroup-Size hint
@@ -263,7 +275,28 @@
      */
     cl::NDRange lws_hint() const
     {
-        return _lws_hint;
+        return _tuning_params_hint.get_lws();
+    }
+
+    /** Set the workgroup batch size modifier hint
+     *
+     * @note This method should be called after the configuration of the kernel
+     *
+     * @param[in] wbsm_hint workgroup batch size modifier value
+     */
+    void set_wbsm_hint(const cl_int &wbsm_hint)
+    {
+        ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); // wbsm_hint will be overwritten by configure()
+        _tuning_params_hint.set_wbsm(wbsm_hint);
+    }
+
+    /** Return the workgroup batch size modifier hint
+     *
+     * @return Current wbsm hint
+     */
+    cl_int wbsm_hint() const
+    {
+        return _tuning_params_hint.get_wbsm();
     }
 
     /** Get the configuration ID
@@ -344,7 +377,7 @@
     std::string _config_id;          /**< Configuration ID */
     size_t      _max_workgroup_size; /**< The maximum workgroup size for this kernel */
 private:
-    cl::NDRange _lws_hint; /**< Local workgroup size hint for the OpenCL kernel */
+    CLTuningParams _tuning_params_hint; /**< Tuning parameters hint for the OpenCL kernel */
 };
 
 /** Add the kernel to the command queue with the given window.
diff --git a/src/core/CL/OpenCL.cpp b/src/core/CL/OpenCL.cpp
index 6c70861..aff6285 100644
--- a/src/core/CL/OpenCL.cpp
+++ b/src/core/CL/OpenCL.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -134,6 +134,7 @@
     LOAD_FUNCTION_PTR(clEnqueueMarker, handle);
     LOAD_FUNCTION_PTR(clWaitForEvents, handle);
     LOAD_FUNCTION_PTR(clCreateImage, handle);
+    LOAD_FUNCTION_PTR(clSetKernelExecInfo, handle);
 
     // Third-party extensions
     LOAD_FUNCTION_PTR(clImportMemoryARM, handle);
@@ -962,6 +963,23 @@
     }
 }
 
+cl_int clSetKernelExecInfo(cl_kernel           kernel,
+                           cl_kernel_exec_info param_name,
+                           size_t              param_value_size,
+                           const void         *param_value)
+{
+    arm_compute::CLSymbols::get().load_default();
+    auto func = arm_compute::CLSymbols::get().clSetKernelExecInfo_ptr;
+    if(func != nullptr)
+    {
+        return func(kernel, param_name, param_value_size, param_value);
+    }
+    else
+    {
+        return CL_OUT_OF_RESOURCES;
+    }
+}
+
 cl_mem
 clImportMemoryARM(cl_context                      context,
                   cl_mem_flags                    flags,
