APPBROWSER-357: Fix Transpose performance issue by tuning lws

Change-Id: Ia71435f6e5c5610e2b76d6d4eb61a8847ca42305
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/114829
Reviewed-by: Pablo Tello <pablo.tello@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Tested-by: Anthony Barbier <anthony.barbier@arm.com>
diff --git a/arm_compute/core/GLES_COMPUTE/IGCKernel.h b/arm_compute/core/GLES_COMPUTE/IGCKernel.h
index 11b2b17..ee1e166 100644
--- a/arm_compute/core/GLES_COMPUTE/IGCKernel.h
+++ b/arm_compute/core/GLES_COMPUTE/IGCKernel.h
@@ -140,6 +140,17 @@
      */
     virtual void run(const Window &window) = 0;
 
+    /** Set the Local-Workgroup-Size hint
+     *
+     * @note This method should be called after the configuration of the kernel
+     *
+     * @param[in] lws_hint Local-Workgroup-Size to use (read-only; copied into the stored hint)
+     */
+    void set_lws_hint(const gles::NDRange &lws_hint)
+    {
+        _lws_hint = lws_hint;
+    }
+
 private:
     /** Add the passed tensor's parameters to the object's kernel's arguments starting from the index idx.
      *
@@ -159,7 +170,8 @@
     unsigned int           num_arguments_per_tensor() const;
 
 protected:
-    GCKernel _kernel; /**< GLES kernel to run */
+    GCKernel      _kernel;   /**< GLES kernel to run */
+    gles::NDRange _lws_hint; /**< Local workgroup size hint for the GLES kernel */
 };
 
 /** Add the kernel to the command queue with the given window.