APPBROWSER-357: Fix Transpose performance issue by tuning lws
Change-Id: Ia71435f6e5c5610e2b76d6d4eb61a8847ca42305
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/114829
Reviewed-by: Pablo Tello <pablo.tello@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Tested-by: Anthony Barbier <anthony.barbier@arm.com>
diff --git a/arm_compute/core/GLES_COMPUTE/IGCKernel.h b/arm_compute/core/GLES_COMPUTE/IGCKernel.h
index 11b2b17..ee1e166 100644
--- a/arm_compute/core/GLES_COMPUTE/IGCKernel.h
+++ b/arm_compute/core/GLES_COMPUTE/IGCKernel.h
@@ -140,6 +140,17 @@
*/
virtual void run(const Window &window) = 0;
+ /** Set the Local-Workgroup-Size hint
+ *
+ * @note This method should be called after the configuration of the kernel
+ *
+ * @param[in] lws_hint Local-Workgroup-Size to use; the value is copied into the kernel, so a const object or a temporary may be passed
+ */
+ void set_lws_hint(const gles::NDRange &lws_hint)
+ {
+ _lws_hint = lws_hint;
+ }
+
private:
/** Add the passed tensor's parameters to the object's kernel's arguments starting from the index idx.
*
@@ -159,7 +170,8 @@
unsigned int num_arguments_per_tensor() const;
protected:
- GCKernel _kernel; /**< GLES kernel to run */
+ GCKernel _kernel; /**< GLES kernel to run */
+ gles::NDRange _lws_hint; /**< Local workgroup size hint for the GLES kernel */
};
/** Add the kernel to the command queue with the given window.