Add GemmLowp MMUL Reshaped Only Rhs Support for QASYMM8/QASYMM8_SIGNED

This patch introduces a GEMMLowp routine that is optimized for Arm(R) Mali(TM)-G715 and Arm(R) Mali(TM)-G615.

Resolves: COMPMID-5398

Signed-off-by: Freddie Liardet <frederick.liardet@arm.com>
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Change-Id: I8d06453645688f3658b6c7c06f1ebc25a2505661
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7932
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: SiCong Li <sicong.li@arm.com>
Reviewed-by: Pablo Marquez Tello <pablo.tello@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/gpu/cl/ClKernelLibrary.cpp b/src/gpu/cl/ClKernelLibrary.cpp
index 52661d6..0f08f5d 100644
--- a/src/gpu/cl/ClKernelLibrary.cpp
+++ b/src/gpu/cl/ClKernelLibrary.cpp
@@ -303,6 +303,7 @@
     { "gemmlowp_mm_reshaped_lhs_nt_rhs_t", "common/gemmlowp.cl" },
     { "gemmlowp_mm_reshaped_only_rhs_t", "common/gemmlowp.cl" },
     { "gemmlowp_mm_reshaped_only_rhs_t_fused_output_stage_fixedpoint", "common/gemmlowp.cl" },
+    { "gemmlowp_mm_reshaped_only_rhs_mmul", "common/gemmlowp_reshaped_only_rhs_mmul.cl" },
     { "gemmlowp_offset_contribution", "common/gemmlowp.cl" },
     { "gemmlowp_offset_contribution_quantize_down", "common/gemmlowp.cl" },
     { "gemmlowp_offset_contribution_quantize_down_fixedpoint", "common/gemmlowp.cl" },
@@ -618,6 +619,10 @@
 #include "./cl_kernels/common/gemmlowp.clembed"
     },
     {
+        "common/gemmlowp_reshaped_only_rhs_mmul.cl",
+#include "./cl_kernels/common/gemmlowp_reshaped_only_rhs_mmul.clembed"
+    },
+    {
         "common/gemv.cl",
 #include "./cl_kernels/common/gemv.clembed"
     },