Add GEMM MMUL Reshaped Only Rhs Support for FP32/FP16

This patch introduces a GEMM routine that is optimized for Arm(R) Mali(TM)-G715 and Arm(R) Mali(TM)-G615.

Resolves: COMPMID-5216
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Change-Id: I2e5d7806f5904347185bb3e250f73d73d6669dba
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7914
Reviewed-by: SiCong Li <sicong.li@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
diff --git a/Android.bp b/Android.bp
index 2590469..16e67ad 100644
--- a/Android.bp
+++ b/Android.bp
@@ -40,6 +40,7 @@
         "src/core/CL/cl_kernels/common/floor.cl",
         "src/core/CL/cl_kernels/common/gather.cl",
         "src/core/CL/cl_kernels/common/gemm.cl",
+        "src/core/CL/cl_kernels/common/gemm_reshaped_only_rhs_mmul.cl",
         "src/core/CL/cl_kernels/common/gemm_utils.cl",
         "src/core/CL/cl_kernels/common/gemmlowp.cl",
         "src/core/CL/cl_kernels/common/gemv.cl",
@@ -617,6 +618,7 @@
         "src/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.cpp",
         "src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.cpp",
         "src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.cpp",
+        "src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsMMULKernel.cpp",
         "src/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.cpp",
         "src/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.cpp",
         "src/gpu/cl/kernels/ClHeightConcatenateKernel.cpp",