Improve start-up time for GeMM (floating-point):

 - Pass M,N,K at runtime as kernel parameters
 - Add a guard macro to compile only the kernel of interest
 - Move reshaping kernels to gemm_utils.cl
 - Remove the fallback reshaping kernel with Y-Padding support

Resolves: COMPMID-4888
Signed-off-by: Ramy Elgammal <ramy.elgammal@arm.com>
Change-Id: Ida3851326f0b77e410633271de9ecca106e37931
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6662
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/gpu/cl/ClKernelLibrary.cpp b/src/gpu/cl/ClKernelLibrary.cpp
index c47cf8e..f87b226 100644
--- a/src/gpu/cl/ClKernelLibrary.cpp
+++ b/src/gpu/cl/ClKernelLibrary.cpp
@@ -290,10 +290,10 @@
     { "gemm_mm_reshaped_only_rhs_t_post_act_eltwise_op_act", "common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_reshaped_only_rhs.cl" },
     { "gemm_mm_reshaped_only_rhs_t_texture_post_act_eltwise_op_act", "common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_reshaped_only_rhs.cl" },
     { "gemm_lc_vm_f32", "common/gemm.cl" },
-    { "gemm_reshape_lhs_matrix_nt", "common/gemm.cl" },
-    { "gemm_reshape_lhs_matrix_t", "common/gemm.cl" },
-    { "gemm_reshape_rhs_matrix_nt", "common/gemm.cl" },
-    { "gemm_reshape_rhs_matrix_t", "common/gemm.cl" },
+    { "gemm_reshape_lhs_matrix_nt", "common/gemm_utils.cl" },
+    { "gemm_reshape_lhs_matrix_t", "common/gemm_utils.cl" },
+    { "gemm_reshape_rhs_matrix_nt", "common/gemm_utils.cl" },
+    { "gemm_reshape_rhs_matrix_t", "common/gemm_utils.cl" },
     { "gemmlowp_matrix_a_reduction", "common/gemmlowp.cl" },
     { "gemmlowp_matrix_a_reduction_dot8", "common/gemmlowp.cl" },
     { "gemmlowp_matrix_b_reduction", "common/gemmlowp.cl" },
@@ -590,6 +590,10 @@
 #include "./cl_kernels/common/gemm.clembed"
     },
     {
+        "common/gemm_utils.cl",
+#include "./cl_kernels/common/gemm_utils.clembed"
+    },
+    {
         "common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_native.cl",
 #include "./cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_native.clembed"
     },