Implement OpenCL MatMul heuristic for Arm® Mali™-G710

- Add heuristic for f32/f16 and int8 quantized data types
- Include MatMul configuration selection in the ClMatMul operator

Resolves COMPMID-5950, COMPMID-5957, COMPMID-5959, COMPMID-5925,
COMPMID-5926, COMPMID-5927, COMPMID-5928

Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Change-Id: Ic222148da0337b88d4d8c960e3b6ac31003d8bcb
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9564
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/gpu/cl/operators/ClMatMul.cpp b/src/gpu/cl/operators/ClMatMul.cpp
index 3ad6d91..1583321 100644
--- a/src/gpu/cl/operators/ClMatMul.cpp
+++ b/src/gpu/cl/operators/ClMatMul.cpp
@@ -26,6 +26,11 @@
 #include "arm_compute/runtime/CL/CLScheduler.h"
 #include "src/common/utils/Log.h"
 #include "src/gpu/cl/kernels/ClMatMulNativeKernel.h"
+#include "src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.h"
+#include "src/runtime/heuristics/matmul_native/ClMatMulNativeKernelConfig.h"
+#include "src/runtime/heuristics/matmul_native/IClMatMulNativeKernelConfig.h"
+
+using namespace arm_compute::cl_matmul;
 
 namespace arm_compute
 {
@@ -41,9 +46,12 @@
 }
 Status ClMatMul::validate(const ITensorInfo *lhs, const ITensorInfo *rhs, const ITensorInfo *output, const MatMulInfo &matmul_info)
 {
-    MatMulKernelInfo kernel_info;
-    kernel_info.adj_lhs = matmul_info.adj_lhs();
-    kernel_info.adj_rhs = matmul_info.adj_rhs();
+    const GPUTarget gpu_target = CLScheduler::get().target();
+
+    std::unique_ptr<IClMatMulNativeKernelConfig> t = ClMatMulNativeKernelConfigurationFactory::create(gpu_target);
+
+    MatMulKernelInfo kernel_info = t->configure(lhs, rhs, matmul_info);
+
     return ClMatMulNativeKernel::validate(lhs, rhs, output, kernel_info);
 }
 void ClMatMul::configure(const CLCompileContext &compile_context, ITensorInfo *lhs, ITensorInfo *rhs, ITensorInfo *output, const MatMulInfo &matmul_info)
@@ -55,16 +63,9 @@
     ARM_COMPUTE_ERROR_THROW_ON(validate(lhs, rhs, output, matmul_info));
     const GPUTarget gpu_target = CLScheduler::get().target();
 
-    // Placeholder: Getting the heuristics calculated values for M0, N0, K0, and whether to export RHS to texture pipe
+    std::unique_ptr<IClMatMulNativeKernelConfig> t = ClMatMulNativeKernelConfigurationFactory::create(gpu_target);
 
-    // Filling the MatMul Kernel info
-    MatMulKernelInfo kernel_info;
-    kernel_info.adj_lhs                = matmul_info.adj_lhs();
-    kernel_info.adj_rhs                = matmul_info.adj_rhs();
-    kernel_info.m0                     = 1;     // to be properly calculated from heuristics
-    kernel_info.n0                     = 4;     // to be properly calculated from heuristics
-    kernel_info.k0                     = 4;     // to be properly calculated from heuristics
-    kernel_info.export_rhs_to_cl_image = false; // to be properly determined from heuristics
+    MatMulKernelInfo kernel_info = t->configure(lhs, rhs, matmul_info);
 
     // Set the target for the kernels
     _native_matmul_kernel->set_target(gpu_target);