Add Mali™-G720 and Mali™-G620 as GpuTargets

This patch adds the latest GPUs, Mali™-G720 and Mali™-G620, as GpuTargets and sets up the kernel selection heuristics for MatMul to address some nightly issues.
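
For context, the deleted file-local heuristic in ClMatMul.cpp now lives behind a
per-target factory. A minimal sketch of how the dispatch might look, assuming the
variant factory mirrors the existing ClMatMulNativeKernelConfigurationFactory
(IClMatMulNativeKernelVariant, ClMatMulNativeDefaultVariantValhall, and the
FIFTHGEN arch case are assumed names, not confirmed by this patch):

    // Sketch only: dispatch on GPU architecture, as the configuration
    // factory does. The FIFTHGEN case covering Mali-G720/G620 is assumed.
    std::unique_ptr<IClMatMulNativeKernelVariant>
    ClMatMulNativeKernelVariantFactory::create(GPUTarget gpu)
    {
        switch (get_arch_from_target(gpu))
        {
            case GPUTarget::MIDGARD:
            case GPUTarget::BIFROST:
            case GPUTarget::VALHALL:
            case GPUTarget::FIFTHGEN: // new Mali-G720 / Mali-G620 targets
                return std::make_unique<ClMatMulNativeDefaultVariantValhall>(gpu);
            default:
                ARM_COMPUTE_ERROR("Not supported GPU target");
        }
    }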

Resolves: COMPMID-6766

Change-Id: I29dbb08c5ecfb3fcd63230b0b1675ab557074aca
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10902
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/gpu/cl/operators/ClMatMul.cpp b/src/gpu/cl/operators/ClMatMul.cpp
index 4330300..28a2aa2 100644
--- a/src/gpu/cl/operators/ClMatMul.cpp
+++ b/src/gpu/cl/operators/ClMatMul.cpp
@@ -34,6 +34,7 @@
 #include "src/gpu/cl/kernels/ClMatMulNativeMMULKernel.h"
 #include "src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.h"
 #include "src/runtime/heuristics/matmul_native/ClMatMulNativeKernelConfig.h"
+#include "src/runtime/heuristics/matmul_native/ClMatMulNativeKernelVariant.h"
 #include "src/runtime/heuristics/matmul_native/IClMatMulNativeKernelConfig.h"
 
 using namespace arm_compute::cl_matmul;
@@ -42,57 +43,6 @@
 {
 namespace opencl
 {
-namespace
-{
-enum class MatMulKernelType
-{
-    /** Native matrix multiplication for FP types */
-    NATIVE_FP,
-
-    /** Native matrix multiplication for quantized types */
-    NATIVE_QUANTIZED,
-
-    /** Native matrix multiplication using MMUL extension for FP types */
-    NATIVE_MMUL_FP,
-
-    /** Native matrix multiplication using MMUL extension for Quantized types */
-    NATIVE_MMUL_QUANTIZED
-};
-
-MatMulKernelType get_matmul_kernel(const ITensorInfo         *lhs,
-                                   const ITensorInfo         *rhs,
-                                   const MatMulInfo          &matmul_info,
-                                   const ActivationLayerInfo &act_info)
-{
-    ARM_COMPUTE_UNUSED(lhs, rhs, matmul_info, act_info);
-
-    const bool is_quantized      = is_data_type_quantized_asymmetric(lhs->data_type());
-    const bool is_mmul_supported = arm_matrix_multiply_supported(CLKernelLibrary::get().get_device());
-
-    const int k = matmul_info.adj_lhs() ? lhs->tensor_shape().y() : lhs->tensor_shape().x();
-
-    if (is_quantized)
-    {
-        // MMUL kernel works only when K is a multiple of 16
-        if (is_mmul_supported && !act_info.enabled() && k % 16 == 0)
-        {
-            return MatMulKernelType::NATIVE_MMUL_QUANTIZED;
-        }
-
-        return MatMulKernelType::NATIVE_QUANTIZED;
-    }
-    else
-    {
-        // MMUL kernel works only when K is a multiple of 4
-        if (is_mmul_supported && !act_info.enabled() && k % 4 == 0)
-        {
-            return MatMulKernelType::NATIVE_MMUL_FP;
-        }
-
-        return MatMulKernelType::NATIVE_FP;
-    }
-}
-} // namespace
 using namespace arm_compute::opencl::kernels;
 
 ClMatMul::ClMatMul()
@@ -117,7 +67,10 @@
 
     const MatMulKernelInfo kernel_info = t->configure(lhs, rhs, matmul_info);
 
-    switch (get_matmul_kernel(lhs, rhs, matmul_info, act_info))
+    const auto             kernel_selector = ClMatMulNativeKernelVariantFactory::create(gpu_target);
+    const MatMulKernelType kernel_type     = kernel_selector->select_kernel(lhs, rhs, matmul_info, act_info);
+
+    switch (kernel_type)
     {
         case MatMulKernelType::NATIVE_FP:
             return ClMatMulNativeKernel::validate(lhs, rhs, nullptr /* bias */, dst, kernel_info, act_info);
@@ -149,7 +102,10 @@
     const auto             kernel_config = ClMatMulNativeKernelConfigurationFactory::create(gpu_target);
     const MatMulKernelInfo kernel_info   = kernel_config->configure(lhs, rhs, matmul_info);
 
-    switch (get_matmul_kernel(lhs, rhs, matmul_info, act_info))
+    const auto             kernel_selector = ClMatMulNativeKernelVariantFactory::create(gpu_target);
+    const MatMulKernelType kernel_type     = kernel_selector->select_kernel(lhs, rhs, matmul_info, act_info);
+
+    switch (kernel_type)
     {
         case MatMulKernelType::NATIVE_FP:
         {
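
The select_kernel() call above replaces the deleted get_matmul_kernel() helper.
A minimal sketch of the relocated heuristic, assuming the logic moved over
unchanged from the deleted code (the concrete variant class name is hypothetical):

    // Sketch: same decision tree as the removed get_matmul_kernel() above,
    // now a member of an assumed per-target variant class.
    MatMulKernelType ClMatMulNativeDefaultVariantValhall::select_kernel(const ITensorInfo         *lhs,
                                                                        const ITensorInfo         *rhs,
                                                                        const MatMulInfo          &matmul_info,
                                                                        const ActivationLayerInfo &act_info)
    {
        ARM_COMPUTE_UNUSED(rhs);

        const bool is_quantized      = is_data_type_quantized_asymmetric(lhs->data_type());
        const bool is_mmul_supported = arm_matrix_multiply_supported(CLKernelLibrary::get().get_device());

        // K is the reduction dimension; adj_lhs() means the LHS is transposed, so K changes axis.
        const int k = matmul_info.adj_lhs() ? lhs->tensor_shape().y() : lhs->tensor_shape().x();

        if (is_quantized)
        {
            // The quantized MMUL kernel requires K % 16 == 0 and no fused activation.
            return (is_mmul_supported && !act_info.enabled() && k % 16 == 0)
                       ? MatMulKernelType::NATIVE_MMUL_QUANTIZED
                       : MatMulKernelType::NATIVE_QUANTIZED;
        }

        // The FP MMUL kernel requires K % 4 == 0 and no fused activation.
        return (is_mmul_supported && !act_info.enabled() && k % 4 == 0)
                   ? MatMulKernelType::NATIVE_MMUL_FP
                   : MatMulKernelType::NATIVE_FP;
    }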