Add Mali™-G720 and Mali™-G620 as GpuTargets
This patch adds the latest GPUs as GpuTargets and sets up kernel selection heuristics for MatMul to address some nightly issues.
Resolves: COMPMID-6766
Change-Id: I29dbb08c5ecfb3fcd63230b0b1675ab557074aca
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10902
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/gpu/cl/operators/ClMatMul.cpp b/src/gpu/cl/operators/ClMatMul.cpp
index 4330300..28a2aa2 100644
--- a/src/gpu/cl/operators/ClMatMul.cpp
+++ b/src/gpu/cl/operators/ClMatMul.cpp
@@ -34,6 +34,7 @@
#include "src/gpu/cl/kernels/ClMatMulNativeMMULKernel.h"
#include "src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.h"
#include "src/runtime/heuristics/matmul_native/ClMatMulNativeKernelConfig.h"
+#include "src/runtime/heuristics/matmul_native/ClMatMulNativeKernelVariant.h"
#include "src/runtime/heuristics/matmul_native/IClMatMulNativeKernelConfig.h"
using namespace arm_compute::cl_matmul;
@@ -42,57 +43,6 @@
{
namespace opencl
{
-namespace
-{
-enum class MatMulKernelType
-{
- /** Native matrix multiplication for FP types */
- NATIVE_FP,
-
- /** Native matrix multiplication for quantized types */
- NATIVE_QUANTIZED,
-
- /** Native matrix multiplication using MMUL extension for FP types */
- NATIVE_MMUL_FP,
-
- /** Native matrix multiplication using MMUL extension for Quantized types */
- NATIVE_MMUL_QUANTIZED
-};
-
-MatMulKernelType get_matmul_kernel(const ITensorInfo *lhs,
- const ITensorInfo *rhs,
- const MatMulInfo &matmul_info,
- const ActivationLayerInfo &act_info)
-{
- ARM_COMPUTE_UNUSED(lhs, rhs, matmul_info, act_info);
-
- const bool is_quantized = is_data_type_quantized_asymmetric(lhs->data_type());
- const bool is_mmul_supported = arm_matrix_multiply_supported(CLKernelLibrary::get().get_device());
-
- const int k = matmul_info.adj_lhs() ? lhs->tensor_shape().y() : lhs->tensor_shape().x();
-
- if (is_quantized)
- {
- // MMUL kernel works only when K is a multiple of 16
- if (is_mmul_supported && !act_info.enabled() && k % 16 == 0)
- {
- return MatMulKernelType::NATIVE_MMUL_QUANTIZED;
- }
-
- return MatMulKernelType::NATIVE_QUANTIZED;
- }
- else
- {
- // MMUL kernel works only when K is a multiple of 4
- if (is_mmul_supported && !act_info.enabled() && k % 4 == 0)
- {
- return MatMulKernelType::NATIVE_MMUL_FP;
- }
-
- return MatMulKernelType::NATIVE_FP;
- }
-}
-} // namespace
using namespace arm_compute::opencl::kernels;
ClMatMul::ClMatMul()
@@ -117,7 +67,10 @@
const MatMulKernelInfo kernel_info = t->configure(lhs, rhs, matmul_info);
- switch (get_matmul_kernel(lhs, rhs, matmul_info, act_info))
+ const auto kernel_selector = ClMatMulNativeKernelVariantFactory::create(gpu_target);
+ const MatMulKernelType kernel_type = kernel_selector->select_kernel(lhs, rhs, matmul_info, act_info);
+
+ switch (kernel_type)
{
case MatMulKernelType::NATIVE_FP:
return ClMatMulNativeKernel::validate(lhs, rhs, nullptr /* bias */, dst, kernel_info, act_info);
@@ -149,7 +102,10 @@
const auto kernel_config = ClMatMulNativeKernelConfigurationFactory::create(gpu_target);
const MatMulKernelInfo kernel_info = kernel_config->configure(lhs, rhs, matmul_info);
- switch (get_matmul_kernel(lhs, rhs, matmul_info, act_info))
+ const auto kernel_selector = ClMatMulNativeKernelVariantFactory::create(gpu_target);
+ const MatMulKernelType kernel_type = kernel_selector->select_kernel(lhs, rhs, matmul_info, act_info);
+
+ switch (kernel_type)
{
case MatMulKernelType::NATIVE_FP:
{