Add Mali™-G720 and Mali™-G620 as GpuTargets
This patch adds the latest GPUs as GpuTargets and sets up kernel selection heuristics for MatMul to address some nightly issues.
Resolves: COMPMID-6766
Change-Id: I29dbb08c5ecfb3fcd63230b0b1675ab557074aca
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10902
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/gpu/cl/operators/ClMatMul.cpp b/src/gpu/cl/operators/ClMatMul.cpp
index 4330300..28a2aa2 100644
--- a/src/gpu/cl/operators/ClMatMul.cpp
+++ b/src/gpu/cl/operators/ClMatMul.cpp
@@ -34,6 +34,7 @@
#include "src/gpu/cl/kernels/ClMatMulNativeMMULKernel.h"
#include "src/runtime/heuristics/matmul_native/ClMatMulNativeDefaultConfigValhall.h"
#include "src/runtime/heuristics/matmul_native/ClMatMulNativeKernelConfig.h"
+#include "src/runtime/heuristics/matmul_native/ClMatMulNativeKernelVariant.h"
#include "src/runtime/heuristics/matmul_native/IClMatMulNativeKernelConfig.h"
using namespace arm_compute::cl_matmul;
@@ -42,57 +43,6 @@
{
namespace opencl
{
-namespace
-{
-enum class MatMulKernelType
-{
- /** Native matrix multiplication for FP types */
- NATIVE_FP,
-
- /** Native matrix multiplication for quantized types */
- NATIVE_QUANTIZED,
-
- /** Native matrix multiplication using MMUL extension for FP types */
- NATIVE_MMUL_FP,
-
- /** Native matrix multiplication using MMUL extension for Quantized types */
- NATIVE_MMUL_QUANTIZED
-};
-
-MatMulKernelType get_matmul_kernel(const ITensorInfo *lhs,
- const ITensorInfo *rhs,
- const MatMulInfo &matmul_info,
- const ActivationLayerInfo &act_info)
-{
- ARM_COMPUTE_UNUSED(lhs, rhs, matmul_info, act_info);
-
- const bool is_quantized = is_data_type_quantized_asymmetric(lhs->data_type());
- const bool is_mmul_supported = arm_matrix_multiply_supported(CLKernelLibrary::get().get_device());
-
- const int k = matmul_info.adj_lhs() ? lhs->tensor_shape().y() : lhs->tensor_shape().x();
-
- if (is_quantized)
- {
- // MMUL kernel works only when K is a multiple of 16
- if (is_mmul_supported && !act_info.enabled() && k % 16 == 0)
- {
- return MatMulKernelType::NATIVE_MMUL_QUANTIZED;
- }
-
- return MatMulKernelType::NATIVE_QUANTIZED;
- }
- else
- {
- // MMUL kernel works only when K is a multiple of 4
- if (is_mmul_supported && !act_info.enabled() && k % 4 == 0)
- {
- return MatMulKernelType::NATIVE_MMUL_FP;
- }
-
- return MatMulKernelType::NATIVE_FP;
- }
-}
-} // namespace
using namespace arm_compute::opencl::kernels;
ClMatMul::ClMatMul()
@@ -117,7 +67,10 @@
const MatMulKernelInfo kernel_info = t->configure(lhs, rhs, matmul_info);
- switch (get_matmul_kernel(lhs, rhs, matmul_info, act_info))
+ const auto kernel_selector = ClMatMulNativeKernelVariantFactory::create(gpu_target);
+ const MatMulKernelType kernel_type = kernel_selector->select_kernel(lhs, rhs, matmul_info, act_info);
+
+ switch (kernel_type)
{
case MatMulKernelType::NATIVE_FP:
return ClMatMulNativeKernel::validate(lhs, rhs, nullptr /* bias */, dst, kernel_info, act_info);
@@ -149,7 +102,10 @@
const auto kernel_config = ClMatMulNativeKernelConfigurationFactory::create(gpu_target);
const MatMulKernelInfo kernel_info = kernel_config->configure(lhs, rhs, matmul_info);
- switch (get_matmul_kernel(lhs, rhs, matmul_info, act_info))
+ const auto kernel_selector = ClMatMulNativeKernelVariantFactory::create(gpu_target);
+ const MatMulKernelType kernel_type = kernel_selector->select_kernel(lhs, rhs, matmul_info, act_info);
+
+ switch (kernel_type)
{
case MatMulKernelType::NATIVE_FP:
{