Add GEMM MMUL Reshaped Only Rhs Support for FP32/FP16

This patch introduces a GEMM routine that is optimized for Arm(R) Mali(TM)-G715 and Arm(R) Mali(TM)-G615.

Resolves: COMPMID-5216
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Change-Id: I2e5d7806f5904347185bb3e250f73d73d6669dba
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7914
Reviewed-by: SiCong Li <sicong.li@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/runtime/CL/functions/CLGEMM.cpp b/src/runtime/CL/functions/CLGEMM.cpp
index cc6689c..427ea51 100644
--- a/src/runtime/CL/functions/CLGEMM.cpp
+++ b/src/runtime/CL/functions/CLGEMM.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -30,7 +30,6 @@
 #include "arm_compute/core/TensorInfo.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/core/Utils.h"
-#include "arm_compute/runtime/CL/functions/CLGEMM.h"
 #include "src/core/helpers/MemoryHelpers.h"
 #include "src/gpu/cl/operators/ClGemm.h"
 
diff --git a/src/runtime/CL/gemm/CLGEMMDefaultTypeValhall.cpp b/src/runtime/CL/gemm/CLGEMMDefaultTypeValhall.cpp
index 64271a8..4c7daf9 100644
--- a/src/runtime/CL/gemm/CLGEMMDefaultTypeValhall.cpp
+++ b/src/runtime/CL/gemm/CLGEMMDefaultTypeValhall.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2021 Arm Limited.
+ * Copyright (c) 2020-2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -79,10 +79,28 @@
         { DataType::QSYMM8_PER_CHANNEL, &CLGEMMDefaultTypeValhall::default_q8 }
     };
 
+    // Mali-G715 and Mali-G615 configurations
+    static std::map<DataType, FunctionExecutorPtr> gemm_g715_configs =
+    {
+        { DataType::F32, &CLGEMMDefaultTypeValhall::g715_f32 },
+        { DataType::F16, &CLGEMMDefaultTypeValhall::g715_f16 },
+        { DataType::QASYMM8, &CLGEMMDefaultTypeValhall::default_q8 },
+        { DataType::QASYMM8_SIGNED, &CLGEMMDefaultTypeValhall::default_q8 },
+        { DataType::QSYMM8, &CLGEMMDefaultTypeValhall::default_q8 },
+        { DataType::QSYMM8_PER_CHANNEL, &CLGEMMDefaultTypeValhall::default_q8 }
+    };
+
     const DataType data_type = params.data_type;
 
     switch(_target)
     {
+        case GPUTarget::G715:
+        case GPUTarget::G615:
+            if(gemm_g715_configs.find(data_type) != gemm_g715_configs.end())
+            {
+                return (this->*gemm_g715_configs[data_type])(params.m, params.n, params.k, params.b, params.is_rhs_constant);
+            }
+            ARM_COMPUTE_ERROR("Not supported data type");
         case GPUTarget::G78:
             if(gemm_g78_configs.find(data_type) != gemm_g78_configs.end())
             {
@@ -306,5 +324,46 @@
 
     return CLGEMMKernelType::RESHAPED_ONLY_RHS;
 }
+
+CLGEMMKernelType CLGEMMDefaultTypeValhall::g715_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant)
+{ // F32 entry of the Mali-G715/G615 table: yields RESHAPED_ONLY_RHS_MMUL or whatever default_f32() selects.
+    if(!is_rhs_constant) // Non-constant RHS never takes the MMUL path; defer to the default F32 heuristic.
+    {
+        return default_f32(m, n, k, b, is_rhs_constant);
+    }
+
+    unsigned int best_m0; // Left uninitialized; only handed to the query below and never read in this function.
+    unsigned int best_n0; // Same as best_m0. NOTE(review): presumably out-arguments of is_mmul_kernel_preferred - confirm.
+
+    if(opencl::kernels::gemm::is_mmul_kernel_preferred(m, n, k, b, DataType::F32, best_m0, best_n0))
+    {
+        return CLGEMMKernelType::RESHAPED_ONLY_RHS_MMUL;
+    }
+    else
+    {
+        return default_f32(m, n, k, b, is_rhs_constant); // MMUL not preferred: same fallback as the non-constant RHS case.
+    }
+}
+
+CLGEMMKernelType CLGEMMDefaultTypeValhall::g715_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant)
+{ // F16 entry of the Mali-G715/G615 table: yields RESHAPED_ONLY_RHS_MMUL or whatever g78_f16() selects.
+    if(!is_rhs_constant) // Non-constant RHS never takes the MMUL path; reuse the G78 F16 heuristic.
+    {
+        return g78_f16(m, n, k, b, is_rhs_constant);
+    }
+
+    unsigned int best_m0; // Left uninitialized; only handed to the query below and never read in this function.
+    unsigned int best_n0; // Same as best_m0. NOTE(review): presumably out-arguments of is_mmul_kernel_preferred - confirm.
+
+    if(opencl::kernels::gemm::is_mmul_kernel_preferred(m, n, k, b, DataType::F16, best_m0, best_n0))
+    {
+        return CLGEMMKernelType::RESHAPED_ONLY_RHS_MMUL;
+    }
+    else
+    {
+        return g78_f16(m, n, k, b, is_rhs_constant); // MMUL not preferred: same fallback as the non-constant RHS case.
+    }
+}
+
 } // namespace cl_gemm
 } // namespace arm_compute
diff --git a/src/runtime/CL/gemm/CLGEMMDefaultTypeValhall.h b/src/runtime/CL/gemm/CLGEMMDefaultTypeValhall.h
index c88fbcf..0893f11 100644
--- a/src/runtime/CL/gemm/CLGEMMDefaultTypeValhall.h
+++ b/src/runtime/CL/gemm/CLGEMMDefaultTypeValhall.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2021 Arm Limited.
+ * Copyright (c) 2020-2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -50,6 +50,8 @@
     CLGEMMKernelType g77_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant);
     CLGEMMKernelType g78_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant);
     CLGEMMKernelType g78_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant);
+    CLGEMMKernelType g715_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant);
+    CLGEMMKernelType g715_f16(unsigned int m, unsigned int n, unsigned int k, unsigned int b, bool is_rhs_constant);
 };
 } // namespace cl_gemm
 } // namespace arm_compute