COMPMID-3326: Update heuristic for GEMMReshaped and GEMMReshapedOnlyRHS

- Update the heuristic for Arm Mali-G76 (F32) so that GEMM uses the OpenCL
  image2d object for the RHS matrix when it is expected to be beneficial
- Add a utility function that validates whether image2d can be used for the RHS matrix

Change-Id: I0913ac5f27fd07992b0ac188af753a2abeb034ca
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3559
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.cpp b/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.cpp
index f662089..581c2d2 100644
--- a/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.cpp
+++ b/src/core/CL/gemm/reshaped_only_rhs/CLGEMMReshapedOnlyRHSKernelConfigurationBifrost.cpp
@@ -27,6 +27,9 @@
 #include "arm_compute/core/CL/CLKernelLibrary.h"
 #include "arm_compute/core/CL/gemm/CLGEMMHelpers.h"
 #include "arm_compute/core/GPUTarget.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
 
 #include <map>
 #include <utility>
@@ -35,6 +38,8 @@
 {
 namespace cl_gemm
 {
+using namespace arm_compute::misc::shape_calculator;
+
 CLGEMMReshapedOnlyRHSKernelConfigurationBifrost::CLGEMMReshapedOnlyRHSKernelConfigurationBifrost(GPUTarget gpu)
     : ICLGEMMKernelConfiguration(gpu)
 {
@@ -139,14 +144,47 @@
     ARM_COMPUTE_UNUSED(k);
     ARM_COMPUTE_UNUSED(b);
 
+    GEMMLHSMatrixInfo lhs_info_buf;
+    GEMMRHSMatrixInfo rhs_info_buf;
+    GEMMLHSMatrixInfo lhs_info_img;
+    GEMMRHSMatrixInfo rhs_info_img;
+
+    // Get lhs_info/rhs_info in case of OpenCL buffer
     if(m == 1)
     {
         const unsigned int h0 = std::max(n / 2, 1U);
-        return configure_lhs_rhs_info(m, n, 1, 2, 8, 1, h0, false, true, false, true);
+        std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 1, 2, 8, 1, h0, false, true, false, true);
     }
     else
     {
-        return configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 2, false, true, false, true);
+        std::tie(lhs_info_buf, rhs_info_buf) = configure_lhs_rhs_info(m, n, 4, 4, 4, 1, 2, false, true, false, true);
+    }
+
+    // Get lhs_info/rhs_info in case of OpenCL image
+    if(m == 1)
+    {
+        std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 1, 4, 4, 1, 4, false, true, false, false, true);
+    }
+    else
+    {
+        const int h0 = std::max(std::min(static_cast<int>(n / 4), static_cast<int>(16)), static_cast<int>(1));
+        std::tie(lhs_info_img, rhs_info_img) = configure_lhs_rhs_info(m, n, 4, 4, 4, 1, h0, false, true, false, false, true);
+    }
+
+    const TensorInfo  tensor_rhs_info(TensorShape(n, k, b), 1, DataType::F32);
+    const TensorShape shape = compute_rhs_reshaped_shape(tensor_rhs_info, rhs_info_img);
+    const TensorInfo  tensor_reshaped_info(shape, 1, DataType::F32);
+
+    // In case of vector by matrix with few work-items, we use the OpenCL buffer rather than the OpenCL image2d
+    const bool use_cl_image2d = (m == 1 && n <= 4096) ? false : true;
+
+    if(bool(validate_image2d_support_on_rhs(tensor_reshaped_info, rhs_info_img)) && use_cl_image2d)
+    {
+        return std::make_pair(lhs_info_img, rhs_info_img);
+    }
+    else
+    {
+        return std::make_pair(lhs_info_buf, rhs_info_buf);
     }
 }