COMPMID-2097: Implement a heuristic to dispatch CLGEMMReshapedOnlyRHS kernel from CLGEMM

Change-Id: I4170a80647b02501aa669e2c0347ddc39888ee76
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-on: https://review.mlplatform.org/c/928
Reviewed-by: Giuseppe Rossini <giuseppe.rossini@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
diff --git a/arm_compute/runtime/CL/ICLGEMMReshapedConfiguration.h b/arm_compute/runtime/CL/ICLGEMMReshapedConfiguration.h
deleted file mode 100644
index 500d9cd..0000000
--- a/arm_compute/runtime/CL/ICLGEMMReshapedConfiguration.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_ICLGEMMRESHAPEDCONFIGURATION_H__
-#define __ARM_COMPUTE_ICLGEMMRESHAPEDCONFIGURATION_H__
-
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-/** Basic interface for the GEMM selection */
-class ICLGEMMReshapedConfiguration
-{
-public:
-    /** Virtual destructor */
-    virtual ~ICLGEMMReshapedConfiguration() = default;
-    /** Given M, N, K and B, this method returns the @ref GEMMLHSMatrixInfo and @ref GEMMRHSMatrixInfo to be used with @ref CLGEMMMatrixMultiplyReshapedKernel
-     *
-     * @param[in] m         Number of rows LHS matrix
-     * @param[in] n         Number of columns RHS matrix
-     * @param[in] k         Number of columns LHS matrix or number of rows RHS matrix
-     * @param[in] b         Batch size
-     * @param[in] data_type Data type
-     */
-    virtual std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) = 0;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_ICLGEMMRESHAPEDCONFIGURATION_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLGEMM.h b/arm_compute/runtime/CL/functions/CLGEMM.h
index 0bad446..8c462fa 100644
--- a/arm_compute/runtime/CL/functions/CLGEMM.h
+++ b/arm_compute/runtime/CL/functions/CLGEMM.h
@@ -27,6 +27,7 @@
 #include "arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h"
 #include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
 #include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
+#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
 #include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
 #include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
 #include "arm_compute/runtime/CL/CLMemoryGroup.h"
@@ -40,10 +41,11 @@
 
 /** Basic function to execute GEMM on OpenCL. This function calls the following OpenCL kernels:
  *
- *  -# @ref CLGEMMReshapeLHSMatrixKernel (only if the reshaped GEMM is selected by the heuristic model)
- *  -# @ref CLGEMMReshapeRHSMatrixKernel (only if the reshaped GEMM is selected by the heuristic model)
- *  -# @ref CLGEMMMatrixMultiplyKernel (if GPU target is NOT G76 or if the reshaped GEMM is NOT selected)
- *  -# @ref CLGEMMMatrixMultiplyReshapedKernel (only if the reshaped GEMM is selected by the heuristic model and the GPU target IS Mali-G76)
+ *  -# @ref CLGEMMReshapeLHSMatrixKernel (only if RESHAPED_V1 is selected by the heuristic model)
+ *  -# @ref CLGEMMReshapeRHSMatrixKernel (only if either RESHAPED_V1 or RESHAPED_ONLY_RHS is selected by the select_gemm_type() method)
+ *  -# @ref CLGEMMMatrixMultiplyKernel (only if either NATIVE or RESHAPED_V1 is selected by the select_gemm_type() method)
+ *  -# @ref CLGEMMMatrixMultiplyReshapedKernel (only if RESHAPED_V1 is selected by the select_gemm_type() method)
+ *  -# @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel (only if RESHAPED_ONLY_RHS is selected by the select_gemm_type() method)
  *  -# @ref CLGEMMMatrixAdditionKernel (if c != nullptr and beta != 0.0)
  *
  */
@@ -102,20 +104,41 @@
     void prepare() override;
 
 private:
-    CLMemoryGroup                      _memory_group;
-    CLGEMMMatrixMultiplyKernel         _mm_kernel;
-    CLGEMMMatrixAdditionKernel         _ma_kernel;
-    CLGEMMReshapeLHSMatrixKernel       _reshape_lhs_kernel;
-    CLGEMMReshapeRHSMatrixKernel       _reshape_rhs_kernel;
-    CLGEMMMatrixMultiplyReshapedKernel _mm_reshaped_kernel;
-    CLTensor                           _tmp_a;
-    CLTensor                           _tmp_b;
-    const ICLTensor                   *_original_b;
-    bool                               _is_interleaved_transposed;
-    bool                               _run_addition;
-    bool                               _reshape_b_only_on_first_run;
-    bool                               _is_prepared;
-    bool                               _is_new_gemm_reshaped; // Remove when COMPMID-1892 is completed
+    enum class GEMMType /**< GEMM variant returned by select_gemm_type() and stored in _gemm_type */
+    {
+        NATIVE,           /**< Per the CLGEMM kernel list: CLGEMMMatrixMultiplyKernel is used; neither reshape kernel is listed for this type */
+        RESHAPED_V1,      /**< Per the CLGEMM kernel list: CLGEMMReshapeLHSMatrixKernel and CLGEMMReshapeRHSMatrixKernel are both used */
+        RESHAPED_V2,      /**< NOTE(review): not mentioned in the CLGEMM kernel list; presumably handled by configure_reshaped_v2() - TODO confirm */
+        RESHAPED_ONLY_RHS /**< Per the CLGEMM kernel list: CLGEMMReshapeRHSMatrixKernel and CLGEMMMatrixMultiplyReshapedOnlyRHSKernel are used */
+    };
+
+    // TODO (COMPMID-2095)
+    static GEMMType select_gemm_type(unsigned int m, unsigned int n, unsigned int k, DataType data_type, bool reshape_b_only_on_first_run, GPUTarget gpu_target);
+
+    void configure_native(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info);
+    void configure_reshaped_v1(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info);
+    void configure_reshaped_v2(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info);
+    void configure_reshaped_only_rhs(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info);
+
+    static Status validate_native(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info);
+    static Status validate_reshaped_v1(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info);
+    static Status validate_reshaped_v2(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info);
+    static Status validate_reshaped_only_rhs(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info);
+
+    CLMemoryGroup                             _memory_group;
+    CLGEMMMatrixMultiplyKernel                _mm_kernel;
+    CLGEMMMatrixAdditionKernel                _ma_kernel;
+    CLGEMMReshapeLHSMatrixKernel              _reshape_lhs_kernel;
+    CLGEMMReshapeRHSMatrixKernel              _reshape_rhs_kernel;
+    CLGEMMMatrixMultiplyReshapedKernel        _mm_reshaped_kernel;
+    CLGEMMMatrixMultiplyReshapedOnlyRHSKernel _mm_reshaped_only_rhs_kernel;
+    CLTensor                                  _tmp_a;
+    CLTensor                                  _tmp_b;
+    const ICLTensor                          *_original_b;
+    bool                                      _run_addition;
+    bool                                      _reshape_b_only_on_first_run;
+    bool                                      _is_prepared;
+    GEMMType                                  _gemm_type;
 };
 } // namespace arm_compute
 
diff --git a/arm_compute/runtime/CL/gemm_reshaped/CLGEMMReshapedConfiguration.h b/arm_compute/runtime/CL/gemm_reshaped/CLGEMMReshapedConfiguration.h
deleted file mode 100644
index 3458911..0000000
--- a/arm_compute/runtime/CL/gemm_reshaped/CLGEMMReshapedConfiguration.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_CLGEMMRESHAPEDCONFIGURATION_H__
-#define __ARM_COMPUTE_CLGEMMRESHAPEDCONFIGURATION_H__
-
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/runtime/CL/ICLGEMMReshapedConfiguration.h"
-#include "arm_compute/runtime/CL/gemm_reshaped/CLGEMMReshapedConfigurationBifrost.h"
-
-#include <memory>
-
-namespace arm_compute
-{
-namespace cl_gemm
-{
-/** Tuner factory class */
-class CLGEMMReshapedConfigurationFactory final
-{
-public:
-    static std::unique_ptr<ICLGEMMReshapedConfiguration> create()
-    {
-        GPUTarget arch = get_arch_from_target(CLScheduler::get().target());
-
-        switch(arch)
-        {
-            case GPUTarget::BIFROST:
-                return support::cpp14::make_unique<CLGEMMReshapedConfigurationBifrost>();
-            default:
-                return nullptr;
-        }
-    }
-};
-} // namespace tuners
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLGEMMRESHAPEDCONFIGURATION_H__ */
diff --git a/arm_compute/runtime/CL/gemm_reshaped/CLGEMMReshapedConfigurationBifrost.h b/arm_compute/runtime/CL/gemm_reshaped/CLGEMMReshapedConfigurationBifrost.h
deleted file mode 100644
index c452e15..0000000
--- a/arm_compute/runtime/CL/gemm_reshaped/CLGEMMReshapedConfigurationBifrost.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (c) 2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_CLGEMMRESHAPEDCONFIGURATIONBIFROST_H__
-#define __ARM_COMPUTE_CLGEMMRESHAPEDCONFIGURATIONBIFROST_H__
-
-#include "arm_compute/runtime/CL/ICLGEMMReshapedConfiguration.h"
-
-namespace arm_compute
-{
-namespace cl_gemm
-{
-/** Bifrost based OpenCL GEMM reshaped configuration */
-class CLGEMMReshapedConfigurationBifrost final : public ICLGEMMReshapedConfiguration
-{
-public:
-    // Inherited overridden method
-    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure(unsigned int m, unsigned int n, unsigned int k, unsigned int b, DataType data_type) override;
-
-private:
-    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
-    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_f32(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
-    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G7x_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
-    std::pair<GEMMLHSMatrixInfo, GEMMRHSMatrixInfo> configure_G76_u8(unsigned int m, unsigned int n, unsigned int k, unsigned int b);
-};
-} // namespace cl_gemm
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLGEMMRESHAPEDCONFIGURATIONBIFROST_H__ */