Port the ClGemmLowp kernels to the new API

Ported kernels:
 - CLGEMMLowpMatrixMultiplyNativeKernel
 - CLGEMMLowpMatrixMultiplyReshapedKernel
 - CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel
 - CLGEMMLowpOffsetContributionKernel
 - CLGEMMLowpOffsetContributionOutputStageKernel
 - CLGEMMLowpQuantizeDownInt32ScaleByFixedPointKernel
 - CLGEMMLowpQuantizeDownInt32ScaleByFloatKernel
 - CLGEMMLowpQuantizeDownInt32ScaleKernel

Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: I9d5a744d6a2dd2f2726fdfb291bad000b6970de2
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5870
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/examples/gemm_tuner/cl_gemmlowp_reshaped.cpp b/examples/gemm_tuner/cl_gemmlowp_reshaped.cpp
index 3d3f7fe..3c8976f 100644
--- a/examples/gemm_tuner/cl_gemmlowp_reshaped.cpp
+++ b/examples/gemm_tuner/cl_gemmlowp_reshaped.cpp
@@ -33,7 +33,7 @@
 #include "arm_compute/runtime/CL/CLTuner.h"
 #include "examples/gemm_tuner/CommonGemmExampleOptions.h"
 #include "examples/gemm_tuner/GemmTunerHelpers.h"
-#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h"
+#include "src/core/gpu/cl/kernels/ClGemmLowpMatrixMultiplyReshapedKernel.h"
 #include "src/core/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.h"
 #include "tests/CL/Helper.h"
 #include "utils/Utils.h"
@@ -168,8 +168,8 @@
 
 } // namespace
 
-using CLGEMMReshapeLHSMatrix           = test::CLSynthetizeOperator<ClGemmReshapeLhsMatrixKernel>;
-using CLGEMMLowpMatrixMultiplyReshaped = test::CLSynthetizeFunction<CLGEMMLowpMatrixMultiplyReshapedKernel>;
+using ClGemmReshapeLHSMatrix           = test::CLSynthetizeOperator<ClGemmReshapeLhsMatrixKernel>;
+using ClGemmLowpMatrixMultiplyReshaped = test::CLSynthetizeOperator<ClGemmLowpMatrixMultiplyReshapedKernel>;
 
 class CLGEMMLowpMatrixMultiplyReshapedExample : public Example
 {
@@ -269,20 +269,20 @@
         // Validate argments
         if(!reshape_lhs.validate(lhs.info(), lhs_reshaped.info(), lhs_info, gemm_info.reinterpret_input_as_3d()))
         {
-            std::cerr << "Invalid arguments for CLGEMMReshapeLHSMatrixKernel." << std::endl;
+            std::cerr << "Invalid arguments for ClGemmReshapeLHSMatrixKernel." << std::endl;
             return false;
         }
 
         if(!gemm.validate(lhs_reshaped.info(), rhs_reshaped.info(), dst.info(), lhs_info, rhs_info, gemm_info))
         {
-            std::cerr << "Invalid arguments for CLGEMMLowpMatrixMultiplyReshapedKernel." << std::endl;
+            std::cerr << "Invalid arguments for ClGemmLowpMatrixMultiplyReshapedKernel." << std::endl;
             return false;
         }
 
         // Configure functions
         reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info);
 
-        gemm.configure(&lhs_reshaped, &rhs_reshaped, &dst, lhs_info, rhs_info, gemm_info);
+        gemm.configure(lhs_reshaped.info(), rhs_reshaped.info(), dst.info(), lhs_info, rhs_info, gemm_info);
 
         // Allocate tensors
         lhs.allocator()->allocate();
@@ -298,7 +298,8 @@
         ITensorPack reshape_lsh_pack({ { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } });
         reshape_lhs.run(reshape_lsh_pack);
 
-        gemm.run();
+        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped }, { ACL_SRC_1, &rhs_reshaped }, { ACL_DST, &dst } });
+        gemm.run(gemm_pack);
 
         // Make sure all the OpenCL jobs are done executing:
         CLScheduler::get().sync();
@@ -315,8 +316,8 @@
     CLTensor                         rhs_reshaped{};
     CLTensor                         dst{};
     CLTuner                          tuner{};
-    CLGEMMReshapeLHSMatrix           reshape_lhs{};
-    CLGEMMLowpMatrixMultiplyReshaped gemm{};
+    ClGemmReshapeLHSMatrix           reshape_lhs{};
+    ClGemmLowpMatrixMultiplyReshaped gemm{};
 };
 
 /** Main test program for gemmlowp reshaped