COMPMID-2096: Refactor the CLGEMMLowp function selection (heuristic)

Change-Id: I15a8b39e0354d3b6686ed4cc8c361782c0512037
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1410
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: VidhyaSudhan Loganathan <vidhyasudhan.loganathan@arm.com>
diff --git a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
index a07101c..541985b 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
@@ -25,6 +25,7 @@
 #define __ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYCORE_H__
 
 #include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h"
+#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
 #include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
 #include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
 #include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
@@ -100,7 +101,8 @@
 
 private:
     CLMemoryGroup                                 _memory_group;
-    CLGEMMLowpMatrixMultiplyKernel                _mm_kernel;
+    CLGEMMLowpMatrixMultiplyKernel                _mm_midgard_kernel;
+    CLGEMMLowpMatrixMultiplyNativeKernel          _mm_native_kernel;
     CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel _mm_reshaped_only_rhs_kernel;
     CLGEMMReshapeRHSMatrixKernel                  _mtx_b_reshape_kernel;
     CLGEMMLowpMatrixAReductionKernel              _mtx_a_reduction_kernel;
@@ -115,6 +117,7 @@
     int32_t                                       _a_offset;
     int32_t                                       _b_offset;
     bool                                          _is_gemm_reshaped;
+    bool                                          _is_midgard;
     bool                                          _reshape_b_only_on_first_run;
     bool                                          _is_prepared;
     bool                                          _fuse_output_stage;