Integrate MLGO into CLGEMM and CLGEMMLowpMatrixMultiplyCore: Part 2

* Associate CLScheduler with CLGEMMHeuristicsHandle

* Add option in arm_compute_validation for mlgo file path

* Extend logging for the selection of gemm configurations

Resolves: COMPMID-3843, COMPMID-3844

Signed-off-by: SiCong Li <sicong.li@arm.com>
Change-Id: I869c3a4122414ae6a7bbd721966c1da37621ca11
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5002
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/runtime/CL/CLScheduler.cpp b/src/runtime/CL/CLScheduler.cpp
index 6fc7bae..ef5cb03 100644
--- a/src/runtime/CL/CLScheduler.cpp
+++ b/src/runtime/CL/CLScheduler.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2020 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -49,6 +49,11 @@
     return _target;
 }
 
+CLGEMMHeuristicsHandle *CLScheduler::gemm_heuristics() const // Accessor: returns the GEMM heuristics handle pointer held by the scheduler.
+{
+    return _gemm_heuristics; // nullptr after construction; init() overwrites it with whatever handle it was given (which may itself be nullptr).
+}
+
 void CLScheduler::set_queue(cl::CommandQueue queue)
 {
     _queue = std::move(queue);
@@ -92,7 +97,7 @@
 std::once_flag CLScheduler::_initialize_symbols;
 
 CLScheduler::CLScheduler()
-    : _context(), _queue(), _target(GPUTarget::MIDGARD), _is_initialised(false), _cl_tuner(nullptr), _cl_default_static_tuner(nullptr)
+    : _context(), _queue(), _target(GPUTarget::MIDGARD), _is_initialised(false), _cl_tuner(nullptr), _cl_default_static_tuner(nullptr), _gemm_heuristics(nullptr) // No GEMM heuristics handle until init() assigns one.
 {
 }
 
@@ -103,20 +108,20 @@
     return scheduler;
 }
 
-void CLScheduler::default_init_with_context(cl::Device &device, cl::Context &ctx, ICLTuner *cl_tuner)
+void CLScheduler::default_init_with_context(cl::Device &device, cl::Context &ctx, ICLTuner *cl_tuner, CLGEMMHeuristicsHandle *gemm_h) // One-time setup from a caller-supplied device/context; does nothing if the scheduler is already initialised.
 {
     if(!_is_initialised)
     {
         const std::string cl_kernels_folder("./cl_kernels/");
         cl::CommandQueue  queue = cl::CommandQueue(ctx, device);
         CLKernelLibrary::get().init(cl_kernels_folder, ctx, device);
-        init(ctx, queue, device, cl_tuner);
+        init(ctx, queue, device, cl_tuner, gemm_h); // gemm_h is forwarded unchanged; init() stores it and also sets _is_initialised.
         _cl_default_static_tuner = tuners::TunerFactory::create_tuner(_target);
         _cl_tuner                = (cl_tuner == nullptr) ? _cl_default_static_tuner.get() : cl_tuner;
     }
 }
 
-void CLScheduler::default_init(ICLTuner *cl_tuner)
+void CLScheduler::default_init(ICLTuner *cl_tuner, CLGEMMHeuristicsHandle *gemm_h)
 {
     if(!_is_initialised)
     {
@@ -127,7 +132,7 @@
         ARM_COMPUTE_ERROR_ON_MSG(err != CL_SUCCESS, "Failed to create OpenCL context");
         cl::CommandQueue queue = cl::CommandQueue(ctx, dev);
         CLKernelLibrary::get().init("./cl_kernels/", ctx, dev);
-        init(ctx, queue, dev, cl_tuner);
+        init(ctx, queue, dev, cl_tuner, gemm_h);
         // Create a default static tuner and set if none was provided
         _cl_default_static_tuner = tuners::TunerFactory::create_tuner(_target);
     }
@@ -142,13 +147,14 @@
     CLKernelLibrary::get().set_context(_context);
 }
 
-void CLScheduler::init(cl::Context context, cl::CommandQueue queue, const cl::Device &device, ICLTuner *cl_tuner)
+void CLScheduler::init(cl::Context context, cl::CommandQueue queue, const cl::Device &device, ICLTuner *cl_tuner, CLGEMMHeuristicsHandle *gemm_h) // Stores the context, queue, derived target, tuner and GEMM heuristics handle, and marks the scheduler initialised.
 {
     set_context(std::move(context));
-    _queue          = std::move(queue);
-    _target         = get_target_from_device(device);
-    _is_initialised = true;
-    _cl_tuner       = cl_tuner;
+    _queue           = std::move(queue);
+    _target          = get_target_from_device(device);
+    _is_initialised  = true; // Unconditional: this function has no "already initialised" guard of its own.
+    _cl_tuner        = cl_tuner;
+    _gemm_heuristics = gemm_h; // Only the raw pointer is kept (no copy); presumably the caller keeps the handle alive - TODO confirm. Read back via gemm_heuristics().
 }
 
 void CLScheduler::enqueue_common(ICLKernel &kernel, ITensorPack &tensors, bool flush)