COMPMID-997: Add support for node names in the Graph API.

Change-Id: I0ca02e42807c1ad9afeffb7202a3556feb11442f
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/129701
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
diff --git a/tests/SConscript b/tests/SConscript
index 7aa4d93..6a6ee62 100644
--- a/tests/SConscript
+++ b/tests/SConscript
@@ -60,12 +60,13 @@
 if env['os'] in ['android', 'bare_metal'] or env['standalone']:
     Import("arm_compute_a")
     Import("arm_compute_core_a")
-    test_env.Append(LIBS = [arm_compute_a, arm_compute_core_a])
-    arm_compute_lib = arm_compute_a
+    Import("arm_compute_graph_a")
+    test_env.Append(LIBS = [arm_compute_graph_a, arm_compute_a, arm_compute_core_a])
+    arm_compute_lib = arm_compute_graph_a
 else:
-    Import("arm_compute_so")
-    test_env.Append(LIBS = ["arm_compute", "arm_compute_core"])
-    arm_compute_lib = arm_compute_so
+    Import("arm_compute_graph_so")
+    test_env.Append(LIBS = ["arm_compute_graph", "arm_compute", "arm_compute_core"])
+    arm_compute_lib = arm_compute_graph_so
 
 #FIXME Delete before release
 if env['internal_only']:
@@ -158,24 +159,15 @@
                 cl_examples += [ test_env.Program(example, [ test_env.Object(source=file, target=example) ] + files_validate_examples, LIBS = test_env["LIBS"]) ]
             arm_compute_validate_examples += cl_examples
             if test_env['opencl'] and test_env['neon']:
-                if env['os'] in ['android', 'bare_metal'] or env['standalone']:
-                    Import('arm_compute_graph_a')
-                    graph_dependency = [ arm_compute_graph_a]
-                else:
-                    Import('arm_compute_graph_so')
-                    graph_dependency = [ arm_compute_graph_so]
-
                 graph_utils = test_env.Object(source="../utils/GraphUtils.cpp", target="GraphUtils")
                 for file in Glob("validate_examples/graph_*.cpp"):
                     example = "validate_" + os.path.basename(os.path.splitext(str(file))[0])
                     if env['os'] in ['android', 'bare_metal'] or env['standalone']:
-                        prog = test_env.Program(example, [ test_env.Object(source=file, target=example), graph_utils]+ files_validate_examples, LIBS = test_env["LIBS"], LINKFLAGS=test_env["LINKFLAGS"]+['-Wl,--whole-archive',graph_dependency,'-Wl,--no-whole-archive'])
-                        Depends(prog, graph_dependency)
+                        prog = test_env.Program(example, [ test_env.Object(source=file, target=example), graph_utils]+ files_validate_examples, LIBS = test_env["LIBS"], LINKFLAGS=test_env["LINKFLAGS"]+['-Wl,--whole-archive',arm_compute_lib,'-Wl,--no-whole-archive'])
                         arm_compute_validate_examples += [ prog ]
                     else:
                         #-Wl,--allow-shlib-undefined: Ignore dependencies of dependencies
                         prog = test_env.Program(example, [ test_env.Object(source=file, target=example), graph_utils]+ files_validate_examples, LIBS = test_env["LIBS"] + ["arm_compute_graph"], LINKFLAGS=test_env["LINKFLAGS"]+['-Wl,--allow-shlib-undefined'] )
-                        Depends(prog, graph_dependency)
                         arm_compute_validate_examples += [ prog ]
         Depends(arm_compute_validate_examples, arm_compute_test_framework)
         Depends(arm_compute_validate_examples, arm_compute_lib)
@@ -201,23 +193,15 @@
         arm_compute_benchmark_examples += cl_examples
 
     # Graph examples
-    if env['os'] in ['android', 'bare_metal'] or env['standalone']:
-        Import('arm_compute_graph_a')
-        graph_dependency = [arm_compute_graph_a]
-    else:
-        Import('arm_compute_graph_so')
-        graph_dependency = [arm_compute_graph_so]
     graph_utils = test_env.Object(source="../utils/GraphUtils.cpp", target="GraphUtils")
     for file in Glob("../examples/graph_*.cpp"):
         example = "benchmark_" + os.path.basename(os.path.splitext(str(file))[0])
         if env['os'] in ['android', 'bare_metal'] or env['standalone']:
-            prog = test_env.Program(example, [ test_env.Object(source=file, target=example), graph_utils]+ files_benchmark_examples, LIBS = test_env["LIBS"], LINKFLAGS=test_env["LINKFLAGS"]+['-Wl,--whole-archive',graph_dependency,'-Wl,--no-whole-archive'])
-            Depends(prog, [graph_dependency])
+            prog = test_env.Program(example, [ test_env.Object(source=file, target=example), graph_utils]+ files_benchmark_examples, LIBS = test_env["LIBS"], LINKFLAGS=test_env["LINKFLAGS"]+['-Wl,--whole-archive',arm_compute_lib,'-Wl,--no-whole-archive'])
             arm_compute_benchmark_examples += [ prog ]
         else:
             #-Wl,--allow-shlib-undefined: Ignore dependencies of dependencies
             prog = test_env.Program(example, [ test_env.Object(source=file, target=example), graph_utils]+ files_benchmark_examples, LIBS = test_env["LIBS"] + ["arm_compute_graph"], LINKFLAGS=test_env["LINKFLAGS"]+['-Wl,--allow-shlib-undefined'] )
-            Depends(prog, graph_dependency)
             arm_compute_benchmark_examples += [ prog ]
     Depends(arm_compute_benchmark_examples, arm_compute_test_framework)
     Depends(arm_compute_benchmark_examples, arm_compute_lib)
diff --git a/tests/framework/Profiler.cpp b/tests/framework/Profiler.cpp
index 69ea527..7b95279 100644
--- a/tests/framework/Profiler.cpp
+++ b/tests/framework/Profiler.cpp
@@ -55,9 +55,9 @@
 
 void Profiler::stop()
 {
-    for(auto &instrument : _instruments)
+    for(auto instrument = _instruments.rbegin(); instrument != _instruments.rend(); instrument++)
     {
-        instrument->stop();
+        (*instrument)->stop();
     }
     for(const auto &instrument : _instruments)
     {
@@ -70,9 +70,9 @@
 
 void Profiler::test_stop()
 {
-    for(auto &instrument : _instruments)
+    for(auto instrument = _instruments.rbegin(); instrument != _instruments.rend(); instrument++)
     {
-        instrument->test_stop();
+        (*instrument)->test_stop();
     }
 
     for(const auto &instrument : _instruments)
diff --git a/tests/framework/instruments/OpenCLTimer.cpp b/tests/framework/instruments/OpenCLTimer.cpp
index d9d16bc..4af6dae 100644
--- a/tests/framework/instruments/OpenCLTimer.cpp
+++ b/tests/framework/instruments/OpenCLTimer.cpp
@@ -26,6 +26,7 @@
 #include "../Framework.h"
 #include "../Utils.h"
 
+#include "arm_compute/graph/INode.h"
 #include "arm_compute/runtime/CL/CLScheduler.h"
 
 #ifndef ARM_COMPUTE_CL
@@ -44,7 +45,7 @@
 }
 
 OpenCLTimer::OpenCLTimer(ScaleFactor scale_factor)
-    : real_function(CLSymbols::get().clEnqueueNDRangeKernel_ptr)
+    : _kernels(), _real_function(nullptr), _real_graph_function(nullptr), _prefix()
 {
     auto                        q     = CLScheduler::get().queue();
     cl_command_queue_properties props = q.getInfo<CL_QUEUE_PROPERTIES>();
@@ -76,20 +77,23 @@
     }
 }
 
-void OpenCLTimer::start()
+void OpenCLTimer::test_start()
 {
-    kernels.clear();
     // Start intercepting enqueues:
-    auto interceptor = [this](
-                           cl_command_queue command_queue,
-                           cl_kernel        kernel,
-                           cl_uint          work_dim,
-                           const size_t    *gwo,
-                           const size_t    *gws,
-                           const size_t    *lws,
-                           cl_uint          num_events_in_wait_list,
-                           const cl_event * event_wait_list,
-                           cl_event *       event)
+    ARM_COMPUTE_ERROR_ON(_real_function != nullptr);
+    ARM_COMPUTE_ERROR_ON(_real_graph_function != nullptr);
+    _real_function       = CLSymbols::get().clEnqueueNDRangeKernel_ptr;
+    _real_graph_function = graph::TaskExecutor::get().execute_function;
+    auto interceptor     = [this](
+                               cl_command_queue command_queue,
+                               cl_kernel        kernel,
+                               cl_uint          work_dim,
+                               const size_t    *gwo,
+                               const size_t    *gws,
+                               const size_t    *lws,
+                               cl_uint          num_events_in_wait_list,
+                               const cl_event * event_wait_list,
+                               cl_event *       event)
     {
         ARM_COMPUTE_ERROR_ON_MSG(event != nullptr, "Not supported");
         ARM_COMPUTE_UNUSED(event);
@@ -97,7 +101,7 @@
         OpenCLTimer::kernel_info info;
         cl::Kernel               cpp_kernel(kernel, true);
         std::stringstream        ss;
-        ss << cpp_kernel.getInfo<CL_KERNEL_FUNCTION_NAME>();
+        ss << this->_prefix << cpp_kernel.getInfo<CL_KERNEL_FUNCTION_NAME>();
         if(gws != nullptr)
         {
             ss << " GWS[" << gws[0] << "," << gws[1] << "," << gws[2] << "]";
@@ -108,26 +112,50 @@
         }
         info.name = ss.str();
         cl_event tmp;
-        cl_int   retval = this->real_function(command_queue, kernel, work_dim, gwo, gws, lws, num_events_in_wait_list, event_wait_list, &tmp);
+        cl_int   retval = this->_real_function(command_queue, kernel, work_dim, gwo, gws, lws, num_events_in_wait_list, event_wait_list, &tmp);
         info.event      = tmp;
-        this->kernels.push_back(std::move(info));
+        this->_kernels.push_back(std::move(info));
         return retval;
     };
 
+    // Start intercepting tasks:
+    auto task_interceptor = [this](graph::ExecutionTask & task)
+    {
+        if(task.node != nullptr && !task.node->name().empty())
+        {
+            this->_prefix = task.node->name() + "/";
+        }
+        else
+        {
+            this->_prefix = "";
+        }
+        this->_real_graph_function(task);
+        this->_prefix = "";
+    };
+
     CLSymbols::get().clEnqueueNDRangeKernel_ptr = interceptor;
+    graph::TaskExecutor::get().execute_function = task_interceptor;
 }
 
-void OpenCLTimer::stop()
+void OpenCLTimer::start()
+{
+    _kernels.clear();
+}
+
+void OpenCLTimer::test_stop()
 {
     // Restore real function
-    CLSymbols::get().clEnqueueNDRangeKernel_ptr = real_function;
+    CLSymbols::get().clEnqueueNDRangeKernel_ptr = _real_function;
+    graph::TaskExecutor::get().execute_function = _real_graph_function;
+    _real_graph_function                        = nullptr;
+    _real_function                              = nullptr;
 }
 
 Instrument::MeasurementsMap OpenCLTimer::measurements() const
 {
     MeasurementsMap measurements;
     unsigned int    kernel_number = 0;
-    for(auto kernel : kernels)
+    for(auto kernel : _kernels)
     {
         cl_ulong start = kernel.event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
         cl_ulong end   = kernel.event.getProfilingInfo<CL_PROFILING_COMMAND_END>();
diff --git a/tests/framework/instruments/OpenCLTimer.h b/tests/framework/instruments/OpenCLTimer.h
index 4457878..059f449 100644
--- a/tests/framework/instruments/OpenCLTimer.h
+++ b/tests/framework/instruments/OpenCLTimer.h
@@ -30,6 +30,8 @@
 #include "arm_compute/core/CL/OpenCL.h"
 #endif /* ARM_COMPUTE_CL */
 
+#include "arm_compute/graph/Workload.h"
+
 #include <list>
 
 namespace arm_compute
@@ -48,17 +50,22 @@
      */
     OpenCLTimer(ScaleFactor scale_factor);
     std::string     id() const override;
+    void            test_start() override;
     void            start() override;
-    void            stop() override;
+    void            test_stop() override;
     MeasurementsMap measurements() const override;
+
+private:
 #ifdef ARM_COMPUTE_CL
     struct kernel_info
     {
         cl::Event   event{}; /**< OpenCL event associated to the kernel enqueue */
         std::string name{};  /**< OpenCL Kernel name */
     };
-    std::list<kernel_info>                          kernels{};
-    std::function<decltype(clEnqueueNDRangeKernel)> real_function;
+    std::list<kernel_info>                          _kernels;
+    std::function<decltype(clEnqueueNDRangeKernel)> _real_function;
+    std::function<decltype(graph::execute_task)>    _real_graph_function;
+    std::string                                     _prefix;
 #endif /* ARM_COMPUTE_CL */
 
 private:
diff --git a/tests/framework/instruments/SchedulerTimer.cpp b/tests/framework/instruments/SchedulerTimer.cpp
index e42cebd..1b37b18 100644
--- a/tests/framework/instruments/SchedulerTimer.cpp
+++ b/tests/framework/instruments/SchedulerTimer.cpp
@@ -25,6 +25,8 @@
 
 #include "WallClockTimer.h"
 #include "arm_compute/core/CPP/ICPPKernel.h"
+#include "arm_compute/core/utils/misc/Cast.h"
+#include "arm_compute/graph/INode.h"
 
 namespace arm_compute
 {
@@ -42,7 +44,7 @@
 public:
     /** Default constructor. */
     Interceptor(std::list<SchedulerTimer::kernel_info> &kernels, IScheduler &real_scheduler, ScaleFactor scale_factor)
-        : _kernels(kernels), _real_scheduler(real_scheduler), _timer(scale_factor)
+        : _kernels(kernels), _real_scheduler(real_scheduler), _timer(scale_factor), _prefix()
     {
     }
 
@@ -56,6 +58,11 @@
         return _real_scheduler.num_threads();
     }
 
+    void set_prefix(std::string prefix)
+    {
+        _prefix = std::move(prefix);
+    }
+
     void schedule(ICPPKernel *kernel, unsigned int split_dimension) override
     {
         _timer.start();
@@ -64,6 +71,7 @@
 
         SchedulerTimer::kernel_info info;
         info.name         = kernel->name();
+        info.prefix       = _prefix;
         info.measurements = _timer.measurements();
         _kernels.push_back(std::move(info));
     }
@@ -72,32 +80,68 @@
     std::list<SchedulerTimer::kernel_info> &_kernels;
     IScheduler                             &_real_scheduler;
     WallClockTimer                          _timer;
+    std::string                             _prefix;
 };
 
 SchedulerTimer::SchedulerTimer(ScaleFactor scale_factor)
-    : _kernels(), _real_scheduler(nullptr), _real_scheduler_type(), _scale_factor(scale_factor)
+    : _kernels(), _real_scheduler(nullptr), _real_scheduler_type(), _real_graph_function(nullptr), _scale_factor(scale_factor), _interceptor(nullptr)
 {
 }
 
-void SchedulerTimer::start()
+void SchedulerTimer::test_start()
 {
+    // Start intercepting tasks:
+    ARM_COMPUTE_ERROR_ON(_real_graph_function != nullptr);
+    _real_graph_function  = graph::TaskExecutor::get().execute_function;
+    auto task_interceptor = [this](graph::ExecutionTask & task)
+    {
+        Interceptor *scheduler = nullptr;
+        if(dynamic_cast<Interceptor *>(this->_interceptor.get()) != nullptr)
+        {
+            scheduler = arm_compute::utils::cast::polymorphic_downcast<Interceptor *>(_interceptor.get());
+            if(task.node != nullptr && !task.node->name().empty())
+            {
+                scheduler->set_prefix(task.node->name() + "/");
+            }
+            else
+            {
+                scheduler->set_prefix("");
+            }
+        }
+
+        this->_real_graph_function(task);
+
+        if(scheduler != nullptr)
+        {
+            scheduler->set_prefix("");
+        }
+    };
+
     ARM_COMPUTE_ERROR_ON(_real_scheduler != nullptr);
     _real_scheduler_type = Scheduler::get_type();
     //Note: We can't currently replace a custom scheduler
     if(_real_scheduler_type != Scheduler::Type::CUSTOM)
     {
-        _real_scheduler  = &Scheduler::get();
-        auto interceptor = std::make_shared<Interceptor>(_kernels, *_real_scheduler, _scale_factor);
-        Scheduler::set(std::static_pointer_cast<IScheduler>(interceptor));
+        _real_scheduler = &Scheduler::get();
+        _interceptor    = std::make_shared<Interceptor>(_kernels, *_real_scheduler, _scale_factor);
+        Scheduler::set(std::static_pointer_cast<IScheduler>(_interceptor));
+        graph::TaskExecutor::get().execute_function = task_interceptor;
     }
+}
+
+void SchedulerTimer::start()
+{
     _kernels.clear();
 }
 
-void SchedulerTimer::stop()
+void SchedulerTimer::test_stop()
 {
     // Restore real scheduler
     Scheduler::set(_real_scheduler_type);
-    _real_scheduler = nullptr;
+    _real_scheduler                             = nullptr;
+    _interceptor                                = nullptr;
+    graph::TaskExecutor::get().execute_function = _real_graph_function;
+    _real_graph_function                        = nullptr;
 }
 
 Instrument::MeasurementsMap SchedulerTimer::measurements() const
@@ -106,7 +150,7 @@
     unsigned int    kernel_number = 0;
     for(auto kernel : _kernels)
     {
-        measurements.emplace(kernel.name + " #" + support::cpp11::to_string(kernel_number++), kernel.measurements.begin()->second);
+        measurements.emplace(kernel.prefix + kernel.name + " #" + support::cpp11::to_string(kernel_number++), kernel.measurements.begin()->second);
     }
 
     return measurements;
diff --git a/tests/framework/instruments/SchedulerTimer.h b/tests/framework/instruments/SchedulerTimer.h
index ec282cc..55d5f25 100644
--- a/tests/framework/instruments/SchedulerTimer.h
+++ b/tests/framework/instruments/SchedulerTimer.h
@@ -25,7 +25,9 @@
 #define ARM_COMPUTE_TEST_SCHEDULER_TIMER
 
 #include "Instrument.h"
+#include "arm_compute/graph/Workload.h"
 #include "arm_compute/runtime/Scheduler.h"
+
 #include <list>
 
 namespace arm_compute
@@ -50,8 +52,9 @@
     SchedulerTimer &operator=(const SchedulerTimer &) = delete;
 
     std::string                 id() const override;
+    void                        test_start() override;
     void                        start() override;
-    void                        stop() override;
+    void                        test_stop() override;
     Instrument::MeasurementsMap measurements() const override;
 
     /** Kernel information */
@@ -59,13 +62,16 @@
     {
         Instrument::MeasurementsMap measurements{}; /**< Time it took the kernel to run */
         std::string                 name{};         /**< Kernel name */
+        std::string                 prefix{};       /**< Kernel prefix */
     };
 
 private:
-    std::list<kernel_info> _kernels;
-    IScheduler            *_real_scheduler;
-    Scheduler::Type        _real_scheduler_type;
-    ScaleFactor            _scale_factor;
+    std::list<kernel_info>                       _kernels;
+    IScheduler                                  *_real_scheduler;
+    Scheduler::Type                              _real_scheduler_type;
+    std::function<decltype(graph::execute_task)> _real_graph_function;
+    ScaleFactor                                  _scale_factor;
+    std::shared_ptr<IScheduler>                  _interceptor;
 };
 } // namespace framework
 } // namespace test
diff --git a/tests/framework/printers/PrettyPrinter.cpp b/tests/framework/printers/PrettyPrinter.cpp
index ef8f91a..3181951 100644
--- a/tests/framework/printers/PrettyPrinter.cpp
+++ b/tests/framework/printers/PrettyPrinter.cpp
@@ -129,8 +129,8 @@
         if(instrument.second.size() > 1)
         {
             *_stream << ", STDDEV=" << arithmetic_to_string(stats.relative_standard_deviation(), 2) << " %";
-            *_stream << ", MIN=" << stats.min() << ", ";
-            *_stream << ", MAX=" << stats.max() << ", ";
+            *_stream << ", MIN=" << stats.min();
+            *_stream << ", MAX=" << stats.max();
             *_stream << ", MEDIAN=" << stats.median().value() << " " << stats.median().unit();
         }
         *_stream << end_color() << "\n";