Minor improvements to inference profiling

* Start inference profiling at the actual beginning
* Add profiling events for EnqueueInputs and EnqueueOutputs
* Add profiling event for working memory allocation
* Refactor Execute body to remove code duplication
* Forward arguments to constructors rather than copying them

Change-Id: Iacab85f0a02e88e2423885f86f97e4dba4037319
Signed-off-by: Derek Lamberti <derek.lamberti@arm.com>
diff --git a/src/armnn/Profiling.hpp b/src/armnn/Profiling.hpp
index 08d7f7b..08e55a1 100644
--- a/src/armnn/Profiling.hpp
+++ b/src/armnn/Profiling.hpp
@@ -115,7 +115,7 @@
     using InstrumentPtr = std::unique_ptr<Instrument>;
 
     template<typename... Args>
-    ScopedProfilingEvent(const BackendId& backendId, const std::string& name, Args... args)
+    ScopedProfilingEvent(const BackendId& backendId, const std::string& name, Args&&... args)
         : m_Event(nullptr)
         , m_Profiler(ProfilerManager::GetInstance().GetProfiler())
     {
@@ -123,7 +123,7 @@
         {
             std::vector<InstrumentPtr> instruments(0);
             instruments.reserve(sizeof...(args)); //One allocation
-            ConstructNextInVector(instruments, args...);
+            ConstructNextInVector(instruments, std::forward<Args>(args)...);
             m_Event = m_Profiler->BeginEvent(backendId, name, std::move(instruments));
         }
     }
@@ -144,10 +144,10 @@
     }
 
     template<typename Arg, typename... Args>
-    void ConstructNextInVector(std::vector<InstrumentPtr>& instruments, Arg arg, Args... args)
+    void ConstructNextInVector(std::vector<InstrumentPtr>& instruments, Arg&& arg, Args&&... args)
     {
-        instruments.emplace_back(std::make_unique<Arg>(arg));
-        ConstructNextInVector(instruments, args...);
+        instruments.emplace_back(std::make_unique<std::decay_t<Arg>>(std::forward<Arg>(arg)));
+        ConstructNextInVector(instruments, std::forward<Args>(args)...);
     }
 
     Event* m_Event;       ///< Event to track