Add more performance metrics

* Implemented CLTuning flow for ExecuteNetwork tests
  * Added --tuning-path to specify tuning file to use/create
  * Added --tuning-level to specify tuning level to use as well as enable extra tuning run to generate the tuning file
* Fixed issue where TuningLevel was being parsed incorrectly
* Added measurements for initialization, network parsing, network optimization, tuning, and shutdown
* Added --iterations flag to control the number of iterations inference is run for

Signed-off-by: alered01 <Alex.Redshaw@arm.com>
Change-Id: Ic739ff26e136e32aff9f0995217c1c3207008ca4
diff --git a/tests/ExecuteNetwork/ExecuteNetwork.cpp b/tests/ExecuteNetwork/ExecuteNetwork.cpp
index 57b8692..66d8e13 100644
--- a/tests/ExecuteNetwork/ExecuteNetwork.cpp
+++ b/tests/ExecuteNetwork/ExecuteNetwork.cpp
@@ -35,6 +35,10 @@
     uint32_t counterCapturePeriod;
     std::string fileFormat;
 
+    size_t iterations = 1;
+    int tuningLevel = 0;
+    std::string tuningPath;
+
     double thresholdTime = 0.0;
 
     size_t subgraphId = 0;
@@ -121,6 +125,14 @@
              "If profiling is enabled in 'file-only' mode this is the capture period that will be used in the test")
             ("file-format", po::value(&fileFormat)->default_value("binary"),
              "If profiling is enabled specifies the output file format")
+            ("iterations", po::value<size_t>(&iterations)->default_value(1),
+             "Number of iterations to run the network for, default is set to 1")
+            ("tuning-path", po::value(&tuningPath),
+            "Path to tuning file. Enables use of CL tuning")
+            ("tuning-level", po::value<int>(&tuningLevel)->default_value(0),
+             "Sets the tuning level which enables a tuning run which will update/create a tuning file. "
+             "Available options are: 1 (Rapid), 2 (Normal), 3 (Exhaustive). "
+             "Requires tuning-path to be set, default is set to 0 (No tuning run)")
             ("parse-unsupported", po::bool_switch()->default_value(false),
                 "Add unsupported operators as stand-in layers (where supported by parser)");
     }
@@ -275,6 +287,33 @@
         // Remove duplicates from the list of compute devices.
         RemoveDuplicateDevices(computeDevices);
 
+#if defined(ARMCOMPUTECL_ENABLED)
+        std::shared_ptr<armnn::IGpuAccTunedParameters> tuned_params;
+
+        if (tuningPath != "")
+        {
+            if (tuningLevel != 0)
+            {
+                RunCLTuning(tuningPath, tuningLevel, modelFormat, inputTensorShapes, computeDevices,
+                    dynamicBackendsPath, modelPath, inputNames, inputTensorDataFilePaths, inputTypes, quantizeInput,
+                    outputTypes, outputNames, outputTensorFiles, dequantizeOutput, enableProfiling,
+                    enableFp16TurboMode, enableBf16TurboMode, thresholdTime, printIntermediate, subgraphId,
+                    enableLayerDetails, parseUnsupported);
+            }
+            ARMNN_LOG(info) << "Using tuning params: " << tuningPath << "\n";
+            options.m_BackendOptions.emplace_back(
+                armnn::BackendOptions
+                {
+                    "GpuAcc",
+                    {
+                        {"TuningLevel", 0},
+                        {"TuningFile", tuningPath.c_str()},
+                        {"KernelProfilingEnabled", enableProfiling}
+                    }
+                }
+            );
+        }
+#endif
         try
         {
             CheckOptionDependencies(vm);
@@ -288,9 +327,9 @@
         // Create runtime
         std::shared_ptr<armnn::IRuntime> runtime(armnn::IRuntime::Create(options));
 
-        return RunTest(modelFormat, inputTensorShapes, computeDevices, dynamicBackendsPath, modelPath, inputNames,
-                       inputTensorDataFilePaths, inputTypes, quantizeInput, outputTypes, outputNames,
-                       outputTensorFiles, dequantizeOutput, enableProfiling, enableFp16TurboMode, enableBf16TurboMode,
-                       thresholdTime, printIntermediate, subgraphId, enableLayerDetails, parseUnsupported, runtime);
+        return RunTest(modelFormat, inputTensorShapes, computeDevices, dynamicBackendsPath, modelPath,
+            inputNames, inputTensorDataFilePaths, inputTypes, quantizeInput, outputTypes, outputNames,
+            outputTensorFiles, dequantizeOutput, enableProfiling, enableFp16TurboMode, enableBf16TurboMode,
+            thresholdTime, printIntermediate, subgraphId, enableLayerDetails, parseUnsupported, iterations, runtime);
     }
 }