This application benchmarks TFLite models by reporting the minimum, maximum and average inference time over a configurable number of runs. An example invocation is shown after the option list below.

Usage: armnn_tfl_benchmark -m <model .tflite>
-m --model_file <.tflite file path>:  .tflite model to be executed
-b --backend <device>:                preferred backend device to run
                                      the layers on. Possible choices:
                                      CpuAcc, CpuRef
-l --loops <int>:                     provide the number of times the
                                      inference will be executed
                                      (by default nb_loops=1)
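
For example, to run 10 inferences with CpuAcc as the preferred backend
(mobilenet_v1.tflite is a placeholder for any .tflite model file):

    armnn_tfl_benchmark -m mobilenet_v1.tflite -b CpuAcc -l 10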

Signed-off-by: Vincent ABRIOU <vincent.abriou@st.com>
Signed-off-by: Jim Flynn <jim.flynn@arm.com>
Change-Id: Ia26fafd4f382f0ad03856436dcae6e71b5abbd26
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index edea34d..135f649 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -137,6 +137,10 @@
         addDllCopyCommands(${testName})
     endmacro()
 
+    set(TfLiteBenchmark-Armnn_sources
+        TfLiteBenchmark-Armnn/TfLiteBenchmark-Armnn.cpp)
+    TfLiteParserTest(TfLiteBenchmark-Armnn "${TfLiteBenchmark-Armnn_sources}")
+
     set(TfLiteMobilenetQuantized-Armnn_sources
         TfLiteMobilenetQuantized-Armnn/TfLiteMobilenetQuantized-Armnn.cpp
         ImagePreprocessor.hpp
diff --git a/tests/TfLiteBenchmark-Armnn/TfLiteBenchmark-Armnn.cpp b/tests/TfLiteBenchmark-Armnn/TfLiteBenchmark-Armnn.cpp
new file mode 100644
index 0000000..a010717
--- /dev/null
+++ b/tests/TfLiteBenchmark-Armnn/TfLiteBenchmark-Armnn.cpp
@@ -0,0 +1,238 @@
+//
+// Copyright © 2020 STMicroelectronics and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include <algorithm>
+#include <cstdlib>
+#include <getopt.h>
+#include <iostream>
+#include <numeric>
+#include <signal.h>
+#include <string>
+#include <sys/time.h>
+#include <vector>
+
+#include <armnn/BackendId.hpp>
+#include <armnn/BackendRegistry.hpp>
+#include <armnn/IRuntime.hpp>
+#include <armnn/utility/NumericCast.hpp>
+#include <armnnTfLiteParser/ITfLiteParser.hpp>
+
+// Application parameters
+std::vector<armnn::BackendId> preferred_backends_order = {armnn::Compute::CpuAcc, armnn::Compute::CpuRef};
+std::string model_file_str;
+std::string preferred_backend_str;
+int nb_loops = 1;
+
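+// Convert a timeval into a number of microseconds.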
+double get_us(struct timeval t)
+{
+    return (armnn::numeric_cast<double>(t.tv_sec) *
+            armnn::numeric_cast<double>(1000000) +
+            armnn::numeric_cast<double>(t.tv_usec));
+}
+
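+// Convert a timeval into a number of milliseconds.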
+double get_ms(struct timeval t)
+{
+    return (armnn::numeric_cast<double>(t.tv_sec) *
+            armnn::numeric_cast<double>(1000) +
+            armnn::numeric_cast<double>(t.tv_usec) / 1000);
+}
+
+static void print_help(char** argv)
+{
+    std::cout <<
+        "Usage: " << argv[0] << " -m <model .tflite>\n"
+        "\n"
+        "-m --model_file <.tflite file path>:  .tflite model to be executed\n"
+        "-b --backend <device>:                preferred backend device to run the layers on. Possible choices: "
+                                               << armnn::BackendRegistryInstance().GetBackendIdsAsString() << "\n"
+        "-l --loops <int>:                     provide the number of times the inference will be executed\n"
+        "                                      (by default nb_loops=1)\n"
+        "--help:                               show this help\n";
+    exit(1);
+}
+
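+// Parse the command-line options and update the application parameters accordingly.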
+void process_args(int argc, char** argv)
+{
+    const char* const short_opts = "m:b:l:h";
+    const option long_opts[] = {
+        {"model_file",   required_argument, nullptr, 'm'},
+        {"backend",      required_argument, nullptr, 'b'},
+        {"loops",        required_argument, nullptr, 'l'},
+        {"help",         no_argument,       nullptr, 'h'},
+        {nullptr,        no_argument,       nullptr, 0}
+    };
+
+    while (true)
+    {
+        const auto opt = getopt_long(argc, argv, short_opts, long_opts, nullptr);
+
+        if (-1 == opt)
+        {
+            break;
+        }
+
+        switch (opt)
+        {
+        case 'm':
+            model_file_str = std::string(optarg);
+            std::cout << "model file set to: " << model_file_str << std::endl;
+            break;
+        case 'b':
+            preferred_backend_str = std::string(optarg);
+            // Overwrite the preferred backend order
+            if (preferred_backend_str == "CpuAcc")
+            {
+                preferred_backends_order = {armnn::Compute::CpuAcc, armnn::Compute::CpuRef};
+            }
+            else if (preferred_backend_str == "CpuRef")
+            {
+                preferred_backends_order = {armnn::Compute::CpuRef, armnn::Compute::CpuAcc};
+            }
+
+            std::cout << "preferred backend device set to:";
+            for (unsigned int i = 0; i < preferred_backends_order.size(); i++)
+            {
+                std::cout << " " << preferred_backends_order.at(i);
+            }
+            std::cout << std::endl;
+            break;
+        case 'l':
+            nb_loops = std::stoi(optarg);
+            std::cout << "benchmark will execute " << nb_loops << " inference(s)" << std::endl;
+            break;
+        case 'h': // -h or --help
+        case '?': // Unrecognized option
+        default:
+            print_help(argv);
+            break;
+        }
+    }
+
+    if (model_file_str.empty())
+    {
+        print_help(argv);
+    }
+}
+
+int main(int argc, char* argv[])
+{
+    std::vector<double> inferenceTimes;
+
+    // Get options
+    process_args(argc, argv);
+
+    // Create the runtime
+    armnn::IRuntime::CreationOptions options;
+    armnn::IRuntimePtr runtime(armnn::IRuntime::Create(options));
+
+    // Create Parser
+    armnnTfLiteParser::ITfLiteParserPtr armnnparser(armnnTfLiteParser::ITfLiteParser::Create());
+
+    // Create a network
+    armnn::INetworkPtr network = armnnparser->CreateNetworkFromBinaryFile(model_file_str.c_str());
+    if (!network)
+    {
+        throw armnn::Exception("Failed to create an ArmNN network");
+    }
+
+    // Optimize the network
+    armnn::IOptimizedNetworkPtr optimizedNet = armnn::Optimize(*network,
+                                                               preferred_backends_order,
+                                                               runtime->GetDeviceSpec());
+    armnn::NetworkId networkId;
+
+    // Load the network into the runtime
+    runtime->LoadNetwork(networkId, std::move(optimizedNet));
+
+    // Check the number of subgraphs
+    if (armnnparser->GetSubgraphCount() != 1)
+    {
+        std::cout << "Models with more than one subgraph are not supported by this benchmark application.\n";
+        exit(0);
+    }
+    size_t subgraphId = 0;
+
+    // Set up the input network
+    std::cout << "\nModel information:" << std::endl;
+    std::vector<armnnTfLiteParser::BindingPointInfo> inputBindings;
+    std::vector<armnn::TensorInfo>                   inputTensorInfos;
+    std::vector<std::string> inputTensorNames = armnnparser->GetSubgraphInputTensorNames(subgraphId);
+    for (unsigned int i = 0; i < inputTensorNames.size() ; i++)
+    {
+        std::cout << "inputTensorNames[" << i << "] = " << inputTensorNames[i] << std::endl;
+        armnnTfLiteParser::BindingPointInfo inputBinding = armnnparser->GetNetworkInputBindingInfo(
+                                                                           subgraphId,
+                                                                           inputTensorNames[i]);
+        armnn::TensorInfo inputTensorInfo = runtime->GetInputTensorInfo(networkId, inputBinding.first);
+        inputBindings.push_back(inputBinding);
+        inputTensorInfos.push_back(inputTensorInfo);
+    }
+
+    // Set up the output network
+    std::vector<armnnTfLiteParser::BindingPointInfo> outputBindings;
+    std::vector<armnn::TensorInfo>                   outputTensorInfos;
+    std::vector<std::string> outputTensorNames = armnnparser->GetSubgraphOutputTensorNames(subgraphId);
+    for (unsigned int i = 0; i < outputTensorNames.size() ; i++)
+    {
+        std::cout << "outputTensorNames[" << i << "] = " << outputTensorNames[i] << std::endl;
+        armnnTfLiteParser::BindingPointInfo outputBinding = armnnparser->GetNetworkOutputBindingInfo(
+                                                                             subgraphId,
+                                                                             outputTensorNames[i]);
+        armnn::TensorInfo outputTensorInfo = runtime->GetOutputTensorInfo(networkId, outputBinding.first);
+        outputBindings.push_back(outputBinding);
+        outputTensorInfos.push_back(outputTensorInfo);
+    }
+
+    // Allocate input tensors
+    unsigned int nb_inputs = armnn::numeric_cast<unsigned int>(inputTensorInfos.size());
+    armnn::InputTensors inputTensors;
+    std::vector<std::vector<float>> in;
+    for (unsigned int i = 0 ; i < nb_inputs ; i++)
+    {
+        std::vector<float> in_data(inputTensorInfos.at(i).GetNumElements());
+        in.push_back(in_data);
+        inputTensors.push_back({ inputBindings[i].first, armnn::ConstTensor(inputBindings[i].second, in[i].data()) });
+    }
+
+    // Allocate output tensors
+    unsigned int nb_outputs = armnn::numeric_cast<unsigned int>(outputTensorInfos.size());
+    armnn::OutputTensors outputTensors;
+    std::vector<std::vector<float>> out;
+    for (unsigned int i = 0; i < nb_outputs ; i++)
+    {
+        std::vector<float> out_data(outputTensorInfos.at(i).GetNumElements());
+        out.push_back(out_data);
+        outputTensors.push_back({ outputBindings[i].first, armnn::Tensor(outputBindings[i].second, out[i].data()) });
+    }
+
+    // Run the inferences
+    std::cout << "\ninferences are running: " << std::flush;
+    for (int i = 0 ; i < nb_loops ; i++)
+    {
+        struct timeval start_time, stop_time;
+        gettimeofday(&start_time, nullptr);
+
+        runtime->EnqueueWorkload(networkId, inputTensors, outputTensors);
+
+        gettimeofday(&stop_time, nullptr);
+        inferenceTimes.push_back((get_us(stop_time) - get_us(start_time)));
+        std::cout << "# " << std::flush;
+    }
+
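+    // Compute and report the minimum, maximum and average inference time (in microseconds).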
+    auto maxInfTime = *std::max_element(inferenceTimes.begin(), inferenceTimes.end());
+    auto minInfTime = *std::min_element(inferenceTimes.begin(), inferenceTimes.end());
+    auto avgInfTime = std::accumulate(inferenceTimes.begin(), inferenceTimes.end(), 0.0) /
+            armnn::numeric_cast<double>(inferenceTimes.size());
+    std::cout << "\n\ninference time: ";
+    std::cout << "min=" << minInfTime << "us  ";
+    std::cout << "max=" << maxInfTime << "us  ";
+    std::cout << "avg=" << avgInfTime << "us" << std::endl;
+
+    return 0;
+}