IVGCVSW-5612 Fix tiny_wav2letter_relu_fixed_int8 delegate output

 * Fix delegate per-channel quantization
 * Change delegate to check reshape options before inputs
 * Add int8 "qsymms8" input/output type option to ExecuteNetwork
 * Add an option to run ExecuteNetwork on TfLite without the Arm NN delegate
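
Example invocations with the new options (illustrative sketch only: -m/--model-path
and -T/--tflite-executor are introduced or shown in this change, while --output-type
is the pre-existing ExecuteNetwork option and model.tflite is a placeholder path):

    # Run the TfLite model through the Arm NN delegate, reading outputs as int8 "qsymms8"
    ExecuteNetwork -m model.tflite --tflite-executor delegate --output-type qsymms8

    # Run the same model on the stock TfLite interpreter, without the Arm NN delegate
    ExecuteNetwork -m model.tflite --tflite-executor tflite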


!referencetests:301301

Signed-off-by: Finn Williams <Finn.Williams@arm.com>
Change-Id: If3e12599b17aff1199d7ab0a55e1c901e480083d
diff --git a/tests/ExecuteNetwork/ExecuteNetwork.cpp b/tests/ExecuteNetwork/ExecuteNetwork.cpp
index e3ca22e..f812e53 100644
--- a/tests/ExecuteNetwork/ExecuteNetwork.cpp
+++ b/tests/ExecuteNetwork/ExecuteNetwork.cpp
@@ -54,18 +54,27 @@
     builder(&tfLiteInterpreter);
     tfLiteInterpreter->AllocateTensors();
 
-    // Create the Armnn Delegate
-    armnnDelegate::DelegateOptions delegateOptions(params.m_ComputeDevices);
-    std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
-            theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
-                             armnnDelegate::TfLiteArmnnDelegateDelete);
-    // Register armnn_delegate to TfLiteInterpreter
-    int status = tfLiteInterpreter->ModifyGraphWithDelegate(std::move(theArmnnDelegate));
-    if (status == kTfLiteError)
+    int status = 0;
+    if (params.m_TfLiteExecutor == ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteDelegate)
     {
-        ARMNN_LOG(fatal) << "Could not register ArmNN TfLite Delegate to TfLiteInterpreter!";
-        return EXIT_FAILURE;
+        // Create the Armnn Delegate
+        armnnDelegate::DelegateOptions delegateOptions(params.m_ComputeDevices);
+        std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
+                theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
+                                 armnnDelegate::TfLiteArmnnDelegateDelete);
+        // Register armnn_delegate to TfLiteInterpreter
+        status = tfLiteInterpreter->ModifyGraphWithDelegate(std::move(theArmnnDelegate));
+        if (status == kTfLiteError)
+        {
+            ARMNN_LOG(fatal) << "Could not register ArmNN TfLite Delegate to TfLiteInterpreter!";
+            return EXIT_FAILURE;
+        }
     }
+    else
+    {
+        std::cout << "Running on TfLite without ArmNN delegate\n";
+    }
+
 
     std::vector<std::string>  inputBindings;
     for (const std::string& inputName: params.m_InputNames)
@@ -110,7 +119,7 @@
 
             std::copy(tensorData.begin(), tensorData.end(), inputData);
         }
-        else if (params.m_InputTypes[inputIndex].compare("int8") == 0)
+        else if (params.m_InputTypes[inputIndex].compare("qsymms8") == 0)
         {
             auto inputData = tfLiteInterpreter->typed_tensor<int8_t>(input);
 
@@ -180,7 +189,7 @@
     for (size_t x = 0; x < params.m_Iterations; x++)
     {
         // Run the inference
-        tfLiteInterpreter->Invoke();
+        status = tfLiteInterpreter->Invoke();
 
         // Print out the output
         for (unsigned int outputIndex = 0; outputIndex < params.m_OutputNames.size(); ++outputIndex)
@@ -207,11 +216,7 @@
 
                 for (int i = 0; i < outputSize; ++i)
                 {
-                    std::cout << tfLiteDelageOutputData[i] << ", ";
-                    if (i % 60 == 0)
-                    {
-                        std::cout << std::endl;
-                    }
+                    printf("%f ", tfLiteDelageOutputData[i]);
                 }
             }
             else if (params.m_OutputTypes[outputIndex].compare("int") == 0)
@@ -226,14 +231,10 @@
 
                 for (int i = 0; i < outputSize; ++i)
                 {
-                    std::cout << tfLiteDelageOutputData[i] << ", ";
-                    if (i % 60 == 0)
-                    {
-                        std::cout << std::endl;
-                    }
+                    printf("%d ", tfLiteDelageOutputData[i]);
                 }
             }
-            else if (params.m_OutputTypes[outputIndex].compare("int8") == 0)
+            else if (params.m_OutputTypes[outputIndex].compare("qsymms8") == 0)
             {
                 auto tfLiteDelageOutputData = tfLiteInterpreter->typed_tensor<int8_t>(tfLiteDelegateOutputId);
                 if(tfLiteDelageOutputData == NULL)
@@ -245,11 +246,7 @@
 
                 for (int i = 0; i < outputSize; ++i)
                 {
-                    std::cout << signed(tfLiteDelageOutputData[i]) << ", ";
-                    if (i % 60 == 0)
-                    {
-                        std::cout << std::endl;
-                    }
+                    printf("%d ", tfLiteDelageOutputData[i]);
                 }
             }
             else if (params.m_OutputTypes[outputIndex].compare("qasymm8") == 0)
@@ -264,11 +261,7 @@
 
                 for (int i = 0; i < outputSize; ++i)
                 {
-                    std::cout << unsigned(tfLiteDelageOutputData[i]) << ", ";
-                    if (i % 60 == 0)
-                    {
-                        std::cout << std::endl;
-                    }
+                    printf("%u ", tfLiteDelageOutputData[i]);
                 }
             }
             else
@@ -289,7 +282,8 @@
 int MainImpl(const ExecuteNetworkParams& params,
              const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
 {
-    using TContainer = mapbox::util::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>>;
+    using TContainer =
+           mapbox::util::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>, std::vector<int8_t>>;
 
     std::vector<TContainer> inputDataContainers;
 
@@ -383,6 +377,10 @@
             {
                 outputDataContainers.push_back(std::vector<uint8_t>(model.GetOutputSize(i)));
             }
+            else if (params.m_OutputTypes[i].compare("qsymms8") == 0)
+            {
+                outputDataContainers.push_back(std::vector<int8_t>(model.GetOutputSize(i)));
+            }
             else
             {
                 ARMNN_LOG(fatal) << "Unsupported tensor data type \"" << params.m_OutputTypes[i] << "\". ";
@@ -503,8 +501,19 @@
     }
     else if(modelFormat.find("tflite") != std::string::npos)
     {
-
-        if (ProgramOptions.m_ExNetParams.m_EnableDelegate)
+        if (ProgramOptions.m_ExNetParams.m_TfLiteExecutor == ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteParser)
+        {
+        #if defined(ARMNN_TF_LITE_PARSER)
+            return MainImpl<armnnTfLiteParser::ITfLiteParser, float>(ProgramOptions.m_ExNetParams, runtime);
+        #else
+            ARMNN_LOG(fatal) << "Not built with Tensorflow-Lite parser support.";
+            return EXIT_FAILURE;
+        #endif
+        }
+        else if (ProgramOptions.m_ExNetParams.m_TfLiteExecutor ==
+                    ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteDelegate ||
+                ProgramOptions.m_ExNetParams.m_TfLiteExecutor ==
+                    ExecuteNetworkParams::TfLiteExecutor::TfliteInterpreter)
         {
         #if defined(ARMNN_TF_LITE_DELEGATE)
             return TfLiteDelegateMainImpl(ProgramOptions.m_ExNetParams, runtime);
@@ -513,12 +522,6 @@
             return EXIT_FAILURE;
         #endif
         }
-    #if defined(ARMNN_TF_LITE_PARSER)
-        return MainImpl<armnnTfLiteParser::ITfLiteParser, float>(ProgramOptions.m_ExNetParams, runtime);
-    #else
-        ARMNN_LOG(fatal) << "Not built with Tensorflow-Lite parser support.";
-        return EXIT_FAILURE;
-    #endif
     }
     else
     {
diff --git a/tests/ExecuteNetwork/ExecuteNetworkParams.hpp b/tests/ExecuteNetwork/ExecuteNetworkParams.hpp
index a30ce57..a19eaa9 100644
--- a/tests/ExecuteNetwork/ExecuteNetworkParams.hpp
+++ b/tests/ExecuteNetwork/ExecuteNetworkParams.hpp
@@ -14,6 +14,13 @@
 {
     using TensorShapePtr = std::unique_ptr<armnn::TensorShape>;
 
+    enum class TfLiteExecutor
+    {
+        ArmNNTfLiteParser,
+        ArmNNTfLiteDelegate,
+        TfliteInterpreter
+    };
+
     std::string                   m_CachedNetworkFilePath;
     std::vector<armnn::BackendId> m_ComputeDevices;
     bool                          m_DequantizeOutput;
@@ -47,6 +54,7 @@
     int                           m_TuningLevel;
     std::string                   m_TuningPath;
     std::string                   m_MLGOTuningFilePath;
+    TfLiteExecutor                m_TfLiteExecutor;
 
     // Ensures that the parameters for ExecuteNetwork fit together
     void ValidateParams();
diff --git a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp
index ba28dd0..62057ea 100644
--- a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp
+++ b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp
@@ -177,10 +177,6 @@
                  "tensorflow-text.",
                  cxxopts::value<std::string>())
 
-                ("D,armnn-tflite-delegate",
-                 "enable Arm NN TfLite delegate",
-                 cxxopts::value<bool>(m_ExNetParams.m_EnableDelegate)->default_value("false")->implicit_value("true"))
-
                 ("m,model-path",
                  "Path to model file, e.g. .armnn, .caffemodel, .prototxt, .tflite, .onnx",
                  cxxopts::value<std::string>(m_ExNetParams.m_ModelPath))
@@ -271,7 +267,19 @@
                  "The type of the output tensors in the network separated by comma. "
                  "If unset, defaults to \"float\" for all defined outputs. "
                  "Accepted values (float, int or qasymm8).",
-                 cxxopts::value<std::string>());
+                 cxxopts::value<std::string>())
+
+                ("T,tflite-executor",
+                 "Set the executor for the tflite model: parser, delegate, tflite. "
+                 "parser is the ArmNNTfLiteParser, "
+                 "delegate is the ArmNNTfLiteDelegate, "
+                 "tflite is the TfliteInterpreter",
+                 cxxopts::value<std::string>()->default_value("parser"))
+
+                ("D,armnn-tflite-delegate",
+                 "Enable Arm NN TfLite delegate. "
+                 "This option is deprecated, please use tflite-executor instead.",
+                 cxxopts::value<bool>(m_ExNetParams.m_EnableDelegate)->default_value("false")->implicit_value("true"));
 
         m_CxxOptions.add_options("c) Optimization")
                 ("bf16-turbo-mode",
@@ -409,6 +417,36 @@
             m_ExNetParams.m_InputTensorDataFilePaths.empty();
     m_ExNetParams.m_DynamicBackendsPath = m_RuntimeOptions.m_DynamicBackendsPath;
 
+
+    std::string tfliteExecutor = GetOptionValue<std::string>("tflite-executor", m_CxxResult);
+
+    if (tfliteExecutor.size() == 0 || tfliteExecutor == "parser")
+    {
+        m_ExNetParams.m_TfLiteExecutor = ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteParser;
+    }
+    else if (tfliteExecutor == "delegate")
+    {
+        m_ExNetParams.m_TfLiteExecutor = ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteDelegate;
+    }
+    else if (tfliteExecutor == "tflite")
+    {
+        m_ExNetParams.m_TfLiteExecutor = ExecuteNetworkParams::TfLiteExecutor::TfliteInterpreter;
+    }
+    else
+    {
+        ARMNN_LOG(info) << fmt::format("Invalid tflite-executor option '{}'.", tfliteExecutor);
+        throw armnn::InvalidArgumentException ("Invalid tflite-executor option");
+    }
+
+    if (m_ExNetParams.m_EnableDelegate)
+    {
+        m_ExNetParams.m_TfLiteExecutor = ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteDelegate;
+        ARMNN_LOG(info) << fmt::format("armnn-tflite-delegate option is being deprecated, "
+                                       "please use tflite-executor instead.");
+    }
+
+
+
     // Parse input tensor shape from the string we got from the command-line.
     std::vector<std::string> inputTensorShapesVector =
             ParseStringList(GetOptionValue<std::string>("input-tensor-shape", m_CxxResult), ":");
diff --git a/tests/InferenceModel.hpp b/tests/InferenceModel.hpp
index 7996262..6bfad06 100644
--- a/tests/InferenceModel.hpp
+++ b/tests/InferenceModel.hpp
@@ -335,7 +335,8 @@
     using DataType           = TDataType;
     using Params             = InferenceModelInternal::Params;
     using QuantizationParams = InferenceModelInternal::QuantizationParams;
-    using TContainer         = mapbox::util::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>>;
+    using TContainer
+        = mapbox::util::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>, std::vector<int8_t>>;
 
     struct CommandLineOptions
     {
diff --git a/tests/InferenceTest.hpp b/tests/InferenceTest.hpp
index 5ec744c..0cc6c3b 100644
--- a/tests/InferenceTest.hpp
+++ b/tests/InferenceTest.hpp
@@ -110,7 +110,8 @@
 class InferenceModelTestCase : public IInferenceTestCase
 {
 public:
-    using TContainer = mapbox::util::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>>;
+    using TContainer =
+           mapbox::util::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>, std::vector<int8_t>>;
 
     InferenceModelTestCase(TModel& model,
                            unsigned int testCaseId,
diff --git a/tests/InferenceTest.inl b/tests/InferenceTest.inl
index 3d6dae3..79700d9 100644
--- a/tests/InferenceTest.inl
+++ b/tests/InferenceTest.inl
@@ -26,7 +26,8 @@
 namespace test
 {
 
-using TContainer = mapbox::util::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>>;
+using TContainer =
+        mapbox::util::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>, std::vector<int8_t>>;
 
 template <typename TTestCaseDatabase, typename TModel>
 ClassifierTestCase<TTestCaseDatabase, TModel>::ClassifierTestCase(
@@ -66,6 +67,14 @@
                                 });
     }
 
+    void operator()(const std::vector<int8_t>& values)
+    {
+        SortPredictions(values, [](int8_t value)
+        {
+            return value;
+        });
+    }
+
     void operator()(const std::vector<uint8_t>& values)
     {
         auto& scale = m_Scale;
diff --git a/tests/NetworkExecutionUtils/NetworkExecutionUtils.cpp b/tests/NetworkExecutionUtils/NetworkExecutionUtils.cpp
index 2afd941..d902d23 100644
--- a/tests/NetworkExecutionUtils/NetworkExecutionUtils.cpp
+++ b/tests/NetworkExecutionUtils/NetworkExecutionUtils.cpp
@@ -52,6 +52,16 @@
                                    [](const std::string& s) { return armnn::numeric_cast<uint8_t>(std::stoi(s)); });
 }
 
+
+template<>
+auto ParseDataArray<armnn::DataType::QSymmS8>(std::istream& stream)
+{
+    return ParseArrayImpl<int8_t>(stream,
+                                   [](const std::string& s) { return armnn::numeric_cast<int8_t>(std::stoi(s)); });
+}
+
+
+
 template<>
 auto ParseDataArray<armnn::DataType::QAsymmU8>(std::istream& stream,
                                                const float& quantizationScale,
@@ -130,6 +140,15 @@
     }
 }
 
+void TensorPrinter::operator()(const std::vector<int8_t>& values)
+{
+    ForEachValue(values, [](int8_t value)
+    {
+        printf("%d ", value);
+    });
+    WriteToFile(values);
+}
+
 void TensorPrinter::operator()(const std::vector<int>& values)
 {
     ForEachValue(values, [](int value)
@@ -170,7 +189,8 @@
     }
 }
 
-using TContainer         = mapbox::util::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>>;
+using TContainer =
+        mapbox::util::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>, std::vector<int8_t>>;
 using QuantizationParams = std::pair<float, int32_t>;
 
 void PopulateTensorWithData(TContainer& tensorData,
@@ -212,6 +232,12 @@
                      ParseDataArray<armnn::DataType::Signed32>(inputTensorFile) :
                      GenerateDummyTensorData<armnn::DataType::Signed32>(numElements);
     }
+    else if (dataTypeStr.compare("qsymms8") == 0)
+    {
+        tensorData = readFromFile ?
+                     ParseDataArray<armnn::DataType::QSymmS8>(inputTensorFile) :
+                     GenerateDummyTensorData<armnn::DataType::QSymmS8>(numElements);
+    }
     else if (dataTypeStr.compare("qasymm8") == 0)
     {
         tensorData = readFromFile ?
diff --git a/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp b/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp
index 742f968..d92c17c 100644
--- a/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp
+++ b/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp
@@ -34,6 +34,8 @@
 
     void operator()(const std::vector<int>& values);
 
+    void operator()(const std::vector<int8_t>& values);
+
 private:
     template<typename Container, typename Delegate>
     void ForEachValue(const Container& c, Delegate delegate);
@@ -48,7 +50,8 @@
     bool m_DequantizeOutput;
 };
 
-using TContainer         = mapbox::util::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>>;
+using TContainer =
+        mapbox::util::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>, std::vector<int8_t>>;
 using QuantizationParams = std::pair<float, int32_t>;
 
 void PopulateTensorWithData(TContainer& tensorData,