Fixing compare output feature in ExecuteNetwork

The -A -B -C options in execute network were attempting to calculate
the RMS error over output tensors. However, the calculation was mixing
tensor elements and bytes when doing the calculation. This patch
changes the calculation to use a per byte RMS error calculation.

Signed-off-by: Colm Donelan <colm.donelan@arm.com>
Change-Id: If30230a16cfed1a8804b4d54ed1abcd371f26664
diff --git a/tests/ExecuteNetwork/ArmNNExecutor.cpp b/tests/ExecuteNetwork/ArmNNExecutor.cpp
index 730c072..29ef4c5 100644
--- a/tests/ExecuteNetwork/ArmNNExecutor.cpp
+++ b/tests/ExecuteNetwork/ArmNNExecutor.cpp
@@ -707,48 +707,14 @@
 void ArmNNExecutor::CompareAndPrintResult(std::vector<const void*> otherOutput)
 {
     unsigned int index = 0;
-
+    std::string typeString;
     for (const auto& outputTensors: m_OutputTensorsVec)
     {
         for (const auto& outputTensor: outputTensors)
         {
-            float result = 0;
             size_t size = outputTensor.second.GetNumBytes();
-
-            switch (outputTensor.second.GetDataType())
-            {
-                case armnn::DataType::Float32:
-                {
-                    result = ComputeRMSE<float>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
-                    break;
-                }
-                case armnn::DataType::Signed32:
-                {
-                    result = ComputeRMSE<int32_t>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
-                    break;
-                }
-                case armnn::DataType::QSymmS16:
-                {
-                    result = ComputeRMSE<int16_t>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
-                    break;
-                }
-                case armnn::DataType::QSymmS8:
-                case armnn::DataType::QAsymmS8:
-                {
-                    result = ComputeRMSE<int8_t>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
-                    break;
-                }
-                case armnn::DataType::QAsymmU8:
-                {
-                    result = ComputeRMSE<uint8_t>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
-                    break;
-                }
-                default:
-                {
-                    LogAndThrow("Unexpected DataType");
-                }
-            }
-            std::cout << "RMSE: of " << result << "\n";
+            double result = ComputeByteLevelRMSE(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
+            std::cout << "Byte level root mean square error: " << result << "\n";
         }
     }
 }
diff --git a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp
index cba6748..007f818 100644
--- a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp
+++ b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp
@@ -359,17 +359,19 @@
                  cxxopts::value<std::string>()->default_value("parser"))
 
                 ("C, compare-output",
-                 "Compare the output of the network with an output file that has been previously "
-                 "produced by running a network through ExecuteNetwork. See --write-outputs-to-file "
-                 "to produce an output file for an execution.",
+                 "Perform a per byte root mean square error calculation of the inference output with an output"
+                 " file that has been previously produced by running a network through ExecuteNetwork."
+                 " See --write-outputs-to-file to produce an output file for an execution.",
                  cxxopts::value<std::string>(m_ExNetParams.m_ComparisonFile))
 
                 ("B, compare-output-with-backend",
-                 "Compare the output of the network with a different backend.",
+                 "Perform a per byte root mean square error calculation of the output of the inference with a"
+                 " different backend.",
                  cxxopts::value<std::vector<std::string>>())
 
                 ("A, compare-with-tflite",
-                 "Compare the output of the network with the tflite ref model.",
+                 "Perform a per byte root mean square error calculation of the output of the inference with"
+                 " the tflite ref model.",
                  cxxopts::value<bool>(m_ExNetParams.m_CompareWithTflite)->default_value("false")
                          ->implicit_value("true"));
 
diff --git a/tests/ExecuteNetwork/TfliteExecutor.cpp b/tests/ExecuteNetwork/TfliteExecutor.cpp
index fc9c21a..f365623 100644
--- a/tests/ExecuteNetwork/TfliteExecutor.cpp
+++ b/tests/ExecuteNetwork/TfliteExecutor.cpp
@@ -230,45 +230,9 @@
     for (unsigned int outputIndex = 0; outputIndex < m_TfLiteInterpreter->outputs().size(); ++outputIndex)
     {
         auto tfLiteDelegateOutputId = m_TfLiteInterpreter->outputs()[outputIndex];
-        float result = 0;
-        switch (m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->type)
-        {
-            case kTfLiteFloat32:
-            {
-                result =  ComputeRMSE<float>(m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->allocation,
-                                             otherOutput[outputIndex],
-                                             m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->bytes);
-
-                break;
-            }
-            case kTfLiteInt32:
-            {
-                result =  ComputeRMSE<int32_t>(m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->allocation,
-                                                    otherOutput[outputIndex],
-                                                    m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->bytes);
-                break;
-            }
-            case kTfLiteUInt8:
-            {
-                result =  ComputeRMSE<uint8_t>(m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->allocation,
-                                                    otherOutput[outputIndex],
-                                                    m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->bytes);
-                break;
-            }
-            case kTfLiteInt8:
-            {
-                result =  ComputeRMSE<int8_t>(m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->allocation,
-                                                    otherOutput[outputIndex],
-                                                    m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->bytes);
-                break;
-            }
-            default:
-            {
-            }
-        }
-
-        std::cout << "RMSE of "
-                  << m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->name
-                  << ": " << result << std::endl;
+        size_t size = m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->bytes;
+        double result = ComputeByteLevelRMSE(m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->allocation,
+                                             otherOutput[outputIndex], size);
+        std::cout << "Byte level root mean square error: " << result << "\n";
     }
 };