IVGCVSW-2560 Verify Inference test for TensorFlow Lite MobileNet SSD

 * Assign output shape of MobileNet SSD to ArmNN network
 * Add m_OverridenOutputShapes to TfLiteParser to set shape in GetNetworkOutputBindingInfo
 * Use input quantization instead of output quantization params
 * Correct data and datatype in Inference test

Change-Id: I01ac2e07ed08e8928ba0df33a4847399e1dd8394
Signed-off-by: Narumol Prangnawarat <narumol.prangnawarat@arm.com>
Signed-off-by: Aron Virginas-Tar <Aron.Virginas-Tar@arm.com>
diff --git a/include/armnn/Descriptors.hpp b/include/armnn/Descriptors.hpp
index b14ed0b..dc09cc6 100644
--- a/include/armnn/Descriptors.hpp
+++ b/include/armnn/Descriptors.hpp
@@ -332,7 +332,7 @@
     DetectionPostProcessDescriptor()
     : m_MaxDetections(0)
     , m_MaxClassesPerDetection(1)
-    , m_DetectionsPerClass(100)
+    , m_DetectionsPerClass(1)
     , m_NmsScoreThreshold(0)
     , m_NmsIouThreshold(0)
     , m_NumClasses(0)
diff --git a/src/armnnTfLiteParser/TfLiteParser.cpp b/src/armnnTfLiteParser/TfLiteParser.cpp
index cd0e921..31e808f 100644
--- a/src/armnnTfLiteParser/TfLiteParser.cpp
+++ b/src/armnnTfLiteParser/TfLiteParser.cpp
@@ -295,7 +295,7 @@
     }
 }
 
-armnn::TensorInfo ToTensorInfo(TfLiteParser::TensorRawPtr tensorPtr)
+armnn::TensorInfo ToTensorInfo(TfLiteParser::TensorRawPtr tensorPtr, const std::vector<unsigned int>& shapes)
 {
     armnn::DataType type;
     CHECK_TENSOR_PTR(tensorPtr);
@@ -345,17 +345,21 @@
         }
     }
 
-    auto const & dimensions = AsUnsignedVector(tensorPtr->shape);
-
     // two statements (on purpose) for easier debugging:
-    armnn::TensorInfo result(static_cast<unsigned int>(tensorPtr->shape.size()),
-                             dimensions.data(),
+    armnn::TensorInfo result(static_cast<unsigned int>(shapes.size()),
+                             shapes.data(),
                              type,
                              quantizationScale,
                              quantizationOffset);
     return result;
 }
 
+armnn::TensorInfo ToTensorInfo(TfLiteParser::TensorRawPtr tensorPtr)
+{
+    auto const & dimensions = AsUnsignedVector(tensorPtr->shape);
+    return ToTensorInfo(tensorPtr, dimensions);
+}
+
 template<typename T>
 std::pair<armnn::ConstTensor, std::unique_ptr<T[]>>
 CreateConstTensorImpl(TfLiteParser::BufferRawPtr bufferPtr,
@@ -1796,10 +1800,17 @@
 
     BOOST_ASSERT(layer != nullptr);
 
-    // Register outputs
+    // The model does not specify the output shapes.
+    // The output shapes are calculated from m_MaxDetections and m_MaxClassesPerDetection.
+    unsigned int numDetectedBox = desc.m_MaxDetections * desc.m_MaxClassesPerDetection;
+    m_OverridenOutputShapes.push_back({ 1, numDetectedBox, 4 });
+    m_OverridenOutputShapes.push_back({ 1, numDetectedBox });
+    m_OverridenOutputShapes.push_back({ 1, numDetectedBox });
+    m_OverridenOutputShapes.push_back({ 1 });
+
     for (unsigned int i = 0 ; i < outputs.size() ; ++i)
     {
-        armnn::TensorInfo detectionBoxOutputTensorInfo = ToTensorInfo(outputs[i]);
+        armnn::TensorInfo detectionBoxOutputTensorInfo = ToTensorInfo(outputs[i], m_OverridenOutputShapes[i]);
         layer->GetOutputSlot(i).SetTensorInfo(detectionBoxOutputTensorInfo);
     }
 
@@ -2232,12 +2243,15 @@
 {
     CHECK_SUBGRAPH(m_Model, subgraphId);
     auto outputs = GetSubgraphOutputs(m_Model, subgraphId);
-    for (auto const & output : outputs)
+    for (unsigned int i = 0; i < outputs.size(); ++i)
     {
+        auto const output = outputs[i];
         if (output.second->name == name)
         {
             auto bindingId = GenerateLayerBindingId(subgraphId, output.first);
-            return std::make_pair(bindingId, ToTensorInfo(output.second));
+            std::vector<unsigned int> shape = m_OverridenOutputShapes.size() > 0 ?
+                                                m_OverridenOutputShapes[i] : AsUnsignedVector(output.second->shape);
+            return std::make_pair(bindingId, ToTensorInfo(output.second, shape));
         }
     }
 
diff --git a/src/armnnTfLiteParser/TfLiteParser.hpp b/src/armnnTfLiteParser/TfLiteParser.hpp
index 3fe4809..2895487 100644
--- a/src/armnnTfLiteParser/TfLiteParser.hpp
+++ b/src/armnnTfLiteParser/TfLiteParser.hpp
@@ -193,6 +193,10 @@
     /// Connections for tensors in each subgraph
     /// The first index is the subgraph ID, the second index is the tensor ID
     std::vector<TensorConnections> m_SubgraphConnections;
+
+    /// Output shapes used when the model does not specify them.
+    /// The shapes are calculated from the operator options instead.
+    std::vector<std::vector<unsigned int>> m_OverridenOutputShapes;
 };
 
 }
diff --git a/src/armnnTfLiteParser/test/DetectionPostProcess.cpp b/src/armnnTfLiteParser/test/DetectionPostProcess.cpp
index 3c60293..3002885 100644
--- a/src/armnnTfLiteParser/test/DetectionPostProcess.cpp
+++ b/src/armnnTfLiteParser/test/DetectionPostProcess.cpp
@@ -76,28 +76,24 @@
                             }
                         },
                         {
-                            "shape": [1, 3, 4],
                             "type": "FLOAT32",
                             "buffer": 3,
                             "name": "detection_boxes",
                             "quantization": {}
                         },
                         {
-                            "shape": [1, 3],
                             "type": "FLOAT32",
                             "buffer": 4,
                             "name": "detection_classes",
                             "quantization": {}
                         },
                         {
-                            "shape": [1, 3],
                             "type": "FLOAT32",
                             "buffer": 5,
                             "name": "detection_scores",
                             "quantization": {}
                         },
                         {
-                            "shape": [1],
                             "type": "FLOAT32",
                             "buffer": 6,
                             "name": "num_detections",
diff --git a/src/backends/reference/workloads/DetectionPostProcess.cpp b/src/backends/reference/workloads/DetectionPostProcess.cpp
index 2eb35f5..6868180 100644
--- a/src/backends/reference/workloads/DetectionPostProcess.cpp
+++ b/src/backends/reference/workloads/DetectionPostProcess.cpp
@@ -197,7 +197,7 @@
             }
             std::vector<unsigned int> selectedIndices = NonMaxSuppression(numBoxes, boxCorners, classScores,
                                                                           desc.m_NmsScoreThreshold,
-                                                                          desc.m_MaxClassesPerDetection,
+                                                                          desc.m_DetectionsPerClass,
                                                                           desc.m_NmsIouThreshold);
 
             for (unsigned int i = 0; i < selectedIndices.size(); ++i)
diff --git a/tests/InferenceModel.hpp b/tests/InferenceModel.hpp
index 6e73f52..25ccbee 100644
--- a/tests/InferenceModel.hpp
+++ b/tests/InferenceModel.hpp
@@ -577,6 +577,13 @@
                               m_OutputBindings[outputIndex].second.GetQuantizationOffset());
     }
 
+    QuantizationParams GetInputQuantizationParams(unsigned int inputIndex = 0u) const
+    {
+        CheckInputIndexIsValid(inputIndex);
+        return std::make_pair(m_InputBindings[inputIndex].second.GetQuantizationScale(),
+                              m_InputBindings[inputIndex].second.GetQuantizationOffset());
+    }
+
     std::vector<QuantizationParams> GetAllQuantizationParams() const
     {
         std::vector<QuantizationParams> quantizationParams;
diff --git a/tests/MobileNetSsdDatabase.hpp b/tests/MobileNetSsdDatabase.hpp
index cac5587..7a30f22 100644
--- a/tests/MobileNetSsdDatabase.hpp
+++ b/tests/MobileNetSsdDatabase.hpp
@@ -27,14 +27,17 @@
 struct MobileNetSsdTestCaseData
 {
     MobileNetSsdTestCaseData(
-        std::vector<uint8_t> inputData,
-        std::vector<DetectedObject> expectedOutput)
-        : m_InputData(std::move(inputData))
-        , m_ExpectedOutput(std::move(expectedOutput))
+        const std::vector<uint8_t>& inputData,
+        const std::vector<DetectedObject>& expectedDetectedObject,
+        const std::vector<std::vector<float>>& expectedOutput)
+        : m_InputData(inputData)
+        , m_ExpectedDetectedObject(expectedDetectedObject)
+        , m_ExpectedOutput(expectedOutput)
     {}
 
-    std::vector<uint8_t>        m_InputData;
-    std::vector<DetectedObject> m_ExpectedOutput;
+    std::vector<uint8_t>            m_InputData;
+    std::vector<DetectedObject>     m_ExpectedDetectedObject;
+    std::vector<std::vector<float>> m_ExpectedOutput;
 };
 
 class MobileNetSsdDatabase
@@ -59,7 +62,9 @@
     ObjectDetectionInput
     {
         "Cat.jpg",
-        DetectedObject(16, BoundingBox(0.208961248f, 0.0852333307f, 0.92757535f, 0.940263629f), 0.79296875f)
+        {
+          DetectedObject(16.0f, BoundingBox(0.208961248f, 0.0852333307f, 0.92757535f, 0.940263629f), 0.79296875f)
+        }
     }
 };
 
@@ -100,12 +105,33 @@
         return nullptr;
     }
 
-    // Prepare test case expected output
-    std::vector<DetectedObject> expectedOutput;
-    expectedOutput.reserve(1);
-    expectedOutput.push_back(testCaseInput.second);
+    std::vector<float> numDetections = { static_cast<float>(testCaseInput.second.size()) };
 
-    return std::make_unique<MobileNetSsdTestCaseData>(std::move(imageData), std::move(expectedOutput));
+    std::vector<float> detectionBoxes;
+    std::vector<float> detectionClasses;
+    std::vector<float> detectionScores;
+
+    for (DetectedObject expectedObject : testCaseInput.second)
+    {
+            detectionBoxes.push_back(expectedObject.m_BoundingBox.m_YMin);
+            detectionBoxes.push_back(expectedObject.m_BoundingBox.m_XMin);
+            detectionBoxes.push_back(expectedObject.m_BoundingBox.m_YMax);
+            detectionBoxes.push_back(expectedObject.m_BoundingBox.m_XMax);
+
+            detectionClasses.push_back(expectedObject.m_Class);
+
+            detectionScores.push_back(expectedObject.m_Confidence);
+    }
+
+    // Prepare test case expected output
+    std::vector<std::vector<float>> expectedOutputs;
+    expectedOutputs.reserve(4);
+    expectedOutputs.push_back(detectionBoxes);
+    expectedOutputs.push_back(detectionClasses);
+    expectedOutputs.push_back(detectionScores);
+    expectedOutputs.push_back(numDetections);
+
+    return std::make_unique<MobileNetSsdTestCaseData>(imageData, testCaseInput.second, expectedOutputs);
 }
 
 } // anonymous namespace
diff --git a/tests/MobileNetSsdInferenceTest.hpp b/tests/MobileNetSsdInferenceTest.hpp
index 10ee1dc..bbbf957 100644
--- a/tests/MobileNetSsdInferenceTest.hpp
+++ b/tests/MobileNetSsdInferenceTest.hpp
@@ -29,7 +29,7 @@
                                         { std::move(testCaseData.m_InputData) },
                                         { k_OutputSize1, k_OutputSize2, k_OutputSize3, k_OutputSize4 })
         , m_FloatComparer(boost::math::fpc::percent_tolerance(1.0f))
-        , m_DetectedObjects(testCaseData.m_ExpectedOutput)
+        , m_DetectedObjects(testCaseData.m_ExpectedDetectedObject)
     {}
 
     TestCaseResult ProcessResult(const InferenceTestOptions& options) override
@@ -46,10 +46,21 @@
         const std::vector<float>& output4 = boost::get<std::vector<float>>(this->GetOutputs()[3]); // valid detections
         BOOST_ASSERT(output4.size() == k_OutputSize4);
 
+        const size_t numDetections = boost::numeric_cast<size_t>(output4[0]);
+
+        // Check if number of valid detections matches expectations
+        const size_t expectedNumDetections = m_DetectedObjects.size();
+        if (numDetections != expectedNumDetections)
+        {
+            BOOST_LOG_TRIVIAL(error) << "Number of detections is incorrect: Expected (" <<
+                expectedNumDetections << ")" << " but got (" << numDetections << ")";
+            return TestCaseResult::Failed;
+        }
+
         // Extract detected objects from output data
         std::vector<DetectedObject> detectedObjects;
         const float* outputData = output1.data();
-        for (unsigned int i = 0u; i < k_NumDetections; i++)
+        for (unsigned int i = 0u; i < numDetections; i++)
         {
             // NOTE: Order of coordinates in output data is yMin, xMin, yMax, xMax
             float yMin = *outputData++;
@@ -58,61 +69,49 @@
             float xMax = *outputData++;
 
             DetectedObject detectedObject(
-                static_cast<unsigned int>(output2.at(i)),
+                output2.at(i),
                 BoundingBox(xMin, yMin, xMax, yMax),
                 output3.at(i));
 
             detectedObjects.push_back(detectedObject);
         }
 
-        // Sort detected objects by confidence
-        std::sort(detectedObjects.begin(), detectedObjects.end(),
-            [](const DetectedObject& a, const DetectedObject& b)
-            {
-                return a.m_Confidence > b.m_Confidence ||
-                    (a.m_Confidence == b.m_Confidence && a.m_Class > b.m_Class);
-            });
-
-        // Check if number of valid detections matches expectations
-        const size_t numValidDetections = boost::numeric_cast<size_t>(output4[0]);
-        if (numValidDetections != m_DetectedObjects.size())
-        {
-            BOOST_LOG_TRIVIAL(error) << "Number of valid detections is incorrect: Expected (" <<
-                m_DetectedObjects.size() << ")" << " but got (" << numValidDetections << ")";
-            return TestCaseResult::Failed;
-        }
+        std::sort(detectedObjects.begin(), detectedObjects.end());
+        std::sort(m_DetectedObjects.begin(), m_DetectedObjects.end());
 
         // Compare detected objects with expected results
         std::vector<DetectedObject>::const_iterator it = detectedObjects.begin();
-        for (const DetectedObject& expectedDetection : m_DetectedObjects)
+        for (unsigned int i = 0; i < numDetections; i++)
         {
             if (it == detectedObjects.end())
             {
-                BOOST_LOG_TRIVIAL(info) << "No more detected objects to compare";
+                BOOST_LOG_TRIVIAL(error) << "No more detected objects found! Index out of bounds: " << i;
                 return TestCaseResult::Abort;
             }
 
             const DetectedObject& detectedObject = *it;
-            if (detectedObject.m_Class != expectedDetection.m_Class)
+            const DetectedObject& expectedObject = m_DetectedObjects[i];
+
+            if (detectedObject.m_Class != expectedObject.m_Class)
             {
                 BOOST_LOG_TRIVIAL(error) << "Prediction for test case " << this->GetTestCaseId() <<
-                    " is incorrect: Expected (" << expectedDetection.m_Class << ")" <<
+                    " is incorrect: Expected (" << expectedObject.m_Class << ")" <<
                     " but predicted (" << detectedObject.m_Class << ")";
                 return TestCaseResult::Failed;
             }
 
-            if(!m_FloatComparer(detectedObject.m_Confidence, expectedDetection.m_Confidence))
+            if(!m_FloatComparer(detectedObject.m_Confidence, expectedObject.m_Confidence))
             {
                 BOOST_LOG_TRIVIAL(error) << "Confidence of prediction for test case " << this->GetTestCaseId() <<
-                    " is incorrect: Expected (" << expectedDetection.m_Confidence << ")  +- 1.0 pc" <<
+                    " is incorrect: Expected (" << expectedObject.m_Confidence << ")  +- 1.0 pc" <<
                     " but predicted (" << detectedObject.m_Confidence << ")";
                 return TestCaseResult::Failed;
             }
 
-            if (!m_FloatComparer(detectedObject.m_BoundingBox.m_XMin, expectedDetection.m_BoundingBox.m_XMin) ||
-                !m_FloatComparer(detectedObject.m_BoundingBox.m_YMin, expectedDetection.m_BoundingBox.m_YMin) ||
-                !m_FloatComparer(detectedObject.m_BoundingBox.m_XMax, expectedDetection.m_BoundingBox.m_XMax) ||
-                !m_FloatComparer(detectedObject.m_BoundingBox.m_YMax, expectedDetection.m_BoundingBox.m_YMax))
+            if (!m_FloatComparer(detectedObject.m_BoundingBox.m_XMin, expectedObject.m_BoundingBox.m_XMin) ||
+                !m_FloatComparer(detectedObject.m_BoundingBox.m_YMin, expectedObject.m_BoundingBox.m_YMin) ||
+                !m_FloatComparer(detectedObject.m_BoundingBox.m_XMax, expectedObject.m_BoundingBox.m_XMax) ||
+                !m_FloatComparer(detectedObject.m_BoundingBox.m_YMax, expectedObject.m_BoundingBox.m_YMax))
             {
                 BOOST_LOG_TRIVIAL(error) << "Detected bounding box for test case " << this->GetTestCaseId() <<
                     " is incorrect";
@@ -126,11 +125,11 @@
     }
 
 private:
-    static constexpr unsigned int k_NumDetections = 1u;
+    static constexpr unsigned int k_Shape       = 10u;
 
-    static constexpr unsigned int k_OutputSize1 = k_NumDetections * 4u;
-    static constexpr unsigned int k_OutputSize2 = k_NumDetections;
-    static constexpr unsigned int k_OutputSize3 = k_NumDetections;
+    static constexpr unsigned int k_OutputSize1 = k_Shape * 4u;
+    static constexpr unsigned int k_OutputSize2 = k_Shape;
+    static constexpr unsigned int k_OutputSize3 = k_Shape;
     static constexpr unsigned int k_OutputSize4 = 1u;
 
     boost::math::fpc::close_at_tolerance<float> m_FloatComparer;
@@ -152,7 +151,7 @@
 
         options.add_options()
             ("data-dir,d", po::value<std::string>(&m_DataDir)->required(),
-                "Path to directory containing test data");
+             "Path to directory containing test data");
 
         Model::AddCommandLineOptions(options, m_ModelCommandLineOptions);
     }
@@ -169,7 +168,7 @@
         {
             return false;
         }
-        std::pair<float, int32_t> qParams = m_Model->GetQuantizationParams();
+        std::pair<float, int32_t> qParams = m_Model->GetInputQuantizationParams();
         m_Database = std::make_unique<MobileNetSsdDatabase>(m_DataDir.c_str(), qParams.first, qParams.second);
         if (!m_Database)
         {
diff --git a/tests/ObjectDetectionCommon.hpp b/tests/ObjectDetectionCommon.hpp
index 85b54c2..ee3afb5 100644
--- a/tests/ObjectDetectionCommon.hpp
+++ b/tests/ObjectDetectionCommon.hpp
@@ -31,7 +31,7 @@
 
 struct DetectedObject
 {
-    DetectedObject(unsigned int detectedClass,
+    DetectedObject(float detectedClass,
                    const BoundingBox& boundingBox,
                    float confidence)
         : m_Class(detectedClass)
@@ -39,11 +39,17 @@
         , m_Confidence(confidence)
     {}
 
-    unsigned int m_Class;
+    bool operator<(const DetectedObject& other) const
+    {
+        return m_Confidence < other.m_Confidence ||
+            (m_Confidence == other.m_Confidence && m_Class < other.m_Class);
+    }
+
+    float        m_Class;
     BoundingBox  m_BoundingBox;
     float        m_Confidence;
 };
 
-using ObjectDetectionInput = std::pair<std::string, DetectedObject>;
+using ObjectDetectionInput = std::pair<std::string, std::vector<DetectedObject>>;
 
 } // anonymous namespace
\ No newline at end of file