IVGCVSW-7277 Fixed issues with FullyConnected on certain TFLite models

 * TFLite Parser:
    * Fixed issue in ParseReshape where the targetShape wasn't always calculated correctly
    * Fixed issue in ParseFullyConnected where the wrong name was used for the ReshapeLayer
    * Added an ExpandDims to the FullyConnected to ensure that we reshape the output correctly
 * TFLite Delegate:
    * Added an ExpandDims to the FullyConnected to ensure that we reshape the output correctly

Signed-off-by: Mike Kelly <mike.kelly@arm.com>
Change-Id: I129dfcb8543f8a3a297c0589c841be20ef3b6407
diff --git a/delegate/src/DelegateUtils.hpp b/delegate/src/DelegateUtils.hpp
index 850b279..9144757 100644
--- a/delegate/src/DelegateUtils.hpp
+++ b/delegate/src/DelegateUtils.hpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2020-2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 
@@ -367,6 +367,52 @@
     return kTfLiteOk;
 }
 
+armnn::IConnectableLayer* AddReshapeLayer(TfLiteContext* tfLiteContext,
+                                          TfLiteNode* tfLiteNode,
+                                          armnn::IConnectableLayer* prevLayer,
+                                          armnn::TensorInfo reshapedOutputTensorInfo,
+                                          armnn::TensorInfo outputTensorInfo,
+                                          armnnDelegate::DelegateData& data)
+{
+    armnn::ReshapeDescriptor desc;
+    desc.m_TargetShape = outputTensorInfo.GetShape();
+
+    bool isSupported = false;
+    armnn::BackendId setBackend;
+    FORWARD_LAYER_SUPPORT_FUNC("RESHAPE",
+                               tfLiteContext,
+                               IsReshapeSupported,
+                               data.m_Backends,
+                               isSupported,
+                               setBackend,
+                               reshapedOutputTensorInfo,
+                               outputTensorInfo,
+                               desc);
+
+    if (!isSupported)
+    {
+        return nullptr;
+    }
+
+    armnn::IConnectableLayer* reshapeLayer = data.m_Network->AddReshapeLayer(desc);
+    reshapeLayer->SetBackendId(setBackend);
+    ARMNN_ASSERT(reshapeLayer != nullptr);
+
+    prevLayer->GetOutputSlot(0).SetTensorInfo(reshapedOutputTensorInfo);
+    reshapeLayer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
+
+    // Connect and prepare output slots
+    for (unsigned int outputIndex = 0; outputIndex < reshapeLayer->GetNumOutputSlots(); ++outputIndex)
+    {
+        data.m_OutputSlotForNode[static_cast<unsigned long>(
+                tfLiteNode->outputs->data[outputIndex])]->Connect(reshapeLayer->GetInputSlot(0));
+        armnn::IOutputSlot& outputSlot = reshapeLayer->GetOutputSlot(outputIndex);
+        data.m_OutputSlotForNode[static_cast<unsigned long>(
+                tfLiteNode->outputs->data[outputIndex])] = &outputSlot;
+    }
+    return reshapeLayer;
+}
+
 armnn::DataType GetDataType(const TfLiteTensor& tfLiteTensor)
 {
     switch (tfLiteTensor.type)
diff --git a/delegate/src/FullyConnected.hpp b/delegate/src/FullyConnected.hpp
index a2960e2..2243ad0 100644
--- a/delegate/src/FullyConnected.hpp
+++ b/delegate/src/FullyConnected.hpp
@@ -1,11 +1,12 @@
 //
-// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2020-2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 
 #pragma once
 
 #include "DelegateUtils.hpp"
+#include "armnnUtils/TensorUtils.hpp"
 #include <armnn/utility/IgnoreUnused.hpp>
 
 #include <tensorflow/lite/builtin_ops.h>
@@ -103,6 +104,25 @@
 
         reshapedTensorInfo.SetShape(armnn::TensorShape{ 2, reshapedDimensions.data() });
     }
+    armnn::TensorInfo reshapedOutputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor);
+
+    if (outputTensorInfo.GetNumDimensions() > 2)
+    {
+        // Calculate reshape to flatten the output to 2D [batch_size, num_outputs]
+        std::vector<unsigned int> reshapedDimensions(2);
+        reshapedDimensions[1] = weightsTensorInfo.GetShape()[0];
+        reshapedDimensions[0] = outputTensorInfo.GetNumElements() / reshapedDimensions[1];
+
+        if (outputTensorInfo.GetNumElements() % reshapedDimensions[1] != 0)
+        {
+            TF_LITE_MAYBE_KERNEL_LOG(
+                    tfLiteContext,
+                    "TfLiteArmnnDelegate: Failed to deduce output tensor shape from filter size #%d #%d node #%d: ",
+                    reshapedDimensions[1], operatorCode, nodeIndex);
+            return kTfLiteError;
+        }
+        reshapedOutputTensorInfo.SetShape(armnn::TensorShape{ 2, reshapedDimensions.data() });
+    }
 
     armnn::FullyConnectedDescriptor descriptor;
     descriptor.m_TransposeWeightMatrix = true;
@@ -113,6 +133,7 @@
     armnn::BackendId setBackend;
     auto validateFunc = [&](const armnn::TensorInfo& outputTensorInfo, bool& isSupported)
     {
+
         FORWARD_LAYER_SUPPORT_FUNC("FULLY_CONNECTED",
                                    tfLiteContext,
                                    IsFullyConnectedSupported,
@@ -128,7 +149,7 @@
 
     if (!delegateData.m_Network)
     {
-        validateFunc(outputTensorInfo, isSupported);
+        validateFunc(reshapedOutputTensorInfo, isSupported);
         return isSupported ? kTfLiteOk : kTfLiteError;
     }
 
@@ -202,12 +223,27 @@
     }
 
     auto* tfLiteNodeParameters = reinterpret_cast<TfLiteFullyConnectedParams*>(tfLiteNode->builtin_data);
+
+    if (outputTensorInfo.GetNumDimensions() > 2)
+    {
+        layer = AddReshapeLayer(tfLiteContext, tfLiteNode, layer, reshapedOutputTensorInfo, outputTensorInfo,
+                                delegateData);
+        if (!layer)
+        {
+            TF_LITE_MAYBE_KERNEL_LOG(
+                    tfLiteContext,
+                    "TfLiteArmnnDelegate: Failed to add reshape for FullyConnected #%d node #%d: ",
+                    operatorCode,
+                    nodeIndex);
+            return kTfLiteError;
+        }
+    }
+
     if (!tfLiteNodeParameters)
     {
         // No Activation
         return kTfLiteOk;
     }
-
     // Check Activation
     TfLiteFusedActivation activationType = tfLiteNodeParameters->activation;
     return FusedActivation(tfLiteContext, tfLiteNode, activationType, layer, 0, delegateData);
diff --git a/delegate/src/test/FullyConnectedTest.cpp b/delegate/src/test/FullyConnectedTest.cpp
index c300bc7..3ef5ced 100644
--- a/delegate/src/test/FullyConnectedTest.cpp
+++ b/delegate/src/test/FullyConnectedTest.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2020-2021,2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 
diff --git a/src/armnnTfLiteParser/TfLiteParser.cpp b/src/armnnTfLiteParser/TfLiteParser.cpp
index a3bec92..66dc13e 100644
--- a/src/armnnTfLiteParser/TfLiteParser.cpp
+++ b/src/armnnTfLiteParser/TfLiteParser.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2018-2023 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 
@@ -497,9 +497,19 @@
             }
 
             std::unique_ptr<bool[]> dimMask = std::make_unique<bool[]>(tensorPtr->shape_signature.size());
+            bool batchOnly = true;
             for (unsigned int i = 0; i < tensorPtr->shape_signature.size(); ++i)
             {
-                dimMask[i] = tensorPtr->shape_signature[i] == -1 ? false : true;
+                dimMask[i] = tensorPtr->shape_signature[i] != -1;
+
+                if (i > 0 && !dimMask[i])
+                {
+                    batchOnly = false;
+                }
+            }
+            if (batchOnly)
+            {
+                dimMask[0] = true;
             }
             tensorShape = TensorShape(static_cast<unsigned int>(safeShape.size()), safeShape.data(), dimMask.get());
         }
@@ -1163,7 +1173,7 @@
 
     for (unsigned int i = 0u; i < numOutputs; ++i)
     {
-        layer->GetOutputSlot(i).SetTensorInfo(OutputTensorInfoFromInputs(subgraphIndex, operatorIndex, layer, i, {}));
+        layer->GetOutputSlot(i).SetTensorInfo(ToTensorInfo(outputs[0], true));
     }
 
     auto inputTensorIds  = AsUnsignedVector(GetInputTensorIds(m_Model, subgraphIndex, operatorIndex));
@@ -2969,10 +2979,17 @@
                 try
                 {
                     // We attempt to infer during Runtime.
-                    TensorShape reshapeShapes   =ToTensorInfo(inputs[1]).GetShape();
-                    reshapeShapes   = InputTensorInfo(subgraphIndex, operatorIndex, 1).GetShape();
+                    TensorShape reshapeShapes = ToTensorInfo(inputs[1]).GetShape();
+
+                    if (reshapeShapes[0] == actualOutputTensorInfo.GetNumDimensions())
+                    {
+                        for (unsigned int i = 0; i < actualOutputTensorInfo.GetShape().GetNumDimensions(); ++i)
+                        {
+                            targetShape.push_back(actualOutputTensorInfo.GetShape()[i]);
+                        }
+                    }
                     // The parser only supports shape (batch, -1) or (-1) for non-constant shape input.
-                    if (reshapeShapes[0] > 2)
+                    else if (reshapeShapes[0] > 2)
                     {
                         throw ParseException(fmt::format("Invalid input shape '{}' in Reshape layer '{}' {}. "
                                                          "When inferring during runtime, the parser only supports "
@@ -2981,16 +2998,18 @@
                                                          layerName,
                                                          CHECK_LOCATION().AsString()));
                     }
-
-                    const int32_t numInputElements = inputTensorInfo.GetNumElements();
-                    const int32_t inputTensorShape = inputTensorInfo.GetShape()[0];
-                    if (reshapeShapes[0] == 1)
+                    else
                     {
-                        targetShape = {numInputElements};
-                    }
-                    else if (reshapeShapes[0] == 2)
-                    {
-                        targetShape = {inputTensorShape, numInputElements / inputTensorShape};
+                        const int32_t numInputElements = inputTensorInfo.GetNumElements();
+                        const int32_t inputTensorShape = inputTensorInfo.GetShape()[0];
+                        if (reshapeShapes[0] == 1)
+                        {
+                            targetShape = {numInputElements};
+                        }
+                        else if (reshapeShapes[0] == 2)
+                        {
+                            targetShape = {inputTensorShape, numInputElements / inputTensorShape};
+                        }
                     }
                 }
                 catch (const std::exception& exc)
@@ -3220,7 +3239,6 @@
     auto inputTensorIndexes = AsUnsignedVector(GetInputTensorIds(m_Model, subgraphIndex, operatorIndex));
     // Add the first input tensor to the registration list
     std::vector<unsigned int> tensorIndexesToRegister = {inputTensorIndexes[0]};
-    std::vector<unsigned int> ignoreInputWhenRegister = {};
     armnn::TensorInfo inputTensorInfo = InputTensorInfo(subgraphIndex, operatorIndex, 0);
 
     desc.m_ConstantWeights = IsConstTensor(inputs[1]);
@@ -3278,14 +3296,12 @@
         std::string reshapeLayerName = fmt::format("Reshape_for:{}", layer->GetName());
         armnn::ReshapeDescriptor reshapeDescriptor;
         reshapeDescriptor.m_TargetShape = reshapedTensorInfo.GetShape();
-        armnn::IConnectableLayer* reshapeLayer = m_Network->AddReshapeLayer(reshapeDescriptor, layerName.c_str());
+        armnn::IConnectableLayer* reshapeLayer = m_Network->AddReshapeLayer(reshapeDescriptor,
+                                                                            reshapeLayerName.c_str());
 
         reshapeLayer->GetOutputSlot(0).SetTensorInfo(reshapedTensorInfo);
         reshapeLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
 
-        auto outputTensorIndexes = AsUnsignedVector(GetOutputTensorIds(m_Model, subgraphIndex, operatorIndex));
-        m_TensorInfos[outputTensorIndexes[0]] = reshapedTensorInfo;
-
         RegisterInputSlots(subgraphIndex, operatorIndex, reshapeLayer, {inputTensorIndexes[0]});
         // Fc layer connects to the reshape layer, so we skip the first input slot when registering fc's input slots
         tensorIndexesToRegister.erase(tensorIndexesToRegister.begin());
@@ -3297,8 +3313,30 @@
     armnn::TensorInfo outputTensorInfo = OutputTensorInfoFromShapes(subgraphIndex, operatorIndex, layer, 0,
                                                                     { inputTensorInfo.GetShape(),
                                                                       filterTensorInfo.GetShape() });
+
     layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
 
+    if (outputTensorInfo.GetNumDimensions() > 2)
+    {
+        // Calculate reshape to flatten the output to 2D [batch_size, num_outputs]
+        std::vector<unsigned int> reshapedDimensions(2);
+        reshapedDimensions[1] = filterTensorInfo.GetShape()[0];
+        reshapedDimensions[0] = outputTensorInfo.GetNumElements() / reshapedDimensions[1];
+        armnn::TensorInfo reshapedOutputTensorInfo = outputTensorInfo;
+        if (outputTensorInfo.GetNumElements() % reshapedDimensions[1] != 0)
+        {
+            throw ParseException(
+                    fmt::format("Failed to deduce output tensor shape from filter size {} {}",
+                                reshapedDimensions[1],
+                                CHECK_LOCATION().AsString()));
+        }
+        reshapedOutputTensorInfo.SetShape(armnn::TensorShape{ 2, reshapedDimensions.data() });
+        layer->GetOutputSlot(0).SetTensorInfo(reshapedOutputTensorInfo);
+
+        std::string reshapeLayerName = fmt::format("ExpandDims:{}:{}", subgraphIndex, operatorIndex);
+        layer = AddReshapeLayer(layer, 0, reshapeLayerName, outputTensorInfo);
+    }
+
     // we need to add the activation layer and fortunately we don't need to care about the data layout
     armnn::IConnectableLayer* fusedActivationLayer = AddFusedActivationLayer(layer, 0,
                                                                              options->fused_activation_function);
@@ -3306,6 +3344,8 @@
     // register the output connection slots for the layer, connections are made after all layers have been created
     auto outputTensorIndexes = AsUnsignedVector(GetOutputTensorIds(m_Model, subgraphIndex, operatorIndex));
     RegisterOutputSlots(subgraphIndex, operatorIndex, fusedActivationLayer, {outputTensorIndexes[0]});
+
+    m_TensorInfos[outputTensorIndexes[0]] = layer->GetOutputSlot(0).GetTensorInfo();
 }
 
 void TfLiteParserImpl::ParseDetectionPostProcess(size_t subgraphIndex, size_t operatorIndex)
@@ -4533,6 +4573,23 @@
     RegisterOutputSlots(subgraphIndex, operatorIndex, layer, {outputTensorIndexes[0]});
 }
 
+armnn::IConnectableLayer* TfLiteParserImpl::AddReshapeLayer(armnn::IConnectableLayer* layer,
+                                                            unsigned int outputSlot,
+                                                            std::string reshapeLayerName,
+                                                            armnn::TensorInfo outputShape)
+{
+    ReshapeDescriptor desc;
+    desc.m_TargetShape = outputShape.GetShape();
+
+    IConnectableLayer* reshapeLayer =
+            m_Network->AddReshapeLayer(desc, reshapeLayerName.c_str());
+
+    auto & prevOutputSlot = layer->GetOutputSlot(outputSlot);
+    prevOutputSlot.Connect(reshapeLayer->GetInputSlot(0));
+    reshapeLayer->GetOutputSlot(0).SetTensorInfo(outputShape);
+    return reshapeLayer;
+}
+
 armnn::IConnectableLayer* TfLiteParserImpl::AddFusedActivationLayer(armnn::IConnectableLayer* prevLayer,
                                                                     unsigned int outputSlot,
                                                                     tflite::ActivationFunctionType activationType)
diff --git a/src/armnnTfLiteParser/TfLiteParser.hpp b/src/armnnTfLiteParser/TfLiteParser.hpp
index cf334cc..327b8a8 100644
--- a/src/armnnTfLiteParser/TfLiteParser.hpp
+++ b/src/armnnTfLiteParser/TfLiteParser.hpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2018-2023 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 #pragma once
@@ -218,6 +218,12 @@
                                   size_t operatorIndex,
                                   armnn::IConnectableLayer* layer);
 
+    /// Attach a reshape layer to the one passed as a parameter
+    armnn::IConnectableLayer* AddReshapeLayer(armnn::IConnectableLayer* layer,
+                                              unsigned int outputSlot,
+                                              std::string reshapeLayerName,
+                                              armnn::TensorInfo outputShape);
+
     /// Attach an activation layer to the one passed as a parameter
     armnn::IConnectableLayer* AddFusedActivationLayer(armnn::IConnectableLayer* layer,
                                                       unsigned int outputSlot,
diff --git a/src/armnnTfLiteParser/test/FullyConnected.cpp b/src/armnnTfLiteParser/test/FullyConnected.cpp
index 108b878..0597776 100644
--- a/src/armnnTfLiteParser/test/FullyConnected.cpp
+++ b/src/armnnTfLiteParser/test/FullyConnected.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 
@@ -360,7 +360,7 @@
 {
     FullyConnectedWeightsBiasFloat()
             : FullyConnectedFixture("[ 1, 4, 1, 1 ]",     // inputShape
-                                    "[ 1, 1 ]",           // outputShape
+                                    "[ 1, 1, 1, 1 ]",     // outputShape
                                     "[ 1, 4 ]",           // filterShape
                                     "[ 2, 3, 4, 5 ]",     // filterData
                                     "[ 1 ]",              // biasShape
@@ -373,7 +373,7 @@
 
 TEST_CASE_FIXTURE(FullyConnectedWeightsBiasFloat, "FullyConnectedWeightsBiasFloat")
 {
-    RunTest<2, armnn::DataType::Float32>(
+    RunTest<4, armnn::DataType::Float32>(
             0,
             { 10, 20, 30, 40 },
             { 400 });