IVGCVSW-6119 ConstTensorsAsInput: FullyConnected

 * Constant weights and biases are now stored as Constant layers.
 * Updated Serializer, Deserializer and unit tests to reflect this.
 * Updated TfLiteDelegate, TfLiteParser and OnnxParser.
 * Updated Schema with IsConstant and ConstantTensorsAsInputs.
 * Updated Ref backend to handle constant weights and
   bias as inputs rather than reading from member variables.
 * Added dynamic or constant input EndToEnd tests.

!android-nn-driver:5959

Signed-off-by: Matthew Sloyan <matthew.sloyan@arm.com>
Change-Id: Ibf3cf437df1100e4b322b0d303c575c6339f9696
diff --git a/src/armnnDeserializer/Deserializer.cpp b/src/armnnDeserializer/Deserializer.cpp
index 2d9194a..5c99496 100644
--- a/src/armnnDeserializer/Deserializer.cpp
+++ b/src/armnnDeserializer/Deserializer.cpp
@@ -688,6 +688,7 @@
 {
     CHECK_CONST_TENSOR_PTR(constTensorPtr);
     armnn::TensorInfo tensorInfo = ToTensorInfo(constTensorPtr->info());
+    tensorInfo.SetConstant();
 
     switch (constTensorPtr->data_type())
     {
@@ -938,6 +939,7 @@
     {
         versions.m_BindingIdScheme = graph->featureVersions()->bindingIdsScheme();
         versions.m_WeightsLayoutScheme = graph->featureVersions()->weightsLayoutScheme();
+        versions.m_ConstTensorsAsInputs = graph->featureVersions()->constantTensorsAsInputs();
     }
 
     return versions;
@@ -1052,13 +1054,15 @@
 }
 
 void IDeserializer::DeserializerImpl::RegisterInputSlots(GraphPtr graph,
-                                      uint32_t layerIndex,
-                                      armnn::IConnectableLayer* layer)
+                                                         uint32_t layerIndex,
+                                                         armnn::IConnectableLayer* layer,
+                                                         std::vector<unsigned int> ignoreSlots)
 {
     CHECK_LAYERS(graph, 0, layerIndex);
     ARMNN_ASSERT(layer != nullptr);
     LayerBaseRawPtr baseLayer = GetBaseLayer(graph, layerIndex);
-    if (baseLayer->inputSlots()->size() != layer->GetNumInputSlots())
+
+    if (baseLayer->inputSlots()->size() != (layer->GetNumInputSlots() - ignoreSlots.size()))
     {
         throw ParseException(fmt::format("The number of inputslots ({0}) does not match the number expected ({1})"
                                          " for layer index:{2} {3}",
@@ -1070,10 +1074,14 @@
 
     for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
     {
-        auto fbInputSlot = baseLayer->inputSlots()->Get(i);
-        auto fbConnection = fbInputSlot->connection();
-        armnn::IInputSlot* inputSlot = &(layer->GetInputSlot(fbInputSlot->index()));
-        RegisterInputSlotOfConnection(fbConnection->sourceLayerIndex(), fbConnection->outputSlotIndex(), inputSlot);
+        // Check if slot should be ignored.
+        if (std::find(ignoreSlots.begin(), ignoreSlots.end(), i) == ignoreSlots.end())
+        {
+            auto fbInputSlot = baseLayer->inputSlots()->Get(i);
+            auto fbConnection = fbInputSlot->connection();
+            armnn::IInputSlot* inputSlot = &(layer->GetInputSlot(fbInputSlot->index()));
+            RegisterInputSlotOfConnection(fbConnection->sourceLayerIndex(), fbConnection->outputSlotIndex(), inputSlot);
+        }
     }
 }
 
@@ -1924,40 +1932,47 @@
     fullyConnectedDescriptor.m_BiasEnabled = flatBufferDescriptor->biasEnabled();
     fullyConnectedDescriptor.m_TransposeWeightMatrix = flatBufferDescriptor->transposeWeightsMatrix();
     fullyConnectedDescriptor.m_ConstantWeights = flatBufferDescriptor->constantWeights();
-    uint32_t numInputs = 1;
-    if (!fullyConnectedDescriptor.m_ConstantWeights)
+
+    armnn::IConnectableLayer* layer;
+    std::vector<unsigned int> ignoreSlots {};
+
+    // Weights and biases used to be always constant and were stored as members of the layer. This has changed and
+    // they are now passed as inputs. If they are constant then they will be stored in a ConstantLayer.
+    if (this->GetFeatureVersions(graph).m_ConstTensorsAsInputs <= 0)
     {
-        numInputs = 2;
+        // If the model stores weights and biases as members of the layer we have to read them from there
+        // but add them to their own ConstantLayer for compatibility
+        CHECK_VALID_SIZE(inputs.size(), 1);
+        layer = m_Network->AddFullyConnectedLayer(fullyConnectedDescriptor,
+                                                  layerName.c_str());
+
+        armnn::ConstTensor weightsTensor = ToConstTensor(flatBufferLayer->weights());
+        auto weightsLayer = m_Network->AddConstantLayer(weightsTensor);
+        weightsLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1u));
+        weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsTensor.GetInfo());
+        ignoreSlots.emplace_back(1u);
+
         if (fullyConnectedDescriptor.m_BiasEnabled)
         {
-            numInputs = 3;
+            armnn::ConstTensor biasTensor = ToConstTensor(flatBufferLayer->biases());
+            auto biasLayer = m_Network->AddConstantLayer(biasTensor);
+            biasLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(2u));
+            biasLayer->GetOutputSlot(0).SetTensorInfo(biasTensor.GetInfo());
+            ignoreSlots.emplace_back(2u);
         }
     }
-    CHECK_VALID_SIZE(inputs.size(), numInputs);
-
-    armnn::Optional <armnn::ConstTensor> optionalWeights = armnn::EmptyOptional();
-    armnn::Optional<armnn::ConstTensor> optionalBiases = armnn::EmptyOptional();
-    if (fullyConnectedDescriptor.m_ConstantWeights)
+    else
     {
-        armnn::ConstTensor weightsTensorData = ToConstTensor(flatBufferLayer->weights());
-        optionalWeights = armnn::Optional<armnn::ConstTensor>(weightsTensorData);
-
-        if (flatBufferDescriptor->biasEnabled())
-        {
-            armnn::ConstTensor biasTensorData = ToConstTensor(flatBufferLayer->biases());
-            optionalBiases = armnn::Optional<armnn::ConstTensor>(biasTensorData);
-        }
+        layer = m_Network->AddFullyConnectedLayer(fullyConnectedDescriptor,
+                                                  layerName.c_str());
+        uint32_t numInputs = fullyConnectedDescriptor.GetNumInputs();
+        CHECK_VALID_SIZE(inputs.size(), numInputs);
     }
 
-    armnn::IConnectableLayer* layer = m_Network->AddFullyConnectedLayer(fullyConnectedDescriptor,
-                                                                        optionalWeights,
-                                                                        optionalBiases,
-                                                                        layerName.c_str());
-
     armnn::TensorInfo outputTensorInfo = ToTensorInfo(outputs[0]);
     layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
 
-    RegisterInputSlots(graph, layerIndex, layer);
+    RegisterInputSlots(graph, layerIndex, layer, ignoreSlots);
     RegisterOutputSlots(graph, layerIndex, layer);
 }
 
diff --git a/src/armnnDeserializer/Deserializer.hpp b/src/armnnDeserializer/Deserializer.hpp
index b1362c4..b4dc68b 100644
--- a/src/armnnDeserializer/Deserializer.hpp
+++ b/src/armnnDeserializer/Deserializer.hpp
@@ -143,9 +143,12 @@
     void ParseTransposeConvolution2d(GraphPtr graph, unsigned int layerIndex);
     void ParseUnidirectionalSequenceLstm(GraphPtr graph, unsigned int layerIndex);
 
-    void RegisterInputSlots(GraphPtr graph, uint32_t layerIndex,
-                            armnn::IConnectableLayer* layer);
-    void RegisterOutputSlots(GraphPtr graph, uint32_t layerIndex,
+    void RegisterInputSlots(GraphPtr graph,
+                            uint32_t layerIndex,
+                            armnn::IConnectableLayer* layer,
+                            std::vector<unsigned int> ignoreSlots = {});
+    void RegisterOutputSlots(GraphPtr graph,
+                             uint32_t layerIndex,
                              armnn::IConnectableLayer* layer);
 
     // NOTE index here must be from flatbuffer object index property
@@ -171,6 +174,9 @@
 
         // Default values to zero for backward compatibility
         unsigned int m_WeightsLayoutScheme = 0;
+
+        // Default values to zero for backward compatibility
+        unsigned int m_ConstTensorsAsInputs = 0;
     };
 
     FeatureVersions GetFeatureVersions(GraphPtr graph);
diff --git a/src/armnnDeserializer/test/DeserializeFullyConnected.cpp b/src/armnnDeserializer/test/DeserializeFullyConnected.cpp
index da2db08..5e298d1 100644
--- a/src/armnnDeserializer/test/DeserializeFullyConnected.cpp
+++ b/src/armnnDeserializer/test/DeserializeFullyConnected.cpp
@@ -117,22 +117,241 @@
     }
 };
 
+
+struct FullyConnectedFixtureConstantAsInput : public ParserFlatbuffersSerializeFixture
+{
+    explicit FullyConnectedFixtureConstantAsInput()
+    {
+        m_JsonString = R"(
+    {
+      "layers": [
+        {
+          "layer_type": "InputLayer",
+          "layer": {
+            "base": {
+              "base": {
+                "index": 0,
+                "layerName": "InputLayer",
+                "layerType": "Input",
+                "inputSlots": [
+
+                ],
+                "outputSlots": [
+                  {
+                    "index": 0,
+                    "tensorInfo": {
+                      "dimensions": [
+                        1,
+                        4,
+                        1,
+                        1
+                      ],
+                      "dataType": "QAsymmU8",
+                      "quantizationScale": 1.0,
+                      "quantizationOffset": 0,
+                      "quantizationDim": 0,
+                      "dimensionality": 1,
+                      "dimensionSpecificity": [
+                        true,
+                        true,
+                        true,
+                        true
+                      ]
+                    }
+                  }
+                ]
+              },
+              "layerBindingId": 0
+            }
+          }
+        },
+        {
+          "layer_type": "FullyConnectedLayer",
+          "layer": {
+            "base": {
+              "index": 1,
+              "layerName": "FullyConnectedLayer",
+              "layerType": "FullyConnected",
+              "inputSlots": [
+                {
+                  "index": 0,
+                  "connection": {
+                    "sourceLayerIndex": 0,
+                    "outputSlotIndex": 0
+                  }
+                },
+                {
+                  "index": 1,
+                  "connection": {
+                    "sourceLayerIndex": 2,
+                    "outputSlotIndex": 0
+                  }
+                }
+              ],
+              "outputSlots": [
+                {
+                  "index": 0,
+                  "tensorInfo": {
+                    "dimensions": [
+                      1,
+                      1
+                    ],
+                    "dataType": "QAsymmU8",
+                    "quantizationScale": 2.0,
+                    "quantizationOffset": 0,
+                    "quantizationDim": 0,
+                    "dimensionality": 1,
+                    "dimensionSpecificity": [
+                      true,
+                      true
+                    ]
+                  }
+                }
+              ]
+            },
+            "descriptor": {
+              "biasEnabled": false,
+              "transposeWeightsMatrix": true,
+              "constantWeights": true
+            }
+          }
+        },
+        {
+          "layer_type": "ConstantLayer",
+          "layer": {
+            "base": {
+              "index": 2,
+              "layerName": "",
+              "layerType": "Constant",
+              "inputSlots": [
+
+              ],
+              "outputSlots": [
+                {
+                  "index": 0,
+                  "tensorInfo": {
+                    "dimensions": [
+                      1,
+                      4
+                    ],
+                    "dataType": "QAsymmU8",
+                    "quantizationScale": 1.0,
+                    "quantizationOffset": 0,
+                    "quantizationDim": 0,
+                    "dimensionality": 1,
+                    "dimensionSpecificity": [
+                      true,
+                      true
+                    ],
+                    "isConstant": true
+                  }
+                }
+              ]
+            },
+            "input": {
+              "info": {
+                "dimensions": [
+                  1,
+                  4
+                ],
+                "dataType": "QAsymmU8",
+                "quantizationScale": 1.0,
+                "quantizationOffset": 0,
+                "quantizationDim": 0,
+                "dimensionality": 1,
+                "dimensionSpecificity": [
+                  true,
+                  true
+                ]
+              },
+              "data_type": "ByteData",
+              "data": {
+                "data": [
+                  2,
+                  3,
+                  4,
+                  5
+                ]
+              }
+            }
+          }
+        },
+        {
+          "layer_type": "OutputLayer",
+          "layer": {
+            "base": {
+              "base": {
+                "index": 3,
+                "layerName": "OutputLayer",
+                "layerType": "Output",
+                "inputSlots": [
+                  {
+                    "index": 0,
+                    "connection": {
+                      "sourceLayerIndex": 1,
+                      "outputSlotIndex": 0
+                    }
+                  }
+                ],
+                "outputSlots": [
+
+                ]
+              },
+              "layerBindingId": 0
+            }
+          }
+        }
+      ],
+      "inputIds": [
+        0
+      ],
+      "outputIds": [
+        0
+      ],
+      "featureVersions": {
+        "bindingIdsScheme": 1,
+        "weightsLayoutScheme": 1,
+        "constantTensorsAsInputs": 1
+      }
+    }
+    )";
+        Setup();
+    }
+};
+
 struct FullyConnectedWithNoBiasFixture : FullyConnectedFixture
 {
     FullyConnectedWithNoBiasFixture()
-        : FullyConnectedFixture("[ 1, 4, 1, 1 ]",     // inputShape
-                                "[ 1, 1 ]",           // outputShape
-                                "[ 1, 4 ]",           // filterShape
-                                "QuantisedAsymm8")     // filterData
+            : FullyConnectedFixture("[ 1, 4, 1, 1 ]",     // inputShape
+                                    "[ 1, 1 ]",           // outputShape
+                                    "[ 1, 4 ]",           // filterShape
+                                    "QuantisedAsymm8")    // filterData
     {}
 };
 
 TEST_CASE_FIXTURE(FullyConnectedWithNoBiasFixture, "FullyConnectedWithNoBias")
 {
+    // Weights and biases used to be always constant and were stored as members of the layer. This has changed and
+    // they are now passed as inputs (ConstantLayer) but the old way can still be used for now.
     RunTest<2, armnn::DataType::QAsymmU8>(
-         0,
-         {{"InputLayer",  { 10, 20, 30, 40 }}},
-         {{"OutputLayer", { 400/2 }}});
+            0,
+            {{"InputLayer",  { 10, 20, 30, 40 }}},
+            {{"OutputLayer", { 400/2 }}});
+}
+
+struct FullyConnectedWithNoBiasFixtureConstantAsInput : FullyConnectedFixtureConstantAsInput
+{
+    FullyConnectedWithNoBiasFixtureConstantAsInput()
+            : FullyConnectedFixtureConstantAsInput()
+    {}
+};
+
+TEST_CASE_FIXTURE(FullyConnectedWithNoBiasFixtureConstantAsInput, "FullyConnectedWithNoBiasConstantAsInput")
+{
+    RunTest<2, armnn::DataType::QAsymmU8>(
+            0,
+            {{"InputLayer",  { 10, 20, 30, 40 }}},
+            {{"OutputLayer", { 400/2 }}});
 }
 
 }