IVGCVSW-6119 ConstTensorsAsInput: FullyConnected

 * Constant weights and biases are now stored as Constant layers.
 * Updated Serializer, Deserializer and unit tests to reflect this.
 * Updated TfLiteDelegate, TfLiteParser and OnnxParser.
 * Updated Schema with IsConstant and ConstantTensorsAsInputs.
 * Updated Ref backend to handle constant weights and
   biases as inputs rather than reading from member variables.
 * Added dynamic or constant input EndToEnd tests.

!android-nn-driver:5959

Signed-off-by: Matthew Sloyan <matthew.sloyan@arm.com>
Change-Id: Ibf3cf437df1100e4b322b0d303c575c6339f9696
diff --git a/src/armnnSerializer/ArmnnSchema.fbs b/src/armnnSerializer/ArmnnSchema.fbs
index a544161..85435a3 100644
--- a/src/armnnSerializer/ArmnnSchema.fbs
+++ b/src/armnnSerializer/ArmnnSchema.fbs
@@ -69,6 +69,7 @@
     quantizationDim:uint;
     dimensionality:uint = 1;
     dimensionSpecificity:[bool];
+    isConstant:bool = false;
 }
 
 struct Connection {
@@ -324,7 +325,7 @@
 table FullyConnectedLayer {
     base:LayerBase;
     descriptor:FullyConnectedDescriptor;
-    weights:ConstTensor;
+    weights:ConstTensor; // ConstTensors are now passed as inputs.
     biases:ConstTensor;
 }
 
@@ -1007,6 +1008,7 @@
 table FeatureCompatibilityVersions {
   bindingIdsScheme:uint = 0;
   weightsLayoutScheme:uint = 0;
+  constantTensorsAsInputs:uint = 0;
 }
 
 // Root type for serialized data is the graph of the network
diff --git a/src/armnnSerializer/ArmnnSchema_generated.h b/src/armnnSerializer/ArmnnSchema_generated.h
index 27550f0..ca2bf0c 100644
--- a/src/armnnSerializer/ArmnnSchema_generated.h
+++ b/src/armnnSerializer/ArmnnSchema_generated.h
@@ -1685,7 +1685,8 @@
     VT_QUANTIZATIONSCALES = 12,
     VT_QUANTIZATIONDIM = 14,
     VT_DIMENSIONALITY = 16,
-    VT_DIMENSIONSPECIFICITY = 18
+    VT_DIMENSIONSPECIFICITY = 18,
+    VT_ISCONSTANT = 20
   };
   const flatbuffers::Vector<uint32_t> *dimensions() const {
     return GetPointer<const flatbuffers::Vector<uint32_t> *>(VT_DIMENSIONS);
@@ -1711,6 +1712,9 @@
   const flatbuffers::Vector<uint8_t> *dimensionSpecificity() const {
     return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_DIMENSIONSPECIFICITY);
   }
+  bool isConstant() const {
+    return GetField<uint8_t>(VT_ISCONSTANT, 0) != 0;
+  }
   bool Verify(flatbuffers::Verifier &verifier) const {
     return VerifyTableStart(verifier) &&
            VerifyOffset(verifier, VT_DIMENSIONS) &&
@@ -1724,6 +1728,7 @@
            VerifyField<uint32_t>(verifier, VT_DIMENSIONALITY) &&
            VerifyOffset(verifier, VT_DIMENSIONSPECIFICITY) &&
            verifier.VerifyVector(dimensionSpecificity()) &&
+           VerifyField<uint8_t>(verifier, VT_ISCONSTANT) &&
            verifier.EndTable();
   }
 };
@@ -1756,6 +1761,9 @@
   void add_dimensionSpecificity(flatbuffers::Offset<flatbuffers::Vector<uint8_t>> dimensionSpecificity) {
     fbb_.AddOffset(TensorInfo::VT_DIMENSIONSPECIFICITY, dimensionSpecificity);
   }
+  void add_isConstant(bool isConstant) {
+    fbb_.AddElement<uint8_t>(TensorInfo::VT_ISCONSTANT, static_cast<uint8_t>(isConstant), 0);
+  }
   explicit TensorInfoBuilder(flatbuffers::FlatBufferBuilder &_fbb)
         : fbb_(_fbb) {
     start_ = fbb_.StartTable();
@@ -1777,7 +1785,8 @@
     flatbuffers::Offset<flatbuffers::Vector<float>> quantizationScales = 0,
     uint32_t quantizationDim = 0,
     uint32_t dimensionality = 1,
-    flatbuffers::Offset<flatbuffers::Vector<uint8_t>> dimensionSpecificity = 0) {
+    flatbuffers::Offset<flatbuffers::Vector<uint8_t>> dimensionSpecificity = 0,
+    bool isConstant = false) {
   TensorInfoBuilder builder_(_fbb);
   builder_.add_dimensionSpecificity(dimensionSpecificity);
   builder_.add_dimensionality(dimensionality);
@@ -1786,6 +1795,7 @@
   builder_.add_quantizationOffset(quantizationOffset);
   builder_.add_quantizationScale(quantizationScale);
   builder_.add_dimensions(dimensions);
+  builder_.add_isConstant(isConstant);
   builder_.add_dataType(dataType);
   return builder_.Finish();
 }
@@ -1799,7 +1809,8 @@
     const std::vector<float> *quantizationScales = nullptr,
     uint32_t quantizationDim = 0,
     uint32_t dimensionality = 1,
-    const std::vector<uint8_t> *dimensionSpecificity = nullptr) {
+    const std::vector<uint8_t> *dimensionSpecificity = nullptr,
+    bool isConstant = false) {
   auto dimensions__ = dimensions ? _fbb.CreateVector<uint32_t>(*dimensions) : 0;
   auto quantizationScales__ = quantizationScales ? _fbb.CreateVector<float>(*quantizationScales) : 0;
   auto dimensionSpecificity__ = dimensionSpecificity ? _fbb.CreateVector<uint8_t>(*dimensionSpecificity) : 0;
@@ -1812,7 +1823,8 @@
       quantizationScales__,
       quantizationDim,
       dimensionality,
-      dimensionSpecificity__);
+      dimensionSpecificity__,
+      isConstant);
 }
 
 struct ByteData FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
@@ -10124,7 +10136,8 @@
   typedef FeatureCompatibilityVersionsBuilder Builder;
   enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE {
     VT_BINDINGIDSSCHEME = 4,
-    VT_WEIGHTSLAYOUTSCHEME = 6
+    VT_WEIGHTSLAYOUTSCHEME = 6,
+    VT_CONSTANTTENSORSASINPUTS = 8
   };
   uint32_t bindingIdsScheme() const {
     return GetField<uint32_t>(VT_BINDINGIDSSCHEME, 0);
@@ -10132,10 +10145,14 @@
   uint32_t weightsLayoutScheme() const {
     return GetField<uint32_t>(VT_WEIGHTSLAYOUTSCHEME, 0);
   }
+  uint32_t constantTensorsAsInputs() const {
+    return GetField<uint32_t>(VT_CONSTANTTENSORSASINPUTS, 0);
+  }
   bool Verify(flatbuffers::Verifier &verifier) const {
     return VerifyTableStart(verifier) &&
            VerifyField<uint32_t>(verifier, VT_BINDINGIDSSCHEME) &&
            VerifyField<uint32_t>(verifier, VT_WEIGHTSLAYOUTSCHEME) &&
+           VerifyField<uint32_t>(verifier, VT_CONSTANTTENSORSASINPUTS) &&
            verifier.EndTable();
   }
 };
@@ -10150,6 +10167,9 @@
   void add_weightsLayoutScheme(uint32_t weightsLayoutScheme) {
     fbb_.AddElement<uint32_t>(FeatureCompatibilityVersions::VT_WEIGHTSLAYOUTSCHEME, weightsLayoutScheme, 0);
   }
+  void add_constantTensorsAsInputs(uint32_t constantTensorsAsInputs) {
+    fbb_.AddElement<uint32_t>(FeatureCompatibilityVersions::VT_CONSTANTTENSORSASINPUTS, constantTensorsAsInputs, 0);
+  }
   explicit FeatureCompatibilityVersionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
         : fbb_(_fbb) {
     start_ = fbb_.StartTable();
@@ -10165,8 +10185,10 @@
 inline flatbuffers::Offset<FeatureCompatibilityVersions> CreateFeatureCompatibilityVersions(
     flatbuffers::FlatBufferBuilder &_fbb,
     uint32_t bindingIdsScheme = 0,
-    uint32_t weightsLayoutScheme = 0) {
+    uint32_t weightsLayoutScheme = 0,
+    uint32_t constantTensorsAsInputs = 0) {
   FeatureCompatibilityVersionsBuilder builder_(_fbb);
+  builder_.add_constantTensorsAsInputs(constantTensorsAsInputs);
   builder_.add_weightsLayoutScheme(weightsLayoutScheme);
   builder_.add_bindingIdsScheme(bindingIdsScheme);
   return builder_.Finish();
diff --git a/src/armnnSerializer/Serializer.cpp b/src/armnnSerializer/Serializer.cpp
index 44cd180..195b416 100644
--- a/src/armnnSerializer/Serializer.cpp
+++ b/src/armnnSerializer/Serializer.cpp
@@ -1126,7 +1126,6 @@
 // Build FlatBuffer for FullyConnected Layer
 void SerializerStrategy::SerializeFullyConnectedLayer(const armnn::IConnectableLayer* layer,
                                                       const armnn::FullyConnectedDescriptor& fullyConnectedDescriptor,
-                                                      const std::vector<armnn::ConstTensor>& constants,
                                                       const char*)
 {
     // Create FlatBuffer BaseLayer
@@ -1139,28 +1138,10 @@
                                                    fullyConnectedDescriptor.m_TransposeWeightMatrix,
                                                    fullyConnectedDescriptor.m_ConstantWeights);
 
-    // Create FlatBuffer weights data
-    flatbuffers::Offset<serializer::ConstTensor> flatBufferWeights;
-    // Create FlatBuffer bias data
-    flatbuffers::Offset<serializer::ConstTensor> flatBufferBiases;
-    if (fullyConnectedDescriptor.m_ConstantWeights && !constants.empty())
-    {
-        armnn::ConstTensor weights = constants.at(0);
-        flatBufferWeights = CreateConstTensorInfo(weights);
-
-        if (fullyConnectedDescriptor.m_BiasEnabled)
-        {
-            armnn::ConstTensor biases = constants.at(1);
-            flatBufferBiases = CreateConstTensorInfo(biases);
-        }
-    }
-
     // Create FlatBuffer FullyConnectedLayer
     auto flatBufferLayer = serializer::CreateFullyConnectedLayer(m_flatBufferBuilder,
                                                                  flatBufferBaseLayer,
-                                                                 flatBufferDescriptor,
-                                                                 flatBufferWeights,
-                                                                 flatBufferBiases);
+                                                                 flatBufferDescriptor);
 
     // Add created FullyConnectedLayer to the FlatBufferLayers
     CreateAnyLayer(flatBufferLayer.o, serializer::Layer::Layer_FullyConnectedLayer);
@@ -1916,7 +1897,8 @@
         serializer::CreateFeatureCompatibilityVersions(
                 m_flatBufferBuilder,
                 1, // Binding ids scheme version
-                1  // Weights layout scheme version
+                1, // Weights layout scheme version
+                1  // Constant tensors as inputs version
             );
     return versionsTable;
 }
@@ -2110,7 +2092,7 @@
         {
             const armnn::FullyConnectedDescriptor& layerDescriptor =
                     static_cast<const armnn::FullyConnectedDescriptor&>(descriptor);
-            SerializeFullyConnectedLayer(layer, layerDescriptor, constants, name);
+            SerializeFullyConnectedLayer(layer, layerDescriptor, name);
             break;
         }
         case armnn::LayerType::Gather :
diff --git a/src/armnnSerializer/Serializer.hpp b/src/armnnSerializer/Serializer.hpp
index dead873..18b2cc7 100644
--- a/src/armnnSerializer/Serializer.hpp
+++ b/src/armnnSerializer/Serializer.hpp
@@ -184,7 +184,6 @@
 
     void SerializeFullyConnectedLayer(const armnn::IConnectableLayer* layer,
                                       const armnn::FullyConnectedDescriptor& fullyConnectedDescriptor,
-                                      const std::vector<armnn::ConstTensor>& constants,
                                       const char* name = nullptr);
 
     void SerializeGatherLayer(const armnn::IConnectableLayer* layer,
diff --git a/src/armnnSerializer/test/SerializerTests.cpp b/src/armnnSerializer/test/SerializerTests.cpp
index 98532d0..9e9df0d 100644
--- a/src/armnnSerializer/test/SerializerTests.cpp
+++ b/src/armnnSerializer/test/SerializerTests.cpp
@@ -789,6 +789,41 @@
     deserializedNetwork->ExecuteStrategy(verifier);
 }
 
+using FullyConnectedDescriptor = armnn::FullyConnectedDescriptor;
+class FullyConnectedLayerVerifier : public LayerVerifierBaseWithDescriptor<FullyConnectedDescriptor>
+{
+public:
+    FullyConnectedLayerVerifier(const std::string& layerName,
+                        const std::vector<armnn::TensorInfo>& inputInfos,
+                        const std::vector<armnn::TensorInfo>& outputInfos,
+                        const FullyConnectedDescriptor& descriptor)
+        : LayerVerifierBaseWithDescriptor<FullyConnectedDescriptor>(layerName, inputInfos, outputInfos, descriptor) {}
+
+    void ExecuteStrategy(const armnn::IConnectableLayer* layer,
+                         const armnn::BaseDescriptor& descriptor,
+                         const std::vector<armnn::ConstTensor>& constants,
+                         const char* name,
+                         const armnn::LayerBindingId id = 0) override
+    {
+        armnn::IgnoreUnused(constants, id);
+        switch (layer->GetType())
+        {
+            case armnn::LayerType::Input: break;
+            case armnn::LayerType::Output: break;
+            case armnn::LayerType::Constant: break;
+            default:
+            {
+                VerifyNameAndConnections(layer, name);
+                const FullyConnectedDescriptor& layerDescriptor =
+                        static_cast<const FullyConnectedDescriptor&>(descriptor);
+                CHECK(layerDescriptor.m_ConstantWeights == m_Descriptor.m_ConstantWeights);
+                CHECK(layerDescriptor.m_BiasEnabled == m_Descriptor.m_BiasEnabled);
+                CHECK(layerDescriptor.m_TransposeWeightMatrix == m_Descriptor.m_TransposeWeightMatrix);
+            }
+        }
+    }
+};
+
 TEST_CASE("SerializeFullyConnected")
 {
     const std::string layerName("fullyConnected");
@@ -809,11 +844,16 @@
 
     armnn::INetworkPtr network = armnn::INetwork::Create();
     armnn::IConnectableLayer* const inputLayer = network->AddInputLayer(0);
+
+    // Old way of handling constant tensors.
+    ARMNN_NO_DEPRECATE_WARN_BEGIN
     armnn::IConnectableLayer* const fullyConnectedLayer =
         network->AddFullyConnectedLayer(descriptor,
                                         weights,
                                         armnn::Optional<armnn::ConstTensor>(biases),
                                         layerName.c_str());
+    ARMNN_NO_DEPRECATE_WARN_END
+
     armnn::IConnectableLayer* const outputLayer = network->AddOutputLayer(0);
 
     inputLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(0));
@@ -825,13 +865,11 @@
     armnn::INetworkPtr deserializedNetwork = DeserializeNetwork(SerializeNetwork(*network));
     CHECK(deserializedNetwork);
 
-    const std::vector<armnn::ConstTensor> constants {weights, biases};
-    LayerVerifierBaseWithDescriptorAndConstants<armnn::FullyConnectedDescriptor> verifier(
-            layerName, {inputInfo}, {outputInfo}, descriptor, constants);
+    FullyConnectedLayerVerifier verifier(layerName, {inputInfo, weightsInfo, biasesInfo}, {outputInfo}, descriptor);
     deserializedNetwork->ExecuteStrategy(verifier);
 }
 
-TEST_CASE("SerializeFullyConnectedWeightsAsInputs")
+TEST_CASE("SerializeFullyConnectedWeightsAndBiasesAsInputs")
 {
     const std::string layerName("fullyConnected_weights_as_inputs");
     const armnn::TensorInfo inputInfo ({ 2, 5, 1, 1 }, armnn::DataType::Float32);
@@ -854,8 +892,6 @@
     armnn::IConnectableLayer* const biasInputLayer = network->AddInputLayer(2);
     armnn::IConnectableLayer* const fullyConnectedLayer =
         network->AddFullyConnectedLayer(descriptor,
-                                        weights,
-                                        bias,
                                         layerName.c_str());
     armnn::IConnectableLayer* const outputLayer = network->AddOutputLayer(0);
 
@@ -878,6 +914,49 @@
     deserializedNetwork->ExecuteStrategy(verifier);
 }
 
+TEST_CASE("SerializeFullyConnectedWeightsAndBiasesAsConstantLayers")
+{
+    const std::string layerName("fullyConnected_weights_as_inputs");
+    const armnn::TensorInfo inputInfo ({ 2, 5, 1, 1 }, armnn::DataType::Float32);
+    const armnn::TensorInfo outputInfo({ 2, 3 }, armnn::DataType::Float32);
+
+    const armnn::TensorInfo weightsInfo({ 5, 3 }, armnn::DataType::Float32);
+    const armnn::TensorInfo biasesInfo ({ 3 }, armnn::DataType::Float32);
+
+    std::vector<float> weightsData = GenerateRandomData<float>(weightsInfo.GetNumElements());
+    std::vector<float> biasesData  = GenerateRandomData<float>(biasesInfo.GetNumElements());
+    armnn::ConstTensor weights(weightsInfo, weightsData);
+    armnn::ConstTensor biases(biasesInfo, biasesData);
+
+    armnn::FullyConnectedDescriptor descriptor;
+    descriptor.m_BiasEnabled = true;
+    descriptor.m_TransposeWeightMatrix = false;
+    descriptor.m_ConstantWeights = true;
+
+    armnn::INetworkPtr network = armnn::INetwork::Create();
+    armnn::IConnectableLayer* const inputLayer = network->AddInputLayer(0);
+    armnn::IConnectableLayer* const weightsLayer = network->AddConstantLayer(weights, "Weights");
+    armnn::IConnectableLayer* const biasesLayer = network->AddConstantLayer(biases, "Biases");
+    armnn::IConnectableLayer* const fullyConnectedLayer = network->AddFullyConnectedLayer(descriptor,layerName.c_str());
+    armnn::IConnectableLayer* const outputLayer = network->AddOutputLayer(0);
+
+    inputLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(0));
+    weightsLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(1));
+    biasesLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(2));
+    fullyConnectedLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
+
+    inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
+    weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo);
+    biasesLayer->GetOutputSlot(0).SetTensorInfo(biasesInfo);
+    fullyConnectedLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
+
+    armnn::INetworkPtr deserializedNetwork = DeserializeNetwork(SerializeNetwork(*network));
+    CHECK(deserializedNetwork);
+
+    FullyConnectedLayerVerifier verifier(layerName, {inputInfo, weightsInfo, biasesInfo}, {outputInfo}, descriptor);
+    deserializedNetwork->ExecuteStrategy(verifier);
+}
+
 TEST_CASE("SerializeGather")
 {
     using GatherDescriptor = armnn::GatherDescriptor;