IVGCVSW-4555 ArmnnConverter (Serializer) does not support per-axis quantization params

* TensorInfo can have multiple scales and quantization dimension.

Signed-off-by: Sadik Armagan <sadik.armagan@arm.com>
Change-Id: I0ff02e3766996b6a9da6dc4e92d366bc9505c77d
diff --git a/src/armnnDeserializer/Deserializer.cpp b/src/armnnDeserializer/Deserializer.cpp
index bc6fbf0..58232a2 100644
--- a/src/armnnDeserializer/Deserializer.cpp
+++ b/src/armnnDeserializer/Deserializer.cpp
@@ -508,6 +508,9 @@
         case DataType_QAsymmS8:
             type = armnn::DataType::QAsymmS8;
             break;
+        case DataType_QSymmS8:
+            type = armnn::DataType::QSymmS8;
+            break;
         case DataType_QuantisedAsymm8:
         case DataType_QAsymmU8:
             type = armnn::DataType::QAsymmU8;
@@ -539,13 +542,30 @@
                             location.AsString()));
         }
     }
-    float quantizationScale = tensorPtr->quantizationScale();
-    int32_t quantizationOffset = tensorPtr->quantizationOffset();
+
 
     auto dimensions = tensorPtr->dimensions();
     unsigned int size = dimensions->size();
     std::vector<unsigned int> outputDims(dimensions->begin(), dimensions->begin() + size);
 
+    auto quantizationScales = tensorPtr->quantizationScales();
+
+    if (quantizationScales)
+    {
+        unsigned int quantizationScalesSize = quantizationScales->size();
+        std::vector<float> scales(quantizationScales->begin(), quantizationScales->begin() + quantizationScalesSize);
+        unsigned int quantizationDim = tensorPtr->quantizationDim();
+        armnn::TensorInfo result(size,
+                                 outputDims.data(),
+                                 type,
+                                 scales,
+                                 quantizationDim);
+        return result;
+    }
+
+    float quantizationScale = tensorPtr->quantizationScale();
+    int32_t quantizationOffset = tensorPtr->quantizationOffset();
+
     // two statements (on purpose) for easier debugging:
     armnn::TensorInfo result(size,
                              outputDims.data(),
diff --git a/src/armnnSerializer/ArmnnSchema.fbs b/src/armnnSerializer/ArmnnSchema.fbs
index ca3db5d..ff79f6c 100644
--- a/src/armnnSerializer/ArmnnSchema.fbs
+++ b/src/armnnSerializer/ArmnnSchema.fbs
@@ -38,7 +38,8 @@
     QuantisedSymm16 = 5, // deprecated
     QAsymmU8 = 6,
     QSymmS16 = 7,
-    QAsymmS8 = 8
+    QAsymmS8 = 8,
+    QSymmS8 = 9
 }
 
 enum DataLayout : byte {
@@ -54,8 +55,10 @@
 table TensorInfo {
     dimensions:[uint];
     dataType:DataType;
-    quantizationScale:float = 1.0;
+    quantizationScale:float = 1.0; // @deprecated Use quantizationScales instead
     quantizationOffset:int = 0;
+    quantizationScales:[float];
+    quantizationDim:uint;
 }
 
 struct Connection {
diff --git a/src/armnnSerializer/Serializer.cpp b/src/armnnSerializer/Serializer.cpp
index 37ab326..cb7a5c4 100644
--- a/src/armnnSerializer/Serializer.cpp
+++ b/src/armnnSerializer/Serializer.cpp
@@ -1420,25 +1420,43 @@
     return fbVector;
 }
 
-flatbuffers::Offset<serializer::ConstTensor>
-    SerializerVisitor::CreateConstTensorInfo(const armnn::ConstTensor& constTensor)
+flatbuffers::Offset<TensorInfo> SerializerVisitor::CreateTensorInfo(const armnn::TensorInfo& tensorInfo)
 {
-    armnn::TensorInfo tensorInfo = constTensor.GetInfo();
-
     // Get the dimensions
     std::vector<unsigned int> shape;
-
     for(unsigned int dim = 0; dim < tensorInfo.GetShape().GetNumDimensions(); ++dim)
     {
         shape.push_back(tensorInfo.GetShape()[dim]);
     }
 
+    if (tensorInfo.HasPerAxisQuantization())
+    {
+        // Create FlatBuffer TensorInfo
+        auto flatBufferTensorInfo =
+            serializer::CreateTensorInfo(m_flatBufferBuilder,
+                                         m_flatBufferBuilder.CreateVector(shape),
+                                         GetFlatBufferDataType(tensorInfo.GetDataType()),
+                                         tensorInfo.GetQuantizationScales()[0],
+                                         tensorInfo.GetQuantizationOffset(),
+                                         m_flatBufferBuilder.CreateVector(tensorInfo.GetQuantizationScales()),
+                                         tensorInfo.GetQuantizationDim().value());
+        return flatBufferTensorInfo;
+    }
+
     // Create FlatBuffer TensorInfo
     auto flatBufferTensorInfo = serializer::CreateTensorInfo(m_flatBufferBuilder,
                                                              m_flatBufferBuilder.CreateVector(shape),
                                                              GetFlatBufferDataType(tensorInfo.GetDataType()),
                                                              tensorInfo.GetQuantizationScale(),
                                                              tensorInfo.GetQuantizationOffset());
+    return flatBufferTensorInfo;
+}
+
+flatbuffers::Offset<serializer::ConstTensor>
+    SerializerVisitor::CreateConstTensorInfo(const armnn::ConstTensor& constTensor)
+{
+    armnn::TensorInfo tensorInfo = constTensor.GetInfo();
+
     flatbuffers::Offset<void> fbPayload;
 
     switch (tensorInfo.GetDataType())
@@ -1471,6 +1489,7 @@
             fbPayload = flatBuffersData.o;
             break;
         }
+        case armnn::DataType::QSymmS8:
         case armnn::DataType::QAsymmU8:
         case armnn::DataType::Boolean:
         default:
@@ -1484,7 +1503,7 @@
     }
     flatbuffers::Offset<serializer::ConstTensor> flatBufferConstTensor = serializer::CreateConstTensor(
             m_flatBufferBuilder,
-            flatBufferTensorInfo,
+            CreateTensorInfo(tensorInfo),
             GetFlatBufferConstTensorData(tensorInfo.GetDataType()),
             fbPayload);
     return flatBufferConstTensor;
@@ -1533,24 +1552,10 @@
         const IOutputSlot& outputSlot = layer->GetOutputSlot(slotIndex);
         const armnn::TensorInfo& tensorInfo = outputSlot.GetTensorInfo();
 
-        // Get the dimensions
-        std::vector<unsigned int> shape;
-        for(unsigned int dim = 0; dim < tensorInfo.GetShape().GetNumDimensions(); ++dim)
-        {
-            shape.push_back(tensorInfo.GetShape()[dim]);
-        }
-
-        // Create FlatBuffer TensorInfo
-        auto flatBufferTensorInfo = serializer::CreateTensorInfo(m_flatBufferBuilder,
-                                                                 m_flatBufferBuilder.CreateVector(shape),
-                                                                 GetFlatBufferDataType(tensorInfo.GetDataType()),
-                                                                 tensorInfo.GetQuantizationScale(),
-                                                                 tensorInfo.GetQuantizationOffset());
-
         // Create FlatBuffer Outputslot
         outputSlots.push_back(serializer::CreateOutputSlot(m_flatBufferBuilder,
                                                            slotIndex,
-                                                           flatBufferTensorInfo));
+                                                           CreateTensorInfo(tensorInfo)));
     }
     return outputSlots;
 }
diff --git a/src/armnnSerializer/Serializer.hpp b/src/armnnSerializer/Serializer.hpp
index 806caef..6dd6558 100644
--- a/src/armnnSerializer/Serializer.hpp
+++ b/src/armnnSerializer/Serializer.hpp
@@ -293,6 +293,9 @@
     flatbuffers::Offset<armnnSerializer::ConstTensor> CreateConstTensorInfo(
             const armnn::ConstTensor& constTensor);
 
+    /// Creates the serializer TensorInfo for the armnn TensorInfo.
+    flatbuffers::Offset<TensorInfo>  CreateTensorInfo(const armnn::TensorInfo& tensorInfo);
+
     template <typename T>
     flatbuffers::Offset<flatbuffers::Vector<T>> CreateDataVector(const void* memory, unsigned int size);
 
diff --git a/src/armnnSerializer/SerializerUtils.cpp b/src/armnnSerializer/SerializerUtils.cpp
index c184771..5566abf 100644
--- a/src/armnnSerializer/SerializerUtils.cpp
+++ b/src/armnnSerializer/SerializerUtils.cpp
@@ -39,6 +39,7 @@
         case armnn::DataType::QSymmS16:
             return armnnSerializer::ConstTensorData::ConstTensorData_ShortData;
         case armnn::DataType::QAsymmU8:
+        case armnn::DataType::QSymmS8:
         case armnn::DataType::Boolean:
             return armnnSerializer::ConstTensorData::ConstTensorData_ByteData;
         default:
@@ -62,6 +63,8 @@
             return armnnSerializer::DataType::DataType_QAsymmS8;
         case armnn::DataType::QAsymmU8:
             return armnnSerializer::DataType::DataType_QAsymmU8;
+        case armnn::DataType::QSymmS8:
+            return armnnSerializer::DataType::DataType_QSymmS8;
         case armnn::DataType::Boolean:
             return armnnSerializer::DataType::DataType_Boolean;
         default:
diff --git a/src/armnnSerializer/test/SerializerTests.cpp b/src/armnnSerializer/test/SerializerTests.cpp
index 8c9c92b..db89430 100644
--- a/src/armnnSerializer/test/SerializerTests.cpp
+++ b/src/armnnSerializer/test/SerializerTests.cpp
@@ -214,6 +214,10 @@
             CompareConstTensorData<const uint8_t*>(
                 tensor1.GetMemoryArea(), tensor2.GetMemoryArea(), tensor1.GetNumElements());
             break;
+        case armnn::DataType::QSymmS8:
+            CompareConstTensorData<const int8_t*>(
+                tensor1.GetMemoryArea(), tensor2.GetMemoryArea(), tensor1.GetNumElements());
+            break;
         case armnn::DataType::Signed32:
             CompareConstTensorData<const int32_t*>(
                 tensor1.GetMemoryArea(), tensor2.GetMemoryArea(), tensor1.GetNumElements());
@@ -621,6 +625,100 @@
     deserializedNetwork->Accept(verifier);
 }
 
+BOOST_AUTO_TEST_CASE(SerializeConvolution2dWithPerAxisParams)
+{
+    using Descriptor = armnn::Convolution2dDescriptor;
+    class Convolution2dLayerVerifier : public LayerVerifierBaseWithDescriptor<Descriptor>
+    {
+    public:
+        Convolution2dLayerVerifier(const std::string& layerName,
+                                   const std::vector<armnn::TensorInfo>& inputInfos,
+                                   const std::vector<armnn::TensorInfo>& outputInfos,
+                                   const Descriptor& descriptor,
+                                   const armnn::ConstTensor& weights,
+                                   const armnn::Optional<armnn::ConstTensor>& biases)
+            : LayerVerifierBaseWithDescriptor<Descriptor>(layerName, inputInfos, outputInfos, descriptor)
+            , m_Weights(weights)
+            , m_Biases(biases) {}
+
+        void VisitConvolution2dLayer(const armnn::IConnectableLayer* layer,
+                                     const Descriptor& descriptor,
+                                     const armnn::ConstTensor& weights,
+                                     const armnn::Optional<armnn::ConstTensor>& biases,
+                                     const char* name) override
+        {
+            VerifyNameAndConnections(layer, name);
+            VerifyDescriptor(descriptor);
+
+            // check weights
+            CompareConstTensor(weights, m_Weights);
+
+            // check biases
+            BOOST_CHECK(biases.has_value() == descriptor.m_BiasEnabled);
+            BOOST_CHECK(biases.has_value() == m_Biases.has_value());
+
+            if (biases.has_value() && m_Biases.has_value())
+            {
+                CompareConstTensor(biases.value(), m_Biases.value());
+            }
+        }
+
+    private:
+        armnn::ConstTensor                  m_Weights;
+        armnn::Optional<armnn::ConstTensor> m_Biases;
+    };
+
+    using namespace armnn;
+
+    const std::string layerName("convolution2dWithPerAxis");
+    const TensorInfo inputInfo ({ 1, 3, 1, 2 }, DataType::QAsymmU8, 0.55f, 128);
+    const TensorInfo outputInfo({ 1, 3, 1, 3 }, DataType::QAsymmU8, 0.75f, 128);
+
+    const std::vector<float> quantScales{ 0.75f, 0.65f, 0.85f };
+    constexpr unsigned int quantDimension = 0;
+
+    const TensorInfo kernelInfo({ 3, 1, 1, 2 }, DataType::QSymmS8, quantScales, quantDimension);
+
+    const std::vector<float> biasQuantScales{ 0.25f, 0.50f, 0.75f };
+    const TensorInfo biasInfo({ 3 }, DataType::Signed32, biasQuantScales, quantDimension);
+
+    std::vector<int8_t> kernelData = GenerateRandomData<int8_t>(kernelInfo.GetNumElements());
+    armnn::ConstTensor weights(kernelInfo, kernelData);
+    std::vector<int32_t> biasData = GenerateRandomData<int32_t>(biasInfo.GetNumElements());
+    armnn::ConstTensor biases(biasInfo, biasData);
+
+    Convolution2dDescriptor descriptor;
+    descriptor.m_StrideX     = 1;
+    descriptor.m_StrideY     = 1;
+    descriptor.m_PadLeft     = 0;
+    descriptor.m_PadRight    = 0;
+    descriptor.m_PadTop      = 0;
+    descriptor.m_PadBottom   = 0;
+    descriptor.m_BiasEnabled = true;
+    descriptor.m_DataLayout  = armnn::DataLayout::NHWC;
+
+    armnn::INetworkPtr network = armnn::INetwork::Create();
+    armnn::IConnectableLayer* const inputLayer  = network->AddInputLayer(0);
+    armnn::IConnectableLayer* const convLayer   =
+        network->AddConvolution2dLayer(descriptor,
+                                       weights,
+                                       armnn::Optional<armnn::ConstTensor>(biases),
+                                       layerName.c_str());
+    armnn::IConnectableLayer* const outputLayer = network->AddOutputLayer(0);
+
+    inputLayer->GetOutputSlot(0).Connect(convLayer->GetInputSlot(0));
+    convLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
+
+    inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
+    convLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
+
+    armnn::INetworkPtr deserializedNetwork = DeserializeNetwork(SerializeNetwork(*network));
+    BOOST_CHECK(deserializedNetwork);
+
+    Convolution2dLayerVerifier verifier(layerName, {inputInfo}, {outputInfo}, descriptor, weights, biases);
+    deserializedNetwork->Accept(verifier);
+}
+
 BOOST_AUTO_TEST_CASE(SerializeDepthToSpace)
 {
     DECLARE_LAYER_VERIFIER_CLASS_WITH_DESCRIPTOR(DepthToSpace)
@@ -742,6 +840,102 @@
     deserializedNetwork->Accept(verifier);
 }
 
+BOOST_AUTO_TEST_CASE(SerializeDepthwiseConvolution2dWithPerAxisParams)
+{
+    using Descriptor = armnn::DepthwiseConvolution2dDescriptor;
+    class DepthwiseConvolution2dLayerVerifier : public LayerVerifierBaseWithDescriptor<Descriptor>
+    {
+    public:
+        DepthwiseConvolution2dLayerVerifier(const std::string& layerName,
+                                            const std::vector<armnn::TensorInfo>& inputInfos,
+                                            const std::vector<armnn::TensorInfo>& outputInfos,
+                                            const Descriptor& descriptor,
+                                            const armnn::ConstTensor& weights,
+                                            const armnn::Optional<armnn::ConstTensor>& biases) :
+            LayerVerifierBaseWithDescriptor<Descriptor>(layerName, inputInfos, outputInfos, descriptor),
+            m_Weights(weights),
+            m_Biases(biases) {}
+
+        void VisitDepthwiseConvolution2dLayer(const armnn::IConnectableLayer* layer,
+                                              const Descriptor& descriptor,
+                                              const armnn::ConstTensor& weights,
+                                              const armnn::Optional<armnn::ConstTensor>& biases,
+                                              const char* name) override
+        {
+            VerifyNameAndConnections(layer, name);
+            VerifyDescriptor(descriptor);
+
+            // check weights
+            CompareConstTensor(weights, m_Weights);
+
+            // check biases
+            BOOST_CHECK(biases.has_value() == descriptor.m_BiasEnabled);
+            BOOST_CHECK(biases.has_value() == m_Biases.has_value());
+
+            if (biases.has_value() && m_Biases.has_value())
+            {
+                CompareConstTensor(biases.value(), m_Biases.value());
+            }
+        }
+
+    private:
+        armnn::ConstTensor                      m_Weights;
+        armnn::Optional<armnn::ConstTensor>     m_Biases;
+    };
+
+    using namespace armnn;
+
+    const std::string layerName("depthwiseConvolution2dWithPerAxis");
+    const TensorInfo inputInfo ({ 1, 3, 3, 2 }, DataType::QAsymmU8, 0.55f, 128);
+    const TensorInfo outputInfo({ 1, 2, 2, 4 }, DataType::QAsymmU8, 0.75f, 128);
+
+    const std::vector<float> quantScales{ 0.75f, 0.80f, 0.90f, 0.95f };
+    const unsigned int quantDimension = 0;
+    TensorInfo kernelInfo({ 2, 2, 2, 2 }, DataType::QSymmS8, quantScales, quantDimension);
+
+    const std::vector<float> biasQuantScales{ 0.25f, 0.35f, 0.45f, 0.55f };
+    constexpr unsigned int biasQuantDimension = 0;
+    TensorInfo biasInfo({ 4 }, DataType::Signed32, biasQuantScales, biasQuantDimension);
+
+    std::vector<int8_t> kernelData = GenerateRandomData<int8_t>(kernelInfo.GetNumElements());
+    armnn::ConstTensor weights(kernelInfo, kernelData);
+    std::vector<int32_t> biasData = GenerateRandomData<int32_t>(biasInfo.GetNumElements());
+    armnn::ConstTensor biases(biasInfo, biasData);
+
+    DepthwiseConvolution2dDescriptor descriptor;
+    descriptor.m_StrideX     = 1;
+    descriptor.m_StrideY     = 1;
+    descriptor.m_PadLeft     = 0;
+    descriptor.m_PadRight    = 0;
+    descriptor.m_PadTop      = 0;
+    descriptor.m_PadBottom   = 0;
+    descriptor.m_DilationX   = 1;
+    descriptor.m_DilationY   = 1;
+    descriptor.m_BiasEnabled = true;
+    descriptor.m_DataLayout  = armnn::DataLayout::NHWC;
+
+    armnn::INetworkPtr network = armnn::INetwork::Create();
+    armnn::IConnectableLayer* const inputLayer = network->AddInputLayer(0);
+    armnn::IConnectableLayer* const depthwiseConvLayer =
+        network->AddDepthwiseConvolution2dLayer(descriptor,
+                                                weights,
+                                                armnn::Optional<armnn::ConstTensor>(biases),
+                                                layerName.c_str());
+    armnn::IConnectableLayer* const outputLayer = network->AddOutputLayer(0);
+
+    inputLayer->GetOutputSlot(0).Connect(depthwiseConvLayer->GetInputSlot(0));
+    depthwiseConvLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
+
+    inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
+    depthwiseConvLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
+
+    armnn::INetworkPtr deserializedNetwork = DeserializeNetwork(SerializeNetwork(*network));
+    BOOST_CHECK(deserializedNetwork);
+
+    DepthwiseConvolution2dLayerVerifier verifier(layerName, {inputInfo}, {outputInfo}, descriptor, weights, biases);
+    deserializedNetwork->Accept(verifier);
+}
+
 BOOST_AUTO_TEST_CASE(SerializeDequantize)
 {
     DECLARE_LAYER_VERIFIER_CLASS(Dequantize)