IVGCVSW-3836 Add support for Int32 per-axis scales

* Added ScaledInt32PerAxisDecoder implementation
* Added new case for Signed32 in MakeDecoder that returns a
  ScaledInt32PerAxisDecoder if the tensor info has multiple
  quantization scales or a quantization dimension set (see the
  usage sketch below)

Signed-off-by: Aron Virginas-Tar <Aron.Virginas-Tar@arm.com>
Change-Id: I8b3c11091644da993044d2a0fe2aba6b06b5af56
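
For illustration, a minimal usage sketch of the new decoding path (not
part of the patch; values are made up, and SetQuantizationScales /
SetQuantizationDim are assumed to be the TensorInfo setters matching
the getters this change relies on):

    // Hypothetical per-axis quantized 1-D bias tensor with four channels.
    armnn::TensorInfo biasInfo({ 4 }, armnn::DataType::Signed32);
    biasInfo.SetQuantizationScales({ 0.1f, 0.2f, 0.3f, 0.4f }); // one scale per channel
    biasInfo.SetQuantizationDim(armnn::Optional<unsigned int>(0u));

    const int32_t rawBias[] = { 10, 20, 30, 40 };

    // HasMultipleQuantizationScales() is true, so MakeDecoder returns a
    // ScaledInt32PerAxisDecoder; the first decoded value is 10 * 0.1f == 1.0f.
    std::unique_ptr<armnn::Decoder<float>> decoder =
        armnn::MakeDecoder<float>(biasInfo, rawBias);
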
diff --git a/include/armnn/TypesUtils.hpp b/include/armnn/TypesUtils.hpp
index 3225230..e2294af 100644
--- a/include/armnn/TypesUtils.hpp
+++ b/include/armnn/TypesUtils.hpp
@@ -149,12 +149,13 @@
 {
     switch (dataType)
     {
-        case DataType::Float16:         return "Float16";
-        case DataType::Float32:         return "Float32";
-        case DataType::QuantisedAsymm8: return "QAsymm8";
-        case DataType::QuantisedSymm16: return "QSymm16";
-        case DataType::Signed32:        return "Signed32";
-        case DataType::Boolean:         return "Boolean";
+        case DataType::Float16:               return "Float16";
+        case DataType::Float32:               return "Float32";
+        case DataType::QuantisedAsymm8:       return "QAsymm8";
+        case DataType::QuantizedSymm8PerAxis: return "QSymm8PerAxis";
+        case DataType::QuantisedSymm16:       return "QSymm16";
+        case DataType::Signed32:              return "Signed32";
+        case DataType::Boolean:               return "Boolean";
 
         default:
             return "Unknown";
diff --git a/src/armnnUtils/TensorUtils.cpp b/src/armnnUtils/TensorUtils.cpp
index 0dbb75c..630490f 100644
--- a/src/armnnUtils/TensorUtils.cpp
+++ b/src/armnnUtils/TensorUtils.cpp
@@ -138,4 +138,32 @@
     return uAxis;
 }
 
+unsigned int GetNumElementsAfter(const armnn::TensorShape& shape, unsigned int axis)
+{
+    unsigned int numDim = shape.GetNumDimensions();
+    BOOST_ASSERT(numDim > 0);
+    BOOST_ASSERT(axis < numDim);
+    unsigned int count = 1;
+    for (unsigned int i = axis; i < numDim; i++)
+    {
+        count *= shape[i];
+    }
+    return count;
+}
+
+std::pair<unsigned int, std::vector<float>> GetPerAxisParams(const armnn::TensorInfo& info)
+{
+    const std::vector<float>& scales = info.GetQuantizationScales();
+    armnn::Optional<unsigned int> quantizationDim = info.GetQuantizationDim();
+    if (scales.empty() || !quantizationDim.has_value())
+    {
+        throw armnn::InvalidArgumentException(
+            std::string("Per-axis quantization params not set for tensor of type ") +
+            armnn::GetDataTypeName(info.GetDataType()), CHECK_LOCATION());
+    }
+    unsigned int axisFactor = GetNumElementsAfter(info.GetShape(), quantizationDim.value());
+
+    return { axisFactor, scales };
+}
+
 } // namespace armnnUtils
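
A worked example of the helpers above (a sketch, not part of the patch).
Note that, despite its name, GetNumElementsAfter includes the axis
dimension itself in the product, since the loop starts at i = axis:

    armnn::TensorShape shape({ 2, 3, 4 });
    unsigned int n0 = armnnUtils::GetNumElementsAfter(shape, 0); // 2 * 3 * 4 = 24
    unsigned int n1 = armnnUtils::GetNumElementsAfter(shape, 1); // 3 * 4     = 12
    unsigned int n2 = armnnUtils::GetNumElementsAfter(shape, 2); // 4

For a 1-D bias tensor quantized along dimension 0, GetPerAxisParams
therefore returns an axis factor equal to the number of channels.
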
diff --git a/src/armnnUtils/TensorUtils.hpp b/src/armnnUtils/TensorUtils.hpp
index 32af179..b67431d 100644
--- a/src/armnnUtils/TensorUtils.hpp
+++ b/src/armnnUtils/TensorUtils.hpp
@@ -34,32 +34,8 @@
 
 unsigned int GetUnsignedAxis(const unsigned int inputDimension, const int axis);
 
-inline unsigned int GetNumElementsAfter(const armnn::TensorShape& shape,
-                                        unsigned int axis)
-{
-    unsigned int numDim = shape.GetNumDimensions();
-    BOOST_ASSERT(0 >= axis);
-    BOOST_ASSERT(axis < numDim - 1);
-    unsigned int count = 1;
-    for (unsigned int i = axis; i < numDim; i++)
-    {
-        count *= shape[i];
-    }
-    return count;
-}
+unsigned int GetNumElementsAfter(const armnn::TensorShape& shape, unsigned int axis);
 
-inline std::pair<unsigned int, std::vector<float>> GetPerAxisParams(const armnn::TensorInfo& info)
-{
-    const std::vector<float>& scales = info.GetQuantizationScales();
-    armnn::Optional<unsigned int> quantizationDim = info.GetQuantizationDim();
-    if (scales.size() < 1 || !quantizationDim.has_value())
-    {
-        throw armnn::InvalidArgumentException(
-        "We currently support only per-axis symmetric quantization for QuantizedSymm8.");
-    }
-    unsigned int axisFactor = GetNumElementsAfter(info.GetShape(), quantizationDim.value());
-
-    return {axisFactor, scales};
-}
+std::pair<unsigned int, std::vector<float>> GetPerAxisParams(const armnn::TensorInfo& info);
 
 } // namespace armnnUtils
diff --git a/src/backends/reference/workloads/BaseIterator.hpp b/src/backends/reference/workloads/BaseIterator.hpp
index 9fe3f15..5047531 100644
--- a/src/backends/reference/workloads/BaseIterator.hpp
+++ b/src/backends/reference/workloads/BaseIterator.hpp
@@ -451,4 +451,25 @@
     std::vector<float> m_Scale;
 };
 
-} //namespace armnn
\ No newline at end of file
+class ScaledInt32PerAxisDecoder : public PerAxisIterator<const int32_t, Decoder<float>>
+{
+public:
+    ScaledInt32PerAxisDecoder(const int32_t* data, const std::vector<float>& scales, unsigned int axisFactor)
+        : PerAxisIterator(data, axisFactor), m_Scales(scales) {}
+
+    float Get() const override
+    {
+        return armnn::Dequantize(*m_Iterator, m_Scales[m_AxisIndex], 0);
+    }
+
+    // Get scale of the current value
+    float GetScale() const
+    {
+        return m_Scales[m_AxisIndex];
+    }
+
+private:
+    std::vector<float> m_Scales;
+};
+
+} // namespace armnn
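
A short sketch of the new decoder in isolation (not part of the patch;
data and scales are made up). With a 1-D bias and quantization dimension
0, the axis factor equals the number of channels, so each element is
paired with its own scale as the iterator advances:

    const int32_t biasData[] = { 10, 20, 30, 40 };
    const std::vector<float> scales = { 0.1f, 0.2f, 0.3f, 0.4f };

    armnn::ScaledInt32PerAxisDecoder decoder(biasData, scales, 4);
    for (unsigned int i = 0; i < 4; ++i, ++decoder)
    {
        float value = decoder.Get(); // 1.0f, 4.0f, 9.0f, 16.0f
    }
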
diff --git a/src/backends/reference/workloads/Decoders.hpp b/src/backends/reference/workloads/Decoders.hpp
index dd2b28a..dcd498c 100644
--- a/src/backends/reference/workloads/Decoders.hpp
+++ b/src/backends/reference/workloads/Decoders.hpp
@@ -14,6 +14,54 @@
 namespace armnn
 {
 
+namespace
+{
+
+inline std::unique_ptr<Decoder<float>> MakeSigned32PerAxisDecoder(const TensorInfo& info, const void* data)
+{
+    auto params = armnnUtils::GetPerAxisParams(info);
+    return std::make_unique<ScaledInt32PerAxisDecoder>(
+        static_cast<const int32_t*>(data),
+        params.second,
+        params.first);
+}
+
+inline std::unique_ptr<Decoder<float>> MakeSigned32Decoder(const TensorInfo& info, const void* data)
+{
+    if (info.HasMultipleQuantizationScales())
+    {
+        // NOTE: If we have multiple quantization scales, we create a ScaledInt32PerAxisDecoder.
+        // This will be used to decode per-axis quantized convolution biases.
+        return MakeSigned32PerAxisDecoder(info, data);
+    }
+    else
+    {
+        if (info.GetQuantizationDim().has_value())
+        {
+            // NOTE: Even though we only have a single quantization scale, if the quantization
+            // dimension is set, the tensor has per-axis quantization and we need to create a
+            // ScaledInt32PerAxisDecoder
+            return MakeSigned32PerAxisDecoder(info, data);
+        }
+
+        const float scale = info.GetQuantizationScale();
+        if (scale == 0.f)
+        {
+            // NOTE: If no quantization scale is set, we create an Int32Decoder, which simply
+            // casts the int value to float. This will be used for any INT32 data other than
+            // convolution biases.
+            return std::make_unique<Int32Decoder>(static_cast<const int32_t*>(data));
+        }
+
+        // NOTE: If we only have a single (non-zero) quantization scale and no quantization
+        // dimension is specified, we need to create a ScaledInt32Decoder. This will be used
+        // to decode per-tensor quantized convolution biases.
+        return std::make_unique<ScaledInt32Decoder>(static_cast<const int32_t*>(data), scale);
+    }
+}
+
+} // anonymous namespace
+
 template<typename T>
 inline std::unique_ptr<Decoder<T>> MakeDecoder(const TensorInfo& info, const void* data = nullptr);
 
@@ -54,13 +102,7 @@
         }
         case DataType::Signed32:
         {
-            const float scale = info.GetQuantizationScale();
-            if (scale == 0.f)
-            {
-                return std::make_unique<Int32Decoder>(static_cast<const int32_t*>(data));
-            }
-            // NOTE: ScaledInt32Decoder is used for quantized convolution biases
-            return std::make_unique<ScaledInt32Decoder>(static_cast<const int32_t*>(data), scale);
+            return MakeSigned32Decoder(info, data);
         }
         default:
         {
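
A sketch of how the Signed32 dispatch above resolves (not part of the
patch; SetQuantizationScales / SetQuantizationDim are assumed to be the
TensorInfo setters matching the getters used in MakeSigned32Decoder):

    const int32_t data[] = { 1, 2 };
    armnn::TensorInfo info({ 2 }, armnn::DataType::Signed32);

    // Default scale of 0.0f -> Int32Decoder (plain int-to-float cast).
    auto d1 = armnn::MakeDecoder<float>(info, data);

    // Single non-zero scale, no quantization dim -> ScaledInt32Decoder.
    info.SetQuantizationScale(0.5f);
    auto d2 = armnn::MakeDecoder<float>(info, data);

    // Multiple scales and a quantization dim -> ScaledInt32PerAxisDecoder.
    info.SetQuantizationScales({ 0.5f, 0.25f });
    info.SetQuantizationDim(armnn::Optional<unsigned int>(0u));
    auto d3 = armnn::MakeDecoder<float>(info, data);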