IVGCVSW-5826 Change weights layout for depthwise to [1,H,W,I*M]

 * This change is necessary because tflite uses a [1,H,W,I*M] format
   and uses the I*M dimension for per-axis quantization. Our previous
   layout [M,I,H,W] can't handle the corresponding quantization scales.
 * Updates Onnx-, TfLiteParser and TfLiteDelegate
 * Updates the CpuRef, CpuAcc and GpuAcc backends
 * Adjusts unit tests
 * Adds test to ensure models with old layout can still be read and
   executed
 * Adds conversion function to previous layout [1,H,W,I*M] --> [M,I,H,W]
   which can be used by backend developers

!android-nn-driver:5553

Signed-off-by: Jan Eilers <jan.eilers@arm.com>
Change-Id: Ifef23368b8c3702cf315a5838d214f7dc13c0152
diff --git a/src/armnnDeserializer/Deserializer.cpp b/src/armnnDeserializer/Deserializer.cpp
index 976986e..7951589 100644
--- a/src/armnnDeserializer/Deserializer.cpp
+++ b/src/armnnDeserializer/Deserializer.cpp
@@ -927,6 +927,7 @@
     if (graph->featureVersions())
     {
         versions.m_BindingIdScheme = graph->featureVersions()->bindingIdsScheme();
+        versions.m_WeightsLayoutScheme = graph->featureVersions()->weightsLayoutScheme();
     }
 
     return versions;
@@ -1420,19 +1421,51 @@
     descriptor.m_BiasEnabled = serializerDescriptor->biasEnabled();;
     descriptor.m_DataLayout  = ToDataLayout(serializerDescriptor->dataLayout());
 
-    armnn::ConstTensor weights = ToConstTensor(serializerLayer->weights());
-    armnn::ConstTensor biases;
+    IConnectableLayer* layer;
 
     armnn::Optional<armnn::ConstTensor> optionalBiases = armnn::EmptyOptional();
     if (descriptor.m_BiasEnabled)
     {
-        biases = ToConstTensor(serializerLayer->biases());
+        armnn::ConstTensor biases = ToConstTensor(serializerLayer->biases());
         optionalBiases = armnn::Optional<armnn::ConstTensor>(biases);
     }
-    IConnectableLayer* layer = m_Network->AddDepthwiseConvolution2dLayer(descriptor,
-                                                                         weights,
-                                                                         optionalBiases,
-                                                                         layerName.c_str());
+
+    armnn::ConstTensor weights = ToConstTensor(serializerLayer->weights());
+    // The data layout for weights in ArmNN used to be [M,I,H,W] but now it's changed to [1,H,W,I*M]
+    // When reading older flatbuffer files we need to add a permutation to get to the new layout.
+    if (this->GetFeatureVersions(graph).m_WeightsLayoutScheme <= 0)
+    {
+        // Permute weights  [ M, I, H, W ] --> [ 1, H, W, I*M ]
+        // Step1: [ M, I, H, W ] --> [ H, W, I, M]
+        PermutationVector permutationVector = { 3, 2, 0, 1 };
+        armnn::TensorInfo weightsInfo = weights.GetInfo();
+        std::unique_ptr<unsigned char[]> permuteBuffer(new unsigned char[weightsInfo.GetNumBytes()]);
+        weightsInfo = armnnUtils::Permuted(weightsInfo, permutationVector);
+        armnnUtils::Permute(weightsInfo.GetShape(), permutationVector,
+                            weights.GetMemoryArea(), permuteBuffer.get(),
+                            GetDataTypeSize(weightsInfo.GetDataType()));
+
+        // Step2: Reshape [ H, W, I, M] --> [ 1, H, W, I*M ]
+        auto weightsShape = weightsInfo.GetShape();
+        weightsInfo.SetShape({1,
+                              weightsShape[0],
+                              weightsShape[1],
+                              weightsShape[2]*weightsShape[3]});
+
+        armnn::ConstTensor weightsPermuted(weightsInfo, permuteBuffer.get());
+
+        layer = m_Network->AddDepthwiseConvolution2dLayer(descriptor,
+                                                          weightsPermuted,
+                                                          optionalBiases,
+                                                          layerName.c_str());
+    }
+    else
+    {
+        layer = m_Network->AddDepthwiseConvolution2dLayer(descriptor,
+                                                          weights,
+                                                          optionalBiases,
+                                                          layerName.c_str());
+    }
 
     armnn::TensorInfo outputTensorInfo = ToTensorInfo(outputs[0]);
     layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
diff --git a/src/armnnDeserializer/Deserializer.hpp b/src/armnnDeserializer/Deserializer.hpp
index 3465011..8f38058 100644
--- a/src/armnnDeserializer/Deserializer.hpp
+++ b/src/armnnDeserializer/Deserializer.hpp
@@ -163,6 +163,9 @@
     {
         // Default values to zero for backward compatibility
         unsigned int m_BindingIdScheme = 0;
+
+        // Default values to zero for backward compatibility
+        unsigned int m_WeightsLayoutScheme = 0;
     };
 
     FeatureVersions GetFeatureVersions(GraphPtr graph);
diff --git a/src/armnnDeserializer/test/DeserializeDepthwiseConv2d.cpp b/src/armnnDeserializer/test/DeserializeDepthwiseConv2d.cpp
new file mode 100644
index 0000000..83dede1
--- /dev/null
+++ b/src/armnnDeserializer/test/DeserializeDepthwiseConv2d.cpp
@@ -0,0 +1,233 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ParserFlatbuffersSerializeFixture.hpp"
+
+#include <armnnDeserializer/IDeserializer.hpp>
+
+#include <boost/test/unit_test.hpp>
+
+#include <string>
+
+BOOST_AUTO_TEST_SUITE(Deserializer)
+
+struct DepthwiseConv2dFlatbufferVersion1Fixture : public ParserFlatbuffersSerializeFixture
+{
+    explicit DepthwiseConv2dFlatbufferVersion1Fixture()
+    {
+        m_JsonString = R"(
+        {
+          "layers": [
+            {
+              "layer_type": "InputLayer",
+              "layer": {
+                "base": {
+                  "base": {
+                    "index": 0,
+                    "layerName": "Input",
+                    "layerType": "Input",
+                    "inputSlots": [
+
+                    ],
+                    "outputSlots": [
+                      {
+                        "index": 0,
+                        "tensorInfo": {
+                          "dimensions": [
+                            1,
+                            3,
+                            3,
+                            3
+                          ],
+                          "dataType": "QAsymmS8",
+                          "quantizationScale": 1.0,
+                          "quantizationOffset": 0,
+                          "quantizationDim": 0,
+                          "dimensionality": 1,
+                          "dimensionSpecificity": [
+                            true,
+                            true,
+                            true,
+                            true
+                          ]
+                        }
+                      }
+                    ]
+                  },
+                  "layerBindingId": 0
+                }
+              }
+            },
+            {
+              "layer_type": "DepthwiseConvolution2dLayer",
+              "layer": {
+                "base": {
+                  "index": 1,
+                  "layerName": "depwiseConvolution2dWithPerAxis",
+                  "layerType": "DepthwiseConvolution2d",
+                  "inputSlots": [
+                    {
+                      "index": 0,
+                      "connection": {
+                        "sourceLayerIndex": 0,
+                        "outputSlotIndex": 0
+                      }
+                    }
+                  ],
+                  "outputSlots": [
+                    {
+                      "index": 0,
+                      "tensorInfo": {
+                        "dimensions": [
+                          1,
+                          3,
+                          3,
+                          3
+                        ],
+                        "dataType": "QAsymmS8",
+                        "quantizationScale": 1.0,
+                        "quantizationOffset": 0,
+                        "quantizationDim": 0,
+                        "dimensionality": 1,
+                        "dimensionSpecificity": [
+                          true,
+                          true,
+                          true,
+                          true
+                        ]
+                      }
+                    }
+                  ]
+                },
+                "descriptor": {
+                  "padLeft": 1,
+                  "padRight": 1,
+                  "padTop": 1,
+                  "padBottom": 1,
+                  "strideX": 1,
+                  "strideY": 1,
+                  "dilationX": 1,
+                  "dilationY": 1,
+                  "biasEnabled": false,
+                  "dataLayout": "NHWC"
+                },
+                "weights": {
+                  "info": {
+                    "dimensions": [
+                      1,
+                      3,
+                      3,
+                      3
+                    ],
+                    "dataType": "QSymmS8",
+                    "quantizationScale": 0.25,
+                    "quantizationOffset": 0,
+                    "quantizationScales": [
+                      0.25,
+                      0.2,
+                      0.1
+                    ],
+                    "quantizationDim": 0,
+                    "dimensionality": 1,
+                    "dimensionSpecificity": [
+                      true,
+                      true,
+                      true,
+                      true
+                    ]
+                  },
+                  "data_type": "ByteData",
+                  "data": {
+                    "data": [
+                      4,
+                      20,
+                      0,
+                      8,
+                      20,
+                      30,
+                      4,
+                      0,
+                      10,
+                      12,
+                      0,
+                      40,
+                      0,
+                      5,
+                      30,
+                      16,
+                      10,
+                      40,
+                      12,
+                      0,
+                      30,
+                      16,
+                      20,
+                      0,
+                      12,
+                      20,
+                      20
+                    ]
+                  }
+                }
+              }
+            },
+            {
+              "layer_type": "OutputLayer",
+              "layer": {
+                "base": {
+                  "base": {
+                    "index": 2,
+                    "layerName": "Output",
+                    "layerType": "Output",
+                    "inputSlots": [
+                      {
+                        "index": 0,
+                        "connection": {
+                          "sourceLayerIndex": 1,
+                          "outputSlotIndex": 0
+                        }
+                      }
+                    ],
+                    "outputSlots": [
+
+                    ]
+                  },
+                  "layerBindingId": 0
+                }
+              }
+            }
+          ],
+          "inputIds": [
+            0
+          ],
+          "outputIds": [
+            0
+          ],
+          "featureVersions": {
+            "bindingIdsScheme": 1
+          }
+        }
+        )";
+        SetupSingleInputSingleOutput("Input", "Output");
+    }
+};
+
+// This test uses a model that was created before the weights layout scheme version was added to our
+// flatbuffers file. It ensures older models can still be read and executed.
+// A weights layout scheme feature version of 1 indicates a change in the depthwise weights layout
+// within ArmNN from [M,I,H,W] --> [1,H,W,I*M]
+BOOST_FIXTURE_TEST_CASE(DepthwiseConv2d_FlatbufferVersion1, DepthwiseConv2dFlatbufferVersion1Fixture)
+{
+    RunTest<4, armnn::DataType::QAsymmS8>(
+            0,
+            { 3,2,0,0,4,3,0,1,2,
+              0,1,3,0,4,2,2,2,3,
+              2,4,3,2,0,4,3,4,0},
+            { 15,60,10,11,37,20, 0,18,17,
+              20,65,28,28,74,26,12,20,18,
+              25,36,12,37,42,25,29,14, 9});
+}
+
+BOOST_AUTO_TEST_SUITE_END()
\ No newline at end of file