IVGCVSW-5826 Change weights layout for depthwise to [1,H,W,I*M]

 * This change is necessary because tflite uses a [1,H,W,I*M] format
   and uses the I*M dimension for per-axis quantization. Our previous
   layout [M,I,H,W] can't handle the corresponding quantization scales.
 * Updates Onnx-, TfLiteParser and TfLiteDelegate
 * Updates the CpuRef, CpuAcc and GpuAcc backends
 * Adjusts unit tests
 * Adds test to ensure models with old layout can still be read and
   executed
 * Adds conversion function to previous layout [1,H,W,I*M] --> [M,I,H,W]
   which can be used by backend developers

!android-nn-driver:5553

Signed-off-by: Jan Eilers <jan.eilers@arm.com>
Change-Id: Ifef23368b8c3702cf315a5838d214f7dc13c0152
diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp
index ad50907..589a951 100644
--- a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp
+++ b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp
@@ -36,12 +36,11 @@
     const arm_compute::TensorInfo aclInputInfo  = BuildArmComputeTensorInfo(input,  descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
 
-    // ArmNN's weight format is [ M, I, H, W ]
-    const unsigned int aclDepthMultiplier = weights.GetShape()[0];
-
-    // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either
-    // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library
-    TensorInfo weightsPermuted = ConvertWeightTensorInfoFromArmnnToAcl(weights, descriptor.m_DataLayout);
+    // ArmNN's weight format is usually [ M, I, H, W ] but for depthwise it is [ 1, H, W, I*M ]
+    // Permute to [ 1, I * M, H, W ] (if NCHW), as required by the compute library
+    unsigned int aclDepthMultiplier;
+    TensorInfo weightsPermuted;
+    std::tie(weightsPermuted, aclDepthMultiplier) = Convert1HWOTensorInfoToAcl(weights, input,descriptor.m_DataLayout);
 
     // Convert the weights into the compute library format
     const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
@@ -79,21 +78,20 @@
     const WorkloadInfo& info)
     : BaseWorkload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info)
 {
-    // ArmNN's weight format is [ M, I, H, W ]
+    // ArmNN's weight format for depthwise is [ 1, H, W, I*M ]
     auto& weightInfo = m_Data.m_Weight->GetTensorInfo();
 
-    // Allocate a buffer for the swizzling of the weight tensor
-    std::unique_ptr<unsigned char[]> permuteBuffer(new unsigned char[m_Data.m_Weight->GetTensorInfo().GetNumBytes()]);
-
-    // Convert the weight format from ArmNN's [ M, I, H, W ] (does NOT depend on the data layout) to either
-    // [ 1, H, W, I * M ] (if NHWC) or [ 1, I * M, H, W ] (if NCHW), as required by the compute library
-    ConstTensor weightPermuted = ConvertWeightTensorFromArmnnToAcl(m_Data.m_Weight,
-                                                                   m_Data.m_Parameters.m_DataLayout,
-                                                                   permuteBuffer.get());
+    ConstTensor weightsPermuted;
+    unsigned int depthMultiplier;
+    std::unique_ptr<unsigned char[]> permuteBuffer(new unsigned char[weightInfo.GetNumBytes()]);
+    std::tie(weightsPermuted, depthMultiplier) = Convert1HWOTensorToAcl(m_Data.m_Weight,
+                                                                              info.m_InputTensorInfos[0],
+                                                                              m_Data.m_Parameters.m_DataLayout,
+                                                                              permuteBuffer.get());
 
     // Convert the weights into the compute library format
     m_KernelTensor = std::make_unique<arm_compute::Tensor>();
-    BuildArmComputeTensor(*m_KernelTensor, weightPermuted.GetInfo(), m_Data.m_Parameters.m_DataLayout);
+    BuildArmComputeTensor(*m_KernelTensor, weightsPermuted.GetInfo(), m_Data.m_Parameters.m_DataLayout);
 
     if (m_Data.m_Parameters.m_BiasEnabled)
     {
@@ -116,9 +114,6 @@
     input.info()->set_data_layout(aclDataLayout);
     output.info()->set_data_layout(aclDataLayout);
 
-    // Get the depth multiplier
-    const unsigned int depthMultiplier = weightInfo.GetShape()[0];
-
     arm_compute::PadStrideInfo padStrideInfo = BuildArmComputePadStrideInfo(m_Data.m_Parameters);
 
     const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
@@ -136,7 +131,7 @@
 
     ARMNN_ASSERT(m_pDepthwiseConvolutionLayer);
 
-    ScopedTensorHandle weightsPermutedHandle(weightPermuted);
+    ScopedTensorHandle weightsPermutedHandle(weightsPermuted);
     InitializeArmComputeTensorData(*m_KernelTensor, &weightsPermutedHandle);
 
     if (m_Data.m_Parameters.m_BiasEnabled)