IVGCVSW-5826 Change weights layout for depthwise to [1,H,W,I*M]
* This change is necessary because tflite uses a [1,H,W,I*M] format
and uses the I*M dimension for per-axis quantization. Our previous
layout [M,I,H,W] can't handle the corresponding quantization scales.
* Updates OnnxParser, TfLiteParser and TfLiteDelegate
* Updates the CpuRef, CpuAcc and GpuAcc backends
* Adjusts unit tests
* Adds test to ensure models with old layout can still be read and
executed
* Adds conversion function from the new layout back to the previous
layout ([1,H,W,I*M] --> [M,I,H,W]) which can be used by backend developers
!android-nn-driver:5553
Signed-off-by: Jan Eilers <jan.eilers@arm.com>
Change-Id: Ifef23368b8c3702cf315a5838d214f7dc13c0152
diff --git a/delegate/src/Convolution.hpp b/delegate/src/Convolution.hpp
index 6566fff..96612e0 100644
--- a/delegate/src/Convolution.hpp
+++ b/delegate/src/Convolution.hpp
@@ -289,8 +289,6 @@
const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor);
const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor);
- // Mappings from TensorflowLite filter tensors to the ArmNN filter tensors (ArmNN weights have to be [M, I, H, W])
- armnn::PermutationVector permutationVector{ 2, 3, 1, 0 }; // [H, W, I, M] -> [M, I, H, W]
armnn::TensorInfo filterTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteFilterTensor);
// Assuming input is NHWC
@@ -301,12 +299,6 @@
unsigned int filterHeight = filterTensorInfo.GetShape()[1];
unsigned int filterWidth = filterTensorInfo.GetShape()[2];
- // Reshape weights as [ H, W, I, M ]
- filterTensorInfo.SetShape({ filterHeight,
- filterWidth,
- inputTensorInfo.GetShape()[3],
- filterTensorInfo.GetShape()[3] / inputTensorInfo.GetShape()[3] });
-
// Calculate padding
CalcPadding(inputHeight, filterHeight, descriptor.m_StrideY, descriptor.m_DilationY,
descriptor.m_PadTop, descriptor.m_PadBottom, params->padding);
@@ -340,12 +332,8 @@
biasTensorInfo = armnn::TensorInfo(armnn::TensorShape({1}), GetDataType(tfLiteInputTensor));
}
- std::vector<uint8_t> swizzledData(filterTensorInfo.GetNumBytes());
- auto filter =
- CreateConstTensor(&tfLiteFilterTensor,
- filterTensorInfo,
- armnn::Optional<armnn::PermutationVector&>(permutationVector),
- swizzledData.data());
+ // For depthwise the weights layout is the same as for tflite [1, H, W, I*M]. No permutation required.
+ auto filter = CreateConstTensor(&tfLiteFilterTensor, filterTensorInfo);
if (!delegateData.m_Network)
{
@@ -369,8 +357,7 @@
{
auto biases =
CreateConstTensor(&tfLiteContext->tensors[tfLiteNode->inputs->data[2]],
- biasTensorInfo,
- armnn::Optional<armnn::PermutationVector&>());
+ biasTensorInfo);
layer = delegateData.m_Network->AddDepthwiseConvolution2dLayer(descriptor,
filter,
armnn::Optional<armnn::ConstTensor>(biases));