IVGCVSW-4375 Add support for Transpose

 * Added TransposeLayer
 * Added CL, Neon and Ref Workloads
 * Added Transpose utilities
 * Added Serializer and Deserializer support
 * Added Quantizer support

Signed-off-by: Mike Kelly <mike.kelly@arm.com>
Change-Id: I04c755ba7cb5b1edf72b3c9f3c0314878032e3c7
diff --git a/src/backends/backendsCommon/LayerSupportBase.cpp b/src/backends/backendsCommon/LayerSupportBase.cpp
index 449b809..1279134 100644
--- a/src/backends/backendsCommon/LayerSupportBase.cpp
+++ b/src/backends/backendsCommon/LayerSupportBase.cpp
@@ -579,4 +579,12 @@
     return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported);
 }
 
+bool LayerSupportBase::IsTransposeSupported(const TensorInfo& /*input*/,
+                                            const TensorInfo& /*output*/,
+                                            const TransposeDescriptor& /*descriptor*/,
+                                            Optional<std::string&> reasonIfUnsupported) const
+{
+    return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported);
+}
+
 } // namespace armnn
diff --git a/src/backends/backendsCommon/LayerSupportBase.hpp b/src/backends/backendsCommon/LayerSupportBase.hpp
index 459ac03..888bef5 100644
--- a/src/backends/backendsCommon/LayerSupportBase.hpp
+++ b/src/backends/backendsCommon/LayerSupportBase.hpp
@@ -353,6 +353,12 @@
         const TensorInfo& weights,
         const Optional<TensorInfo>& biases,
         Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+
+    bool IsTransposeSupported(const TensorInfo& input,
+                              const TensorInfo& output,
+                              const TransposeDescriptor& descriptor,
+                              Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+
 };
 
 } // namespace armnn
diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp
index 410469e..9b7a242 100644
--- a/src/backends/backendsCommon/WorkloadData.cpp
+++ b/src/backends/backendsCommon/WorkloadData.cpp
@@ -2680,6 +2680,35 @@
     ValidateTensorDataTypesMatch(inputTensorInfo, outputTensorInfo, descriptorName, "input", "output");
 }
 
+void TransposeQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
+{
+    const std::string descriptorName{"TransposeQueueDescriptor"};
+
+    ValidateNumInputs(workloadInfo,  descriptorName, 1);
+    ValidateNumOutputs(workloadInfo, descriptorName, 1);
+
+    const PermutationVector& mapping = m_Parameters.m_DimMappings;
+
+    const TensorInfo& inputTensorInfo  = workloadInfo.m_InputTensorInfos[0];
+    const TensorInfo& outputTensorInfo = workloadInfo.m_OutputTensorInfos[0];
+
+    ValidateTensorNumDimensions(inputTensorInfo,  descriptorName, mapping.GetSize(), "input");
+    ValidateTensorNumDimensions(outputTensorInfo, descriptorName, mapping.GetSize(), "output");
+
+    for (unsigned int i = 0u; i < mapping.GetSize(); ++i)
+    {
+        if (inputTensorInfo.GetShape()[mapping[i]] != outputTensorInfo.GetShape()[i])
+        {
+            throw InvalidArgumentException(descriptorName + ": src dimension " + to_string(mapping[i]) +
+                                           " (=" + to_string(inputTensorInfo.GetShape()[mapping[i]]) + ") " +
+                                           "must match dst dimension " + to_string(i) +
+                                           " (=" + to_string(outputTensorInfo.GetShape()[i]) + ")");
+        }
+    }
+
+    ValidateTensorDataTypesMatch(inputTensorInfo, outputTensorInfo, descriptorName, "input", "output");
+}
+
 void QuantizedLstmQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
 {
     const std::string descriptorName{"QuantizedLstmQueueDescriptor"};
diff --git a/src/backends/backendsCommon/WorkloadData.hpp b/src/backends/backendsCommon/WorkloadData.hpp
index 46681e9..06289fa 100644
--- a/src/backends/backendsCommon/WorkloadData.hpp
+++ b/src/backends/backendsCommon/WorkloadData.hpp
@@ -504,6 +504,11 @@
     void Validate(const WorkloadInfo& workloadInfo) const;
 };
 
+struct TransposeQueueDescriptor : QueueDescriptorWithParameters<TransposeDescriptor>
+{
+    void Validate(const WorkloadInfo& workloadInfo) const;
+};
+
 struct QuantizedLstmQueueDescriptor : QueueDescriptor
 {
     QuantizedLstmQueueDescriptor()
diff --git a/src/backends/backendsCommon/WorkloadFactory.cpp b/src/backends/backendsCommon/WorkloadFactory.cpp
index 23ff70a..6ac76ec 100644
--- a/src/backends/backendsCommon/WorkloadFactory.cpp
+++ b/src/backends/backendsCommon/WorkloadFactory.cpp
@@ -1023,6 +1023,17 @@
                                                           reason);
             break;
         }
+        case LayerType::Transpose:
+        {
+            auto cLayer = boost::polymorphic_downcast<const TransposeLayer*>(&layer);
+            const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
+            const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
+            result = layerSupportObject->IsTransposeSupported(OverrideDataType(input, dataType),
+                                                              OverrideDataType(output, dataType),
+                                                              cLayer->GetParameters(),
+                                                              reason);
+            break;
+        }
         case LayerType::TransposeConvolution2d:
         {
             auto cLayer = boost::polymorphic_downcast<const TransposeConvolution2dLayer*>(&layer);
@@ -1315,7 +1326,7 @@
 }
 
 std::unique_ptr<IWorkload> IWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& /*descriptor*/,
-                                                           const WorkloadInfo&/**/ /*info*/) const
+                                                           const WorkloadInfo& /*info*/) const
 {
     return std::unique_ptr<IWorkload>();
 }
@@ -1379,7 +1390,7 @@
 {
     return std::unique_ptr<IWorkload>();
 }
-/**/
+
 std::unique_ptr<IWorkload> IWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& /*descriptor*/,
                                                            const WorkloadInfo& /*info*/) const
 {
@@ -1428,6 +1439,12 @@
     return std::unique_ptr<IWorkload>();
 }
 
+std::unique_ptr<IWorkload> IWorkloadFactory::CreateTranspose(const TransposeQueueDescriptor& /*descriptor*/,
+                                                             const WorkloadInfo& /*info*/) const
+{
+    return std::unique_ptr<IWorkload>();
+}
+
 std::unique_ptr<IWorkload> IWorkloadFactory::CreateTransposeConvolution2d(
     const TransposeConvolution2dQueueDescriptor& /*descriptor*/,
     const WorkloadInfo& /*info*/) const
diff --git a/src/backends/backendsCommon/WorkloadFactory.hpp b/src/backends/backendsCommon/WorkloadFactory.hpp
index e1cdff6..dae58b6 100644
--- a/src/backends/backendsCommon/WorkloadFactory.hpp
+++ b/src/backends/backendsCommon/WorkloadFactory.hpp
@@ -235,6 +235,9 @@
     virtual std::unique_ptr<IWorkload> CreateSwitch(const SwitchQueueDescriptor& descriptor,
                                                     const WorkloadInfo& Info) const;
 
+    virtual std::unique_ptr<IWorkload> CreateTranspose(const TransposeQueueDescriptor& descriptor,
+                                                       const WorkloadInfo& info) const;
+
     virtual std::unique_ptr<IWorkload> CreateTransposeConvolution2d(
         const TransposeConvolution2dQueueDescriptor& descriptor,
         const WorkloadInfo& info) const;
diff --git a/src/backends/backendsCommon/WorkloadFactoryBase.hpp b/src/backends/backendsCommon/WorkloadFactoryBase.hpp
index 9602cc3..960dbd3 100644
--- a/src/backends/backendsCommon/WorkloadFactoryBase.hpp
+++ b/src/backends/backendsCommon/WorkloadFactoryBase.hpp
@@ -266,6 +266,10 @@
                                             const WorkloadInfo& /*info*/) const override
     { return nullptr; }
 
+    std::unique_ptr<IWorkload> CreateTranspose(const TransposeQueueDescriptor& /*descriptor*/,
+                                               const WorkloadInfo& /*info*/) const override
+    { return nullptr; }
+
     std::unique_ptr<IWorkload> CreateTransposeConvolution2d(const TransposeConvolution2dQueueDescriptor& /*descriptor*/,
                                                             const WorkloadInfo& /*info*/) const override
     { return nullptr; }
diff --git a/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp b/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp
index 1a899aa..395a63d 100644
--- a/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp
+++ b/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp
@@ -537,6 +537,8 @@
 
 DECLARE_LAYER_POLICY_1_PARAM(Switch)
 
+DECLARE_LAYER_POLICY_2_PARAM(Transpose)
+
 DECLARE_LAYER_POLICY_2_PARAM(TransposeConvolution2d)
 
 
diff --git a/src/backends/backendsCommon/test/LayerTests.hpp b/src/backends/backendsCommon/test/LayerTests.hpp
index eba7944..62a66df 100644
--- a/src/backends/backendsCommon/test/LayerTests.hpp
+++ b/src/backends/backendsCommon/test/LayerTests.hpp
@@ -53,3 +53,4 @@
 #include <backendsCommon/test/layerTests/StridedSliceTestImpl.hpp>
 #include <backendsCommon/test/layerTests/SubtractionTestImpl.hpp>
 #include <backendsCommon/test/layerTests/TransposeConvolution2dTestImpl.hpp>
+#include <backendsCommon/test/layerTests/TransposeTestImpl.hpp>
diff --git a/src/backends/backendsCommon/test/layerTests/TransposeTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/TransposeTestImpl.hpp
new file mode 100644
index 0000000..3949dcc
--- /dev/null
+++ b/src/backends/backendsCommon/test/layerTests/TransposeTestImpl.hpp
@@ -0,0 +1,240 @@
+//
+// Copyright © 2020 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <ResolveType.hpp>
+
+
+#include <armnn/backends/IBackendInternal.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+#include <backendsCommon/test/WorkloadTestUtils.hpp>
+
+#include <test/TensorHelpers.hpp>
+
+template<typename T>
+LayerTestResult<T, 4> SimpleTransposeTestImpl(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        armnn::TransposeDescriptor descriptor,
+        armnn::TensorInfo inputTensorInfo,
+        armnn::TensorInfo outputTensorInfo,
+        const std::vector<T>& inputData,
+        const std::vector<T>& outputExpectedData)
+{
+    boost::ignore_unused(memoryManager);
+    auto input = MakeTensor<T, 4>(inputTensorInfo, inputData);
+
+    LayerTestResult<T, 4> ret(outputTensorInfo);
+    ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputExpectedData);
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::TransposeQueueDescriptor data;
+    data.m_Parameters = descriptor;
+    armnn::WorkloadInfo info;
+    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateTranspose(data, info);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+
+    workload->Execute();
+
+    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
+
+    return ret;
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> SimpleTransposeTest(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    armnn::TensorInfo inputTensorInfo;
+    armnn::TensorInfo outputTensorInfo;
+
+    unsigned int inputShape[] = { 1, 2, 2, 2 };
+    unsigned int outputShape[] = { 1, 2, 2, 2 };
+
+    armnn::TransposeDescriptor descriptor;
+    descriptor.m_DimMappings = {0U, 2U, 3U, 1U};
+
+    inputTensorInfo = armnn::TensorInfo(4, inputShape, ArmnnType);
+    outputTensorInfo = armnn::TensorInfo(4, outputShape, ArmnnType);
+
+    // Set quantization parameters if the requested type is a quantized type.
+    if(armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(0.5f);
+        inputTensorInfo.SetQuantizationOffset(5);
+        outputTensorInfo.SetQuantizationScale(0.5f);
+        outputTensorInfo.SetQuantizationOffset(5);
+    }
+
+    std::vector<T> input = std::vector<T>(
+    {
+        1, 2,
+        3, 4,
+        5, 6,
+        7, 8
+    });
+
+    std::vector<T> outputExpected = std::vector<T>(
+    {
+        1, 5, 2, 6,
+        3, 7, 4, 8
+    });
+
+    return SimpleTransposeTestImpl<T>(workloadFactory, memoryManager,
+                                    descriptor, inputTensorInfo,
+                                    outputTensorInfo, input, outputExpected);
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> TransposeValueSet1Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    armnn::TensorInfo inputTensorInfo;
+    armnn::TensorInfo outputTensorInfo;
+
+    unsigned int inputShape[]  = { 1, 2, 2, 3 };
+    unsigned int outputShape[] = { 1, 3, 2, 2 };
+
+    armnn::TransposeDescriptor descriptor;
+    descriptor.m_DimMappings = {0U, 3U, 1U, 2U};
+
+    inputTensorInfo = armnn::TensorInfo(4, inputShape, ArmnnType);
+    outputTensorInfo = armnn::TensorInfo(4, outputShape, ArmnnType);
+
+    // Set quantization parameters if the requested type is a quantized type.
+    if(armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(0.5f);
+        inputTensorInfo.SetQuantizationOffset(5);
+        outputTensorInfo.SetQuantizationScale(0.5f);
+        outputTensorInfo.SetQuantizationOffset(5);
+    }
+
+    std::vector<T> input = std::vector<T>(
+    {
+         1,  2,  3,
+        11, 12, 13,
+        21, 22, 23,
+        31, 32, 33
+    });
+
+    std::vector<T> outputExpected = std::vector<T>(
+    {
+        1, 11, 21, 31,
+        2, 12, 22, 32,
+        3, 13, 23, 33
+    });
+
+    return SimpleTransposeTestImpl<T>(workloadFactory, memoryManager,
+                                    descriptor, inputTensorInfo,
+                                    outputTensorInfo, input, outputExpected);
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> TransposeValueSet2Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    armnn::TensorInfo inputTensorInfo;
+    armnn::TensorInfo outputTensorInfo;
+
+    unsigned int inputShape[]  = { 1, 3, 2, 2 };
+    unsigned int outputShape[] = { 1, 2, 2, 3 };
+
+    armnn::TransposeDescriptor descriptor;
+    descriptor.m_DimMappings = {0U, 2U, 3U, 1U};
+
+    inputTensorInfo = armnn::TensorInfo(4, inputShape, ArmnnType);
+    outputTensorInfo = armnn::TensorInfo(4, outputShape, ArmnnType);
+
+    // Set quantization parameters if the requested type is a quantized type.
+    if(armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(0.5f);
+        inputTensorInfo.SetQuantizationOffset(5);
+        outputTensorInfo.SetQuantizationScale(0.5f);
+        outputTensorInfo.SetQuantizationOffset(5);
+    }
+
+    std::vector<T> input = std::vector<T>(
+    {
+        1, 11, 21, 31,
+        2, 12, 22, 32,
+        3, 13, 23, 33
+    });
+
+    std::vector<T> outputExpected = std::vector<T>(
+    {
+         1,  2,  3,
+        11, 12, 13,
+        21, 22, 23,
+        31, 32, 33,
+    });
+
+    return SimpleTransposeTestImpl<T>(workloadFactory, memoryManager,
+                                    descriptor, inputTensorInfo,
+                                    outputTensorInfo, input, outputExpected);
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> TransposeValueSet3Test(
+        armnn::IWorkloadFactory& workloadFactory,
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    armnn::TensorInfo inputTensorInfo;
+    armnn::TensorInfo outputTensorInfo;
+
+    unsigned int inputShape[]  = { 1, 2, 3, 3 };
+    unsigned int outputShape[] = { 1, 3, 2, 3 };
+
+    armnn::TransposeDescriptor descriptor;
+    descriptor.m_DimMappings = {0U, 3U, 1U, 2U};
+
+    inputTensorInfo = armnn::TensorInfo(4, inputShape, ArmnnType);
+    outputTensorInfo = armnn::TensorInfo(4, outputShape, ArmnnType);
+
+    // Set quantization parameters if the requested type is a quantized type.
+    if(armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(0.5f);
+        inputTensorInfo.SetQuantizationOffset(5);
+        outputTensorInfo.SetQuantizationScale(0.5f);
+        outputTensorInfo.SetQuantizationOffset(5);
+    }
+
+    std::vector<T> input = std::vector<T>(
+    {
+         1,  2,  3,
+        11, 12, 13,
+        21, 22, 23,
+        31, 32, 33,
+        41, 42, 43,
+        51, 52, 53
+    });
+
+    std::vector<T> outputExpected = std::vector<T>(
+    {
+        1, 11, 21, 31, 41, 51,
+        2, 12, 22, 32, 42, 52,
+        3, 13, 23, 33, 43, 53
+    });
+
+    return SimpleTransposeTestImpl<T>(workloadFactory, memoryManager,
+                                      descriptor, inputTensorInfo,
+                                      outputTensorInfo, input, outputExpected);
+}