IVGCVSW-6853 Rewrite BuildArmComputeTransposeVector()

* Some permutation vectors were not converted correctly.
* Add Transpose end to end test.
* Comments added with an example to clarify the differences between
Transpose and Permute

Signed-off-by: Teresa Charlin <teresa.charlinreyes@arm.com>
Change-Id: I6c0954ca6ce00ef5f2a6f3625abe6f4fd27b5cdf
diff --git a/include/armnn/Descriptors.hpp b/include/armnn/Descriptors.hpp
index 2540073..1cc403c 100644
--- a/include/armnn/Descriptors.hpp
+++ b/include/armnn/Descriptors.hpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017,2022 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 #pragma once
@@ -142,7 +142,14 @@
     }
 
     /// @brief Indicates how to translate tensor elements from a given source into the target destination, when
-    /// source and target potentially have different memory layouts e.g. {0U, 3U, 1U, 2U}.
+    /// source and target potentially have different memory layouts e.g.
+    /// Input Shape        {1, 1, 4, 4}
+    /// Permutation Vector {0, 2, 3, 1}
+    /// Output Shape       {1, 4, 1, 4}
+    /// dim "0" goes into index 0 ([ 1, X, X, X ])
+    /// dim "1" goes into index 2 ([ 1, X, 1, X ])
+    /// dim "2" goes into index 3 ([ 1, X, 1, 4 ])
+    /// dim "3" goes into index 1 ([ 1, 4, 1, 4 ])
     PermutationVector m_DimMappings;
 };
 
@@ -1443,7 +1450,14 @@
     }
 
     /// @brief Indicates how to translate tensor elements from a given source into the target destination, when
-    /// source and target potentially have different memory layouts e.g. {0U, 3U, 1U, 2U}.
+    /// source and target potentially have different memory layouts e.g.
+    /// Input Shape        {1, 1, 4, 4}
+    /// Permutation Vector {0, 2, 3, 1}
+    /// Output Shape       {1, 4, 4, 1}
+    /// dim "0" of input goes into index 0 ([ 1, X, X, X])
+    /// dim "2" of input goes into index 1 ([ 1, 4, X, X ])
+    /// dim "3" of input goes into index 2 ([ 1, 4, 4, X ])
+    /// dim "1" of input goes into index 3 ([ 1, 4, 4, 1 ])
     PermutationVector m_DimMappings;
 };
 
diff --git a/src/armnn/layers/PermuteLayer.hpp b/src/armnn/layers/PermuteLayer.hpp
index 37ae444..5300425 100644
--- a/src/armnn/layers/PermuteLayer.hpp
+++ b/src/armnn/layers/PermuteLayer.hpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017,2022 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 #pragma once
@@ -16,7 +16,6 @@
 {
 public:
     /// Makes a workload for the Permute type.
-    /// @param [in] graph The graph where this layer can be found.
     /// @param [in] factory The workload factory which will create the workload.
     /// @return A pointer to the created workload, or nullptr if not created.
     virtual std::unique_ptr<IWorkload> CreateWorkload(const IWorkloadFactory& factory) const override;
@@ -27,7 +26,6 @@
 
     /// Check if the input tensor shape(s)
     /// will lead to a valid configuration of @ref PermuteLayer.
-    /// @param [in] shapeInferenceMethod Indicates if output shape shall be overwritten or just validated.
     void ValidateTensorShapesFromInputs() override;
 
     /// By default returns inputShapes if the number of inputs are equal to number of outputs,
diff --git a/src/armnn/layers/TransposeLayer.hpp b/src/armnn/layers/TransposeLayer.hpp
index 08268f2..cd54df5 100644
--- a/src/armnn/layers/TransposeLayer.hpp
+++ b/src/armnn/layers/TransposeLayer.hpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2020,2022 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 #pragma once
@@ -26,7 +26,6 @@
 
     /// Check if the input tensor shape(s)
     /// will lead to a valid configuration of @ref TransposeLayer.
-    /// @param [in] shapeInferenceMethod Indicates if output shape shall be overwritten or just validated.
     void ValidateTensorShapesFromInputs() override;
 
     /// Infers the output shapes from given input shapes and the permutation vector.
diff --git a/src/backends/aclCommon/ArmComputeTensorUtils.cpp b/src/backends/aclCommon/ArmComputeTensorUtils.cpp
index 517b11c..38c7f70 100644
--- a/src/backends/aclCommon/ArmComputeTensorUtils.cpp
+++ b/src/backends/aclCommon/ArmComputeTensorUtils.cpp
@@ -1,11 +1,12 @@
 //
-// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017,2022 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 #include <aclCommon/ArmComputeTensorUtils.hpp>
 #include <aclCommon/ArmComputeUtils.hpp>
 
 #include "armnn/Exceptions.hpp"
+#include "ArmComputeUtils.hpp"
 #include <armnn/Descriptors.hpp>
 
 #include <fmt/format.h>
@@ -293,28 +294,44 @@
 
 arm_compute::PermutationVector BuildArmComputeTransposeVector(const armnn::PermutationVector& perm)
 {
+    // As ArmNN indexes are left to right and ACL indexes are right to left,
+    // the permutation vector has to be reversed and then translated into ACL axis.
+    // i.e. {1, 0, 2, 3} --> {3, 2, 0, 1} --> {0, 1, 3, 2}
+
+    // Below is an example of how the ArmNN and ACL index formats work:
+    // ArmNN Format:
+    // Input Shape        {1, 10, 20, 30}
+    // Permutation Vector {1,  0,  2,  3}
+    // Output Shape       {10, 1, 20, 30}
+    // dim "1" of input goes into index 0 of the output ([ 10, X, X, X])
+    // dim "0" of input goes into index 1 of the output ([ 10, 1, X, X ])
+    // dim "2" of input goes into index 2 of the output ([ 10, 1, 20, X ])
+    // dim "3" of input goes into index 3 of the output ([ 10, 1, 20, 30 ])
+    // ACL Format:
+    // Input Shape        {30, 20, 10, 1}
+    // Permutation Vector {0,  1,  3,  2}
+    // Output Shape       {30, 20, 1, 10}
+    // dim "0" of input goes into index 0 of the output ([ 30,  X, X, X])
+    // dim "1" of input goes into index 1 of the output ([ 30, 20, X, X ])
+    // dim "3" of input goes into index 2 of the output ([ 30, 20, 1, X ])
+    // dim "2" of input goes into index 3 of the output ([ 30, 20, 1, 10 ])
+
     arm_compute::PermutationVector aclPerm;
-    std::map<unsigned int, unsigned int> permuteMappings;
-    for (unsigned int i = 0; i < perm.GetSize(); ++i)
+    auto rank = perm.GetSize();
+
+    // Reverse the order. i.e. {1, 0, 2, 3} --> {3, 2, 0, 1}
+    std::vector<unsigned int> reversedPerm;
+    reversedPerm.reserve(rank);
+    for (unsigned int i = rank; i > 0; --i)
     {
-        permuteMappings[perm[i]] = i;
+        reversedPerm.push_back(perm[i-1]);
     }
 
-    std::vector<unsigned int> permuteVector;
-    for (unsigned int i = 0; i < perm.GetSize(); ++i)
+    // Translate from Arm NN axis to ACL axis. i.e. {3, 2, 0, 1} --> {0, 1, 3, 2}
+    for (unsigned int i = 0; i < rank; ++i)
     {
-        permuteVector.push_back(permuteMappings.at(i));
-    }
-
-    unsigned int start = 0;
-    while ((start < perm.GetSize()) && (start == permuteVector[start]))
-    {
-        ++start;
-    }
-
-    for (unsigned int i = start; i < perm.GetSize(); ++i)
-    {
-        aclPerm.set(i - start, permuteVector[i] - start);
+        auto aclAxis = rank - 1 - reversedPerm[i];
+        aclPerm.set(i, aclAxis);
     }
     return aclPerm;
 }
diff --git a/src/backends/aclCommon/ArmComputeTensorUtils.hpp b/src/backends/aclCommon/ArmComputeTensorUtils.hpp
index fdcd867..6ddecf2 100644
--- a/src/backends/aclCommon/ArmComputeTensorUtils.hpp
+++ b/src/backends/aclCommon/ArmComputeTensorUtils.hpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017,2022 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 #pragma once
@@ -66,10 +66,14 @@
 arm_compute::NormalizationLayerInfo BuildArmComputeNormalizationLayerInfo(const NormalizationDescriptor& desc);
 
 /// Utility function used to setup an arm_compute::PermutationVector object from an armnn::PermutationVector.
-arm_compute::PermutationVector BuildArmComputePermutationVector(const armnn::PermutationVector& vector);
+/// \param perm PermutationVector used in Arm NN Permute layer
+/// \return PermutationVector used in ACL Transpose layer
+arm_compute::PermutationVector BuildArmComputePermutationVector(const armnn::PermutationVector& perm);
 
 /// Utility function used to setup an arm_compute::PermutationVector object from an armnn::PermutationVector.
-arm_compute::PermutationVector BuildArmComputeTransposeVector(const armnn::PermutationVector& vector);
+/// \param perm PermutationVector used in Arm NN Transpose layer
+/// \return PermutationVector used in ACL Transpose layer
+arm_compute::PermutationVector BuildArmComputeTransposeVector(const armnn::PermutationVector& perm);
 
 /// Utility function used to setup an arm_compute::Size2D object from width and height values.
 arm_compute::Size2D BuildArmComputeSize2D(const unsigned int width, const unsigned int height);
diff --git a/src/backends/backendsCommon/test/CMakeLists.txt b/src/backends/backendsCommon/test/CMakeLists.txt
index 232226b..c9668a2 100644
--- a/src/backends/backendsCommon/test/CMakeLists.txt
+++ b/src/backends/backendsCommon/test/CMakeLists.txt
@@ -1,5 +1,5 @@
 #
-# Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+# Copyright © 2017,2022 Arm Ltd and Contributors. All rights reserved.
 # SPDX-License-Identifier: MIT
 #
 
@@ -54,6 +54,7 @@
     SpaceToDepthEndToEndTestImpl.hpp
     SplitterEndToEndTestImpl.hpp
     StridedSliceAsyncEndToEndTest.hpp
+    TransposeEndToEndTestImpl.hpp
     TensorCopyUtils.hpp
     WorkloadFactoryHelper.hpp
     layerTests/AbsTestImpl.cpp
diff --git a/src/backends/backendsCommon/test/TransposeEndToEndTestImpl.hpp b/src/backends/backendsCommon/test/TransposeEndToEndTestImpl.hpp
new file mode 100644
index 0000000..32e1b0e
--- /dev/null
+++ b/src/backends/backendsCommon/test/TransposeEndToEndTestImpl.hpp
@@ -0,0 +1,69 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <CommonTestUtils.hpp>
+
+#include <armnn/INetwork.hpp>
+#include <armnn/TypesUtils.hpp>
+
+#include <ResolveType.hpp>
+
+#include <doctest/doctest.h>
+
+namespace
+{
+
+armnn::INetworkPtr CreateTransposeNetwork(const armnn::TensorInfo& inputTensorInfo,
+                                          const armnn::TensorInfo& outputTensorInfo,
+                                          const armnn::PermutationVector& mappings)
+{
+    armnn::INetworkPtr network(armnn::INetwork::Create());
+    // Single-operator graph: Input(0) -> Transpose(mappings) -> Output(0).
+    const armnn::TransposeDescriptor transposeDescriptor(mappings);
+
+    armnn::IConnectableLayer* inputLayer     = network->AddInputLayer(0, "Input");
+    armnn::IConnectableLayer* transposeLayer = network->AddTransposeLayer(transposeDescriptor, "Transpose");
+    armnn::IConnectableLayer* outputLayer    = network->AddOutputLayer(0, "Output");
+    // inputTensorInfo is passed for the Input->Transpose link, outputTensorInfo for Transpose->Output.
+    Connect(inputLayer, transposeLayer, inputTensorInfo, 0, 0);
+    Connect(transposeLayer, outputLayer, outputTensorInfo, 0, 0);
+
+    return network;
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+void TransposeEndToEnd(const std::vector<armnn::BackendId>& backends)
+{
+    using namespace armnn;
+    // Axis 1 has extent 1, so swapping axes 1 and 2 keeps the flat element order: expected output == input.
+    std::vector<float> floatInputData{
+            1,  2,  3,  4,  5,
+            11, 12, 13, 14, 15,
+            21, 22, 23, 24, 25
+    };
+    std::vector<T> inputData = armnnUtils::QuantizedVector<T>(floatInputData);
+    std::vector<T> expectedOutputData = armnnUtils::QuantizedVector<T>(floatInputData);
+
+    const armnn::PermutationVector mappings{0, 2, 1, 3};
+
+    TensorInfo inputInfo ({ 1, 1, 5, 3 }, ArmnnType, 0.0f, 0, true);
+    TensorInfo outputInfo({ 1, 5, 1, 3 }, ArmnnType, 0.0f, 0, true);
+
+    armnn::INetworkPtr network = CreateTransposeNetwork(inputInfo, outputInfo, mappings);
+
+    CHECK(network);
+
+    std::map<int, std::vector<T>> inputTensorData   = {{ 0, inputData }};
+    std::map<int, std::vector<T>> expectedOutputTensorData = {{ 0, expectedOutputData }};
+    // Output tensor is declared with ArmnnType and expected data is std::vector<T>: output type is ArmnnType.
+    EndToEndLayerTestImpl<ArmnnType, ArmnnType>(std::move(network),
+                                                inputTensorData,
+                                                expectedOutputTensorData,
+                                                backends);
+}
+
+} // anonymous namespace
\ No newline at end of file
diff --git a/src/backends/cl/test/ClEndToEndTests.cpp b/src/backends/cl/test/ClEndToEndTests.cpp
index 74307e4..b354481 100644
--- a/src/backends/cl/test/ClEndToEndTests.cpp
+++ b/src/backends/cl/test/ClEndToEndTests.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2022 Arm Ltd. All rights reserved.
+// Copyright © 2017,2022 Arm Ltd. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 
@@ -21,6 +21,7 @@
 #include <backendsCommon/test/SpaceToDepthEndToEndTestImpl.hpp>
 #include <backendsCommon/test/SplitterEndToEndTestImpl.hpp>
 #include <backendsCommon/test/TransposeConvolution2dEndToEndTestImpl.hpp>
+#include <backendsCommon/test/TransposeEndToEndTestImpl.hpp>
 
 #include <doctest/doctest.h>
 
@@ -415,6 +416,12 @@
         clDefaultBackends, armnn::DataLayout::NHWC);
 }
 
+// Transpose
+TEST_CASE("ClTransposeEndToEndTest")
+{
+    TransposeEndToEnd<armnn::DataType::Float32>(clDefaultBackends);
+}
+
 TEST_CASE("ClQuantizedLstmEndToEndTest")
 {
     QuantizedLstmEndToEnd(clDefaultBackends);
diff --git a/src/backends/neon/test/NeonEndToEndTests.cpp b/src/backends/neon/test/NeonEndToEndTests.cpp
index faeb4da..a64e426 100644
--- a/src/backends/neon/test/NeonEndToEndTests.cpp
+++ b/src/backends/neon/test/NeonEndToEndTests.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017,2022 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 
@@ -23,6 +23,7 @@
 #include <backendsCommon/test/SpaceToDepthEndToEndTestImpl.hpp>
 #include <backendsCommon/test/SplitterEndToEndTestImpl.hpp>
 #include <backendsCommon/test/TransposeConvolution2dEndToEndTestImpl.hpp>
+#include <backendsCommon/test/TransposeEndToEndTestImpl.hpp>
 
 #include <doctest/doctest.h>
 
@@ -417,6 +418,12 @@
         neonDefaultBackends, armnn::DataLayout::NHWC);
 }
 
+// Transpose
+TEST_CASE("NeonTransposeEndToEndTest")
+{
+    TransposeEndToEnd<armnn::DataType::Float32>(neonDefaultBackends);
+}
+
 TEST_CASE("NeonImportNonAlignedInputPointerTest")
 {
     ImportNonAlignedInputPointerTest(neonDefaultBackends);
diff --git a/src/backends/reference/test/RefEndToEndTests.cpp b/src/backends/reference/test/RefEndToEndTests.cpp
index a8c0634..218f6dd 100644
--- a/src/backends/reference/test/RefEndToEndTests.cpp
+++ b/src/backends/reference/test/RefEndToEndTests.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017,2022 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 
@@ -32,6 +32,7 @@
 #include <backendsCommon/test/SplitterEndToEndTestImpl.hpp>
 #include <backendsCommon/test/StridedSliceAsyncEndToEndTest.hpp>
 #include <backendsCommon/test/TransposeConvolution2dEndToEndTestImpl.hpp>
+#include <backendsCommon/test/TransposeEndToEndTestImpl.hpp>
 
 #include <doctest/doctest.h>
 
@@ -1194,6 +1195,12 @@
         defaultBackends, armnn::DataLayout::NHWC);
 }
 
+// Transpose
+TEST_CASE("RefTransposeEndToEndTest")
+{
+    TransposeEndToEnd<armnn::DataType::Float32>(defaultBackends);
+}
+
 // Resize Bilinear
 TEST_CASE("RefResizeBilinearEndToEndFloatNchwTest")
 {