IVGCVSW-2421 Remove the template-based version of armnnUtils::Permute
in favor of a type-independent implementation that takes void pointers

 * The new implementation requires the size of the type to be passed
   to the function
 * Updated all the usages accordingly
 * Removed the old implementation, which is no longer used

!android-nn-driver:469

Change-Id: I37f4e6d62a38fbb8ec8c39bb559a2c54c83365d4
diff --git a/src/armnnTfLiteParser/TfLiteParser.cpp b/src/armnnTfLiteParser/TfLiteParser.cpp
index 3b50476..d3f382a 100644
--- a/src/armnnTfLiteParser/TfLiteParser.cpp
+++ b/src/armnnTfLiteParser/TfLiteParser.cpp
@@ -414,10 +414,8 @@
     if (permutationVector.has_value() && permutationVector.value().GetSize() > 0)
     {
         tensorInfo = armnnUtils::Permuted(tensorInfo, permutationVector.value());
-        armnnUtils::Permute(tensorInfo.GetShape(),
-                            permutationVector.value(),
-                            reinterpret_cast<const T *>(bufferPtr->data.data()),
-                            data.get());
+        armnnUtils::Permute(tensorInfo.GetShape(), permutationVector.value(),
+                            reinterpret_cast<const T*>(bufferPtr->data.data()), data.get(), sizeof(T));
     }
     else
     {
diff --git a/src/armnnTfParser/TfParser.cpp b/src/armnnTfParser/TfParser.cpp
index 2d31842..3d0c72d 100755
--- a/src/armnnTfParser/TfParser.cpp
+++ b/src/armnnTfParser/TfParser.cpp
@@ -1232,7 +1232,7 @@
     // Swizzles the content of the tensor's permanent storage into a local storage.
     std::vector<float> weightTensorSwizzledData(weightTensorInfo.GetNumElements());
     armnnUtils::Permute(weightTensorSwizzledInfo.GetShape(), permutationVector,
-                        weightNode->GetStorage(), weightTensorSwizzledData.data());
+                        weightNode->GetStorage(), weightTensorSwizzledData.data(), sizeof(float));
 
     // Create a weight tensor with the newly swizzled data.
     ConstTensor weightTensor(weightTensorSwizzledInfo, weightTensorSwizzledData);
@@ -1350,7 +1350,7 @@
     // Swizzles the content of the tensor's permanent storage into a local storage.
     std::vector<float> weightTensorSwizzledData(weightTensorInfo.GetNumElements());
     armnnUtils::Permute(weightTensorSwizzledInfo.GetShape(), permutationVector,
-                        weightNode->GetStorage(), weightTensorSwizzledData.data());
+                        weightNode->GetStorage(), weightTensorSwizzledData.data(), sizeof(float));
 
     // Create a weight tensor with the newly swizzled data.
     ConstTensor weightTensor(weightTensorSwizzledInfo, weightTensorSwizzledData);
diff --git a/src/armnnUtils/Permute.cpp b/src/armnnUtils/Permute.cpp
index 6deff90..24a8286 100644
--- a/src/armnnUtils/Permute.cpp
+++ b/src/armnnUtils/Permute.cpp
@@ -39,14 +39,6 @@
         }
     }
 
-    template <typename T>
-    void Unroll(const T* srcData, T* dstData)
-    {
-        const T* const srcEnd = srcData + m_DstShape.GetNumElements();
-        T* const       dstEnd = dstData + m_DstShape.GetNumElements();
-        Unroll(0, srcData, dstData, srcEnd, dstEnd);
-    }
-
     void Unroll(const void* srcData, void* dstData, size_t dataTypeSize)
     {
         assert(srcData);
@@ -63,32 +55,6 @@
     }
 
 private:
-    template <typename T>
-    void Unroll(size_type dimension, const T* srcData, T* dstData, const T* srcEnd, T* dstEnd)
-    {
-        assert(srcData);
-        assert(dstData);
-        assert(srcEnd);
-        assert(dstEnd);
-        assert(srcData < srcEnd);
-        assert(dstData < dstEnd);
-
-        if (dimension >= m_DstShape.GetNumDimensions())
-        {
-            *dstData = *srcData;
-        }
-        else
-        {
-            for (size_type i = 0; i < m_DstShape[dimension]; i++)
-            {
-                Unroll(dimension + 1, srcData, dstData, srcEnd, dstEnd);
-
-                srcData += m_SrcStrides[dimension];
-                dstData += m_DstStrides[dimension];
-            }
-        }
-    }
-
     void Unroll(size_type dimension,
                 const unsigned char* srcData, unsigned char* dstData,
                 const unsigned char* srcEnd, unsigned char* dstEnd,
@@ -157,22 +123,4 @@
     PermuteLoop(dstShape, mappings).Unroll(src, dst, dataTypeSize);
 }
 
-template <typename T>
-void Permute(const armnn::TensorShape& dstShape, const armnn::PermutationVector& mappings, const T* src, T* dst)
-{
-    PermuteLoop(dstShape, mappings).Unroll(src, dst);
-}
-
-// Instantiates for types.
-template void Permute(const armnn::TensorShape& dstShape, const armnn::PermutationVector& mappings,
-                      const armnn::Half* src, armnn::Half* dst);
-template void Permute(const armnn::TensorShape& dstShape, const armnn::PermutationVector& mappings,
-                      const float* src, float* dst);
-template void Permute(const armnn::TensorShape& dstShape, const armnn::PermutationVector& mappings,
-                      const uint8_t* src, uint8_t* dst);
-template void Permute(const armnn::TensorShape& dstShape, const armnn::PermutationVector& mappings,
-                      const int32_t* src, int32_t* dst);
-template void Permute(const armnn::TensorShape& dstShape, const armnn::PermutationVector& mappings,
-                      const bool* src, bool* dst);
-
 } // namespace armnnUtils
diff --git a/src/armnnUtils/Permute.hpp b/src/armnnUtils/Permute.hpp
index 4e43198..b9ef94b 100644
--- a/src/armnnUtils/Permute.hpp
+++ b/src/armnnUtils/Permute.hpp
@@ -17,7 +17,4 @@
 void Permute(const armnn::TensorShape& dstShape, const armnn::PermutationVector& mappings,
              const void* src, void* dst, size_t dataTypeSize);
 
-template <typename T>
-void Permute(const armnn::TensorShape& dstShape, const armnn::PermutationVector& mappings, const T* src, T* dst);
-
 } // namespace armnnUtils
diff --git a/src/backends/backendsCommon/test/Conv2dTestImpl.hpp b/src/backends/backendsCommon/test/Conv2dTestImpl.hpp
index 2ff66b0..8d292c8 100755
--- a/src/backends/backendsCommon/test/Conv2dTestImpl.hpp
+++ b/src/backends/backendsCommon/test/Conv2dTestImpl.hpp
@@ -149,7 +149,7 @@
     if (layout == armnn::DataLayout::NHWC)
     {
         std::vector<T> tmp(inputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
         inputData = tmp;
     }
 
@@ -178,7 +178,7 @@
     if (layout == armnn::DataLayout::NHWC)
     {
         std::vector<T> tmp(outputData.size());
-        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data());
+        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
         outputData = tmp;
     }
     ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
@@ -194,7 +194,7 @@
     boost::multi_array<T, 4> kernel = boost::multi_array<T, 4>(originalKernel);
     if (layout == armnn::DataLayout::NHWC)
     {
-        armnnUtils::Permute(kernelDesc.GetShape(), NCHWToNHWC, originalKernel.data(), kernel.data());
+        armnnUtils::Permute(kernelDesc.GetShape(), NCHWToNHWC, originalKernel.data(), kernel.data(), sizeof(T));
     }
     AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
 
@@ -387,7 +387,7 @@
     if (layout == armnn::DataLayout::NHWC)
     {
         std::vector<T> tmp(inputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
         inputData = tmp;
     }
 
@@ -411,7 +411,7 @@
     if (layout == armnn::DataLayout::NHWC)
     {
         std::vector<T> tmp(outputData.size());
-        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data());
+        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
         outputData = tmp;
     }
 
@@ -518,7 +518,7 @@
     if (layout == armnn::DataLayout::NHWC)
     {
         std::vector<T> tmp(inputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
         inputData = tmp;
     }
     auto input = MakeTensor<T, 4>(inputTensorInfo, inputData);
@@ -558,7 +558,7 @@
     if (layout == armnn::DataLayout::NHWC)
     {
         std::vector<T> tmp(outputImage.size());
-        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputImage.data(), tmp.data());
+        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputImage.data(), tmp.data(), sizeof(T));
         outputImage = tmp;
     }
 
@@ -672,7 +672,8 @@
     const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
     if (layout == armnn::DataLayout::NHWC)
     {
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, originalInputData.data(), inputData.data());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC,
+                            originalInputData.data(), inputData.data(), sizeof(T));
     }
     auto input = MakeTensor<T, 4>(inputTensorInfo, inputData);
 
@@ -758,7 +759,8 @@
     std::vector<T> outputImage = originalOutputImage;
     if (layout == armnn::DataLayout::NHWC)
     {
-        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, originalOutputImage.data(), outputImage.data());
+        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC,
+                            originalOutputImage.data(), outputImage.data(), sizeof(T));
     }
 
     ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputImage);
diff --git a/src/backends/backendsCommon/test/LayerTests.cpp b/src/backends/backendsCommon/test/LayerTests.cpp
index 819b9d6..5215007 100755
--- a/src/backends/backendsCommon/test/LayerTests.cpp
+++ b/src/backends/backendsCommon/test/LayerTests.cpp
@@ -4602,7 +4602,7 @@
     if (dataLayout == armnn::DataLayout::NHWC)
     {
         std::vector<float> tmp(inputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(float));
         inputData = tmp;
     }
 
@@ -4664,11 +4664,11 @@
     if (dataLayout == armnn::DataLayout::NHWC)
     {
         std::vector<float> tmp(inputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(float));
         inputData = tmp;
 
         std::vector<float> tmp1(outputData.size());
-        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data());
+        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data(), sizeof(float));
         outputData = tmp1;
     }
 
@@ -4730,11 +4730,11 @@
     if (dataLayout == armnn::DataLayout::NHWC)
     {
         std::vector<float> tmp(inputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(float));
         inputData = tmp;
 
         std::vector<float> tmp1(outputData.size());
-        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data());
+        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data(), sizeof(float));
         outputData = tmp1;
     }
 
@@ -4794,11 +4794,11 @@
     if (dataLayout == armnn::DataLayout::NHWC)
     {
         std::vector<float> tmp(inputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(float));
         inputData = tmp;
 
         std::vector<float> tmp1(outputData.size());
-        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data());
+        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data(), sizeof(float));
         outputData = tmp1;
     }
 
@@ -4860,11 +4860,11 @@
     if (dataLayout == armnn::DataLayout::NHWC)
     {
         std::vector<float> tmp(inputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(float));
         inputData = tmp;
 
         std::vector<float> tmp1(outputData.size());
-        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data());
+        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data(), sizeof(float));
         outputData = tmp1;
     }
 
@@ -4970,7 +4970,7 @@
     if (layout == armnn::DataLayout::NHWC)
     {
         std::vector<float> tmp(inputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(float));
         inputData = tmp;
     }
 
@@ -4981,7 +4981,8 @@
     if (layout == armnn::DataLayout::NHWC)
     {
         std::vector<float> tmp(expectedOutputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, expectedOutputData.data(), tmp.data());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC,
+                            expectedOutputData.data(), tmp.data(), sizeof(float));
         expectedOutputData = tmp;
     }
     result.outputExpected = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>(expectedOutputData));
diff --git a/src/backends/backendsCommon/test/Pooling2dTestImpl.hpp b/src/backends/backendsCommon/test/Pooling2dTestImpl.hpp
index f5e3386..b542938 100644
--- a/src/backends/backendsCommon/test/Pooling2dTestImpl.hpp
+++ b/src/backends/backendsCommon/test/Pooling2dTestImpl.hpp
@@ -293,11 +293,11 @@
     if (dataLayout == armnn::DataLayout::NHWC)
     {
         std::vector<T> tmp(inputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
         inputData = tmp;
 
         std::vector<T> tmp1(outputData.size());
-        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data());
+        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data(), sizeof(T));
         outputData = tmp1;
     }
 
@@ -362,11 +362,11 @@
     if (dataLayout == armnn::DataLayout::NHWC)
     {
         std::vector<T> tmp(inputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
         inputData = tmp;
 
         std::vector<T> tmp1(outputData.size());
-        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data());
+        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data(), sizeof(T));
         outputData = tmp1;
     }
 
@@ -473,11 +473,11 @@
     if (dataLayout == armnn::DataLayout::NHWC)
     {
         std::vector<T> tmp(inputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
         inputData = tmp;
 
         std::vector<T> tmp1(outputData.size());
-        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data());
+        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data(), sizeof(T));
         outputData = tmp1;
     }
 
diff --git a/src/backends/backendsCommon/test/SpaceToBatchNdTestImpl.hpp b/src/backends/backendsCommon/test/SpaceToBatchNdTestImpl.hpp
index a467cd3..814607d 100644
--- a/src/backends/backendsCommon/test/SpaceToBatchNdTestImpl.hpp
+++ b/src/backends/backendsCommon/test/SpaceToBatchNdTestImpl.hpp
@@ -35,11 +35,13 @@
         outputTensorInfo = armnnUtils::Permuted(outputTensorInfo, NCHWToNHWC);
 
         std::vector<float> inputTmp(inputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), inputTmp.data());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC,
+                            inputData.data(), inputTmp.data(), sizeof(float));
         inputData = inputTmp;
 
         std::vector<float> outputTmp(outputExpectedData.size());
-        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputExpectedData.data(), outputTmp.data());
+        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC,
+                            outputExpectedData.data(), outputTmp.data(), sizeof(float));
         outputExpectedData = outputTmp;
     }
 
diff --git a/src/backends/reference/workloads/RefPermuteWorkload.cpp b/src/backends/reference/workloads/RefPermuteWorkload.cpp
index df50156..9ca1642 100644
--- a/src/backends/reference/workloads/RefPermuteWorkload.cpp
+++ b/src/backends/reference/workloads/RefPermuteWorkload.cpp
@@ -23,7 +23,8 @@
     const ITensorHandle*     dst      = m_Data.m_Outputs[0];
     const PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings;
 
-    armnnUtils::Permute(GetTensorInfo(dst).GetShape(), mappings, GetConstCpuData<T>(src), GetCpuData<T>(dst));
+    armnnUtils::Permute(GetTensorInfo(dst).GetShape(), mappings,
+                        GetConstCpuData<void>(src), GetCpuData<void>(dst), sizeof(T));
 }
 
 template class RefPermuteWorkload<DataType::Float16>;
diff --git a/tests/ImagePreprocessor.cpp b/tests/ImagePreprocessor.cpp
index dfa400b..0ef0fda 100644
--- a/tests/ImagePreprocessor.cpp
+++ b/tests/ImagePreprocessor.cpp
@@ -44,7 +44,7 @@
         const armnn::PermutationVector NHWCToArmNN = { 0, 2, 3, 1 };
         armnn::TensorShape dstShape({m_BatchSize, 3, m_Height, m_Width});
         std::vector<float> tempImage(result.size());
-        armnnUtils::Permute<float>(dstShape, NHWCToArmNN, result.data(), tempImage.data());
+        armnnUtils::Permute(dstShape, NHWCToArmNN, result.data(), tempImage.data(), sizeof(float));
         result.swap(tempImage);
     }