IVGCVSW-2421 Remove the template-based version of armnnUtils::Permute
in favor of a type-independent implementation that takes void pointers

 * The new implementation requires the size of the type to be passed
   to the function
 * Updated all the usages accordingly
 * Removed the old implementation, which is no longer used

!android-nn-driver:469

Change-Id: I37f4e6d62a38fbb8ec8c39bb559a2c54c83365d4
diff --git a/src/backends/backendsCommon/test/Conv2dTestImpl.hpp b/src/backends/backendsCommon/test/Conv2dTestImpl.hpp
index 2ff66b0..8d292c8 100755
--- a/src/backends/backendsCommon/test/Conv2dTestImpl.hpp
+++ b/src/backends/backendsCommon/test/Conv2dTestImpl.hpp
@@ -149,7 +149,7 @@
     if (layout == armnn::DataLayout::NHWC)
     {
         std::vector<T> tmp(inputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
         inputData = tmp;
     }
 
@@ -178,7 +178,7 @@
     if (layout == armnn::DataLayout::NHWC)
     {
         std::vector<T> tmp(outputData.size());
-        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data());
+        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
         outputData = tmp;
     }
     ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
@@ -194,7 +194,7 @@
     boost::multi_array<T, 4> kernel = boost::multi_array<T, 4>(originalKernel);
     if (layout == armnn::DataLayout::NHWC)
     {
-        armnnUtils::Permute(kernelDesc.GetShape(), NCHWToNHWC, originalKernel.data(), kernel.data());
+        armnnUtils::Permute(kernelDesc.GetShape(), NCHWToNHWC, originalKernel.data(), kernel.data(), sizeof(T));
     }
     AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
 
@@ -387,7 +387,7 @@
     if (layout == armnn::DataLayout::NHWC)
     {
         std::vector<T> tmp(inputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
         inputData = tmp;
     }
 
@@ -411,7 +411,7 @@
     if (layout == armnn::DataLayout::NHWC)
     {
         std::vector<T> tmp(outputData.size());
-        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data());
+        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data(), sizeof(T));
         outputData = tmp;
     }
 
@@ -518,7 +518,7 @@
     if (layout == armnn::DataLayout::NHWC)
     {
         std::vector<T> tmp(inputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
         inputData = tmp;
     }
     auto input = MakeTensor<T, 4>(inputTensorInfo, inputData);
@@ -558,7 +558,7 @@
     if (layout == armnn::DataLayout::NHWC)
     {
         std::vector<T> tmp(outputImage.size());
-        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputImage.data(), tmp.data());
+        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputImage.data(), tmp.data(), sizeof(T));
         outputImage = tmp;
     }
 
@@ -672,7 +672,8 @@
     const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
     if (layout == armnn::DataLayout::NHWC)
     {
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, originalInputData.data(), inputData.data());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC,
+                            originalInputData.data(), inputData.data(), sizeof(T));
     }
     auto input = MakeTensor<T, 4>(inputTensorInfo, inputData);
 
@@ -758,7 +759,8 @@
     std::vector<T> outputImage = originalOutputImage;
     if (layout == armnn::DataLayout::NHWC)
     {
-        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, originalOutputImage.data(), outputImage.data());
+        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC,
+                            originalOutputImage.data(), outputImage.data(), sizeof(T));
     }
 
     ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputImage);
diff --git a/src/backends/backendsCommon/test/LayerTests.cpp b/src/backends/backendsCommon/test/LayerTests.cpp
index 819b9d6..5215007 100755
--- a/src/backends/backendsCommon/test/LayerTests.cpp
+++ b/src/backends/backendsCommon/test/LayerTests.cpp
@@ -4602,7 +4602,7 @@
     if (dataLayout == armnn::DataLayout::NHWC)
     {
         std::vector<float> tmp(inputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(float));
         inputData = tmp;
     }
 
@@ -4664,11 +4664,11 @@
     if (dataLayout == armnn::DataLayout::NHWC)
     {
         std::vector<float> tmp(inputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(float));
         inputData = tmp;
 
         std::vector<float> tmp1(outputData.size());
-        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data());
+        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data(), sizeof(float));
         outputData = tmp1;
     }
 
@@ -4730,11 +4730,11 @@
     if (dataLayout == armnn::DataLayout::NHWC)
     {
         std::vector<float> tmp(inputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(float));
         inputData = tmp;
 
         std::vector<float> tmp1(outputData.size());
-        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data());
+        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data(), sizeof(float));
         outputData = tmp1;
     }
 
@@ -4794,11 +4794,11 @@
     if (dataLayout == armnn::DataLayout::NHWC)
     {
         std::vector<float> tmp(inputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(float));
         inputData = tmp;
 
         std::vector<float> tmp1(outputData.size());
-        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data());
+        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data(), sizeof(float));
         outputData = tmp1;
     }
 
@@ -4860,11 +4860,11 @@
     if (dataLayout == armnn::DataLayout::NHWC)
     {
         std::vector<float> tmp(inputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(float));
         inputData = tmp;
 
         std::vector<float> tmp1(outputData.size());
-        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data());
+        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data(), sizeof(float));
         outputData = tmp1;
     }
 
@@ -4970,7 +4970,7 @@
     if (layout == armnn::DataLayout::NHWC)
     {
         std::vector<float> tmp(inputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(float));
         inputData = tmp;
     }
 
@@ -4981,7 +4981,8 @@
     if (layout == armnn::DataLayout::NHWC)
     {
         std::vector<float> tmp(expectedOutputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, expectedOutputData.data(), tmp.data());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC,
+                            expectedOutputData.data(), tmp.data(), sizeof(float));
         expectedOutputData = tmp;
     }
     result.outputExpected = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>(expectedOutputData));
diff --git a/src/backends/backendsCommon/test/Pooling2dTestImpl.hpp b/src/backends/backendsCommon/test/Pooling2dTestImpl.hpp
index f5e3386..b542938 100644
--- a/src/backends/backendsCommon/test/Pooling2dTestImpl.hpp
+++ b/src/backends/backendsCommon/test/Pooling2dTestImpl.hpp
@@ -293,11 +293,11 @@
     if (dataLayout == armnn::DataLayout::NHWC)
     {
         std::vector<T> tmp(inputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
         inputData = tmp;
 
         std::vector<T> tmp1(outputData.size());
-        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data());
+        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data(), sizeof(T));
         outputData = tmp1;
     }
 
@@ -362,11 +362,11 @@
     if (dataLayout == armnn::DataLayout::NHWC)
     {
         std::vector<T> tmp(inputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
         inputData = tmp;
 
         std::vector<T> tmp1(outputData.size());
-        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data());
+        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data(), sizeof(T));
         outputData = tmp1;
     }
 
@@ -473,11 +473,11 @@
     if (dataLayout == armnn::DataLayout::NHWC)
     {
         std::vector<T> tmp(inputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data(), sizeof(T));
         inputData = tmp;
 
         std::vector<T> tmp1(outputData.size());
-        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data());
+        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp1.data(), sizeof(T));
         outputData = tmp1;
     }
 
diff --git a/src/backends/backendsCommon/test/SpaceToBatchNdTestImpl.hpp b/src/backends/backendsCommon/test/SpaceToBatchNdTestImpl.hpp
index a467cd3..814607d 100644
--- a/src/backends/backendsCommon/test/SpaceToBatchNdTestImpl.hpp
+++ b/src/backends/backendsCommon/test/SpaceToBatchNdTestImpl.hpp
@@ -35,11 +35,13 @@
         outputTensorInfo = armnnUtils::Permuted(outputTensorInfo, NCHWToNHWC);
 
         std::vector<float> inputTmp(inputData.size());
-        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), inputTmp.data());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC,
+                            inputData.data(), inputTmp.data(), sizeof(float));
         inputData = inputTmp;
 
         std::vector<float> outputTmp(outputExpectedData.size());
-        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputExpectedData.data(), outputTmp.data());
+        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC,
+                            outputExpectedData.data(), outputTmp.data(), sizeof(float));
         outputExpectedData = outputTmp;
     }
 
diff --git a/src/backends/reference/workloads/RefPermuteWorkload.cpp b/src/backends/reference/workloads/RefPermuteWorkload.cpp
index df50156..9ca1642 100644
--- a/src/backends/reference/workloads/RefPermuteWorkload.cpp
+++ b/src/backends/reference/workloads/RefPermuteWorkload.cpp
@@ -23,7 +23,8 @@
     const ITensorHandle*     dst      = m_Data.m_Outputs[0];
     const PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings;
 
-    armnnUtils::Permute(GetTensorInfo(dst).GetShape(), mappings, GetConstCpuData<T>(src), GetCpuData<T>(dst));
+    armnnUtils::Permute(GetTensorInfo(dst).GetShape(), mappings,
+                        GetConstCpuData<void>(src), GetCpuData<void>(dst), sizeof(T));
 }
 
 template class RefPermuteWorkload<DataType::Float16>;