IVGCVSW-2074: SimpleConvolution2dTestImpl updated for NHWC

* Added a defaulted DataLayoutIndexed argument to SimpleConvolution2dTestImpl
* Permuted the test data tensors just before use when the layout is NHWC
* Added the DataLayout to the descriptor
* Added a DataLayoutIndexed argument to SimpleConvolution2d3x5TestCommon
* Added NHWC versions of SimpleConvolution2d3x5Test for Neon and CL

Change-Id: I10e3ece42a50108baeabe4d8b0f0ac3e6d532261
diff --git a/src/backends/cl/test/ClLayerTests.cpp b/src/backends/cl/test/ClLayerTests.cpp
index 937c58c..4946515 100755
--- a/src/backends/cl/test/ClLayerTests.cpp
+++ b/src/backends/cl/test/ClLayerTests.cpp
@@ -54,10 +54,12 @@
 // Convolution
 ARMNN_AUTO_TEST_CASE(SimpleConvolution1d, Convolution1dTest, true)
 
-ARMNN_AUTO_TEST_CASE(SimpleConvolution2d, SimpleConvolution2d3x5Test, true)
-ARMNN_AUTO_TEST_CASE(SimpleConvolution2dSquare, SimpleConvolution2d3x3Test, true)
+ARMNN_AUTO_TEST_CASE(SimpleConvolution2d, SimpleConvolution2d3x5Test, true, armnn::DataLayout::NCHW)
+ARMNN_AUTO_TEST_CASE(SimpleConvolution2dNhwc, SimpleConvolution2d3x5Test, true, armnn::DataLayout::NHWC)
 ARMNN_AUTO_TEST_CASE(SimpleConvolution2d3x3Uint8, SimpleConvolution2d3x3Uint8Test, true)
-ARMNN_AUTO_TEST_CASE(UnbiasedConvolution2d, SimpleConvolution2d3x5Test, false)
+ARMNN_AUTO_TEST_CASE(UnbiasedConvolution2d, SimpleConvolution2d3x5Test, false, armnn::DataLayout::NCHW)
+ARMNN_AUTO_TEST_CASE(UnbiasedConvolution2dNhwc, SimpleConvolution2d3x5Test, false, armnn::DataLayout::NHWC)
+
 ARMNN_AUTO_TEST_CASE(UnbiasedConvolution2dSquare, SimpleConvolution2d3x3Test, false)
 ARMNN_AUTO_TEST_CASE(SimpleConvolution2dAsymmetricPadding, Convolution2dAsymmetricPaddingTest)
 
diff --git a/src/backends/neon/test/NeonLayerTests.cpp b/src/backends/neon/test/NeonLayerTests.cpp
index 568a236..ffd0d29 100644
--- a/src/backends/neon/test/NeonLayerTests.cpp
+++ b/src/backends/neon/test/NeonLayerTests.cpp
@@ -26,9 +26,12 @@
 // Convolution
 ARMNN_AUTO_TEST_CASE(SimpleConvolution1d, Convolution1dTest, true)
 
-ARMNN_AUTO_TEST_CASE(SimpleConvolution2d, SimpleConvolution2d3x5Test, true)
+ARMNN_AUTO_TEST_CASE(SimpleConvolution2d, SimpleConvolution2d3x5Test, true, armnn::DataLayout::NCHW)
+ARMNN_AUTO_TEST_CASE(SimpleConvolution2dNhwc, SimpleConvolution2d3x5Test, true, armnn::DataLayout::NHWC)
 ARMNN_AUTO_TEST_CASE(SimpleConvolution2dSquare, SimpleConvolution2d3x3Test, true)
-ARMNN_AUTO_TEST_CASE(UnbiasedConvolution2d, SimpleConvolution2d3x5Test, false)
+ARMNN_AUTO_TEST_CASE(UnbiasedConvolution2d, SimpleConvolution2d3x5Test, false, armnn::DataLayout::NCHW)
+ARMNN_AUTO_TEST_CASE(UnbiasedConvolution2dNhwc, SimpleConvolution2d3x5Test, false, armnn::DataLayout::NHWC)
+
 ARMNN_AUTO_TEST_CASE(UnbiasedConvolution2dSquare, SimpleConvolution2d3x3Test, false)
 ARMNN_AUTO_TEST_CASE(SimpleConvolution2dAsymmetricPadding, Convolution2dAsymmetricPaddingTest)
 
diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp
index 38ce94d..45239be 100644
--- a/src/backends/reference/test/RefLayerTests.cpp
+++ b/src/backends/reference/test/RefLayerTests.cpp
@@ -18,11 +18,11 @@
 // UNIT tests
 
 // Convolution
-ARMNN_AUTO_TEST_CASE(SimpleConvolution2d3x5, SimpleConvolution2d3x5Test, true)
-ARMNN_AUTO_TEST_CASE(SimpleConvolution2d3x5Uint8, SimpleConvolution2d3x5Uint8Test, true)
+ARMNN_AUTO_TEST_CASE(SimpleConvolution2d3x5, SimpleConvolution2d3x5Test, true, armnn::DataLayout::NCHW)
+ARMNN_AUTO_TEST_CASE(SimpleConvolution2d3x5Uint8, SimpleConvolution2d3x5Uint8Test, true, armnn::DataLayout::NCHW)
 
-ARMNN_AUTO_TEST_CASE(UnbiasedConvolution2d, SimpleConvolution2d3x5Test, false)
-ARMNN_AUTO_TEST_CASE(UnbiasedConvolutionUint8, SimpleConvolution2d3x5Uint8Test, false)
+ARMNN_AUTO_TEST_CASE(UnbiasedConvolution2d, SimpleConvolution2d3x5Test, false, armnn::DataLayout::NCHW)
+ARMNN_AUTO_TEST_CASE(UnbiasedConvolutionUint8, SimpleConvolution2d3x5Uint8Test, false, armnn::DataLayout::NCHW)
 
 ARMNN_AUTO_TEST_CASE(SimpleConvolution1d, Convolution1dTest, true)
 ARMNN_AUTO_TEST_CASE(SimpleConvolution1dUint8, Convolution1dUint8Test, true)
diff --git a/src/backends/test/Conv2dTestImpl.hpp b/src/backends/test/Conv2dTestImpl.hpp
index d8c1040..41a0d1b 100755
--- a/src/backends/test/Conv2dTestImpl.hpp
+++ b/src/backends/test/Conv2dTestImpl.hpp
@@ -4,6 +4,7 @@
 //
 #pragma once
 
+#include <string>
 #include <armnn/ArmNN.hpp>
 #include <armnn/Tensor.hpp>
 #include <armnn/TypesUtils.hpp>
@@ -13,6 +14,8 @@
 
 #include <backends/CpuTensorHandle.hpp>
 #include <backends/WorkloadFactory.hpp>
+#include "Permute.hpp"
+#include <boost/numeric/conversion/cast.hpp>
 
 // Mapping from input type to bias type for fully connected layers.
 // float => float, uint8_t => int32_t
@@ -59,33 +62,55 @@
     }
 }
 
+template<typename T> // T selects the tensor data type via armnn::GetDataType<T>().
+armnn::TensorInfo GetTensorInfo(unsigned int numberOfBatches,
+                                unsigned int numberOfChannels,
+                                unsigned int height,
+                                unsigned int width,
+                                const armnn::DataLayoutIndexed& layout) // Decides the dimension order below.
+{
+    switch (layout.GetDataLayout())
+    {
+        case armnn::DataLayout::NCHW: // Shape is {batches, channels, height, width}.
+            return armnn::TensorInfo({numberOfBatches, numberOfChannels, height, width}, armnn::GetDataType<T>());
+        case armnn::DataLayout ::NHWC: // Shape is {batches, height, width, channels}.
+            return armnn::TensorInfo({numberOfBatches, height, width, numberOfChannels}, armnn::GetDataType<T>());
+        default: // Any other layout is rejected; the message carries its numeric value.
+            throw armnn::InvalidArgumentException("unknown data layout ["
+                + std::to_string(static_cast<int>(layout.GetDataLayout())) + "]");
+    }
+}
+
+
+
 template<typename T, typename B>
 LayerTestResult<T, 4> SimpleConvolution2dTestImpl(armnn::IWorkloadFactory& workloadFactory,
-                                                  const boost::multi_array<T, 4>& input,
-                                                  const boost::multi_array<T, 4>& kernel,
+                                                  const boost::multi_array<T, 4>& originalInput,
+                                                  const boost::multi_array<T, 4>& originalKernel,
                                                   const boost::multi_array<B, 1>& bias,
-                                                  const boost::multi_array<T, 4>& outputExpected,
+                                                  const boost::multi_array<T, 4>& originalOutputExpected,
                                                   float qScale,
                                                   int32_t qOffset,
+                                                  const armnn::DataLayoutIndexed& layout = armnn::DataLayout::NCHW,
                                                   uint32_t padLeft = 0,
                                                   uint32_t padTop = 0,
                                                   uint32_t padRight = 0,
                                                   uint32_t padBottom = 0)
 {
-    unsigned int inputHeight   = boost::numeric_cast<unsigned int>(input.shape()[2]);
-    unsigned int inputWidth    = boost::numeric_cast<unsigned int>(input.shape()[3]);
-    unsigned int inputChannels = boost::numeric_cast<unsigned int>(input.shape()[1]);
-    unsigned int inputNum      = boost::numeric_cast<unsigned int>(input.shape()[0]);
+    unsigned int inputHeight   = boost::numeric_cast<unsigned int>(originalInput.shape()[2]);
+    unsigned int inputWidth    = boost::numeric_cast<unsigned int>(originalInput.shape()[3]);
+    unsigned int inputChannels = boost::numeric_cast<unsigned int>(originalInput.shape()[1]);
+    unsigned int inputNum      = boost::numeric_cast<unsigned int>(originalInput.shape()[0]);
 
-    unsigned int outputHeight   = boost::numeric_cast<unsigned int>(outputExpected.shape()[2]);
-    unsigned int outputWidth    = boost::numeric_cast<unsigned int>(outputExpected.shape()[3]);
-    unsigned int outputChannels = boost::numeric_cast<unsigned int>(outputExpected.shape()[1]);
-    unsigned int outputNum      = boost::numeric_cast<unsigned int>(outputExpected.shape()[0]);
+    unsigned int outputHeight   = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[2]);
+    unsigned int outputWidth    = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[3]);
+    unsigned int outputChannels = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[1]);
+    unsigned int outputNum      = boost::numeric_cast<unsigned int>(originalOutputExpected.shape()[0]);
 
-    unsigned int kernelHeight = boost::numeric_cast<unsigned int>(kernel.shape()[2]);
-    unsigned int kernelWidth = boost::numeric_cast<unsigned int>(kernel.shape()[3]);
-    unsigned int kernelChannels = boost::numeric_cast<unsigned int>(kernel.shape()[1]);
-    unsigned int kernelDepthMul = boost::numeric_cast<unsigned int>(kernel.shape()[0]);
+    unsigned int kernelHeight = boost::numeric_cast<unsigned int>(originalKernel.shape()[2]);
+    unsigned int kernelWidth = boost::numeric_cast<unsigned int>(originalKernel.shape()[3]);
+    unsigned int kernelChannels = boost::numeric_cast<unsigned int>(originalKernel.shape()[1]);
+    unsigned int kernelDepthMul = boost::numeric_cast<unsigned int>(originalKernel.shape()[0]);
 
     bool biasEnabled = bias.size() > 0;
 
@@ -98,10 +123,11 @@
 
 
     // Note these tensors will use two (identical) batches.
-    armnn::TensorInfo inputTensorInfo({2*inputNum, inputChannels, inputHeight, inputWidth}, armnn::GetDataType<T>());
-    armnn::TensorInfo outputTensorInfo({2*outputNum, outputChannels, outputHeight, outputWidth},
-        armnn::GetDataType<T>());
-    armnn::TensorInfo kernelDesc({kernelDepthMul, kernelChannels, kernelHeight, kernelWidth}, armnn::GetDataType<T>());
+    // NOTE: if layout is unknown we will get an exception at this point.
+    armnn::TensorInfo inputTensorInfo = GetTensorInfo<T>(2*inputNum, inputChannels, inputHeight, inputWidth, layout);
+    armnn::TensorInfo outputTensorInfo = GetTensorInfo<T>(
+            2*outputNum, outputChannels, outputHeight, outputWidth, layout);
+    armnn::TensorInfo kernelDesc = GetTensorInfo<T>(kernelDepthMul, kernelChannels, kernelHeight, kernelWidth, layout);
     armnn::TensorInfo biasDesc({static_cast<unsigned int>(bias.size())}, armnn::GetDataType<B>());
 
     // Set quantization parameters if the requested type is a quantized type.
@@ -121,14 +147,25 @@
 
     // Construct input data - two batches of the same input image.
     std::vector<T> inputImage;
-    inputImage.assign(input.data(), input.data() + 1*inputChannels*inputHeight*inputWidth);
+    inputImage.assign(originalInput.data(), originalInput.data() + 1*inputChannels*inputHeight*inputWidth);
     std::vector<T> inputData;
     inputData.insert(inputData.end(), inputImage.begin(), inputImage.end());
     inputData.insert(inputData.end(), inputImage.begin(), inputImage.end());
+
+    // Permute the input data to NHWC at this point if the layout requires it.
+    const armnn::PermutationVector NCHWToNHWC = { 0, 3, 1, 2 };
+    if (layout.GetDataLayout() == armnn::DataLayout::NHWC)
+    {
+        std::vector<T> tmp(inputData.size());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NCHWToNHWC, inputData.data(), tmp.data());
+        inputData = tmp;
+    }
+
     auto batchedInput = MakeTensor<T, 4>(inputTensorInfo, inputData);
 
     std::vector<T> outputImage;
-    outputImage.assign(outputExpected.data(), outputExpected.data() + outputChannels*outputHeight*outputWidth);
+    outputImage.assign(originalOutputExpected.data(),
+            originalOutputExpected.data() + outputChannels*outputHeight*outputWidth);
 
     // Apply bias to output image if it is enabled.
     if(biasEnabled)
@@ -145,6 +182,13 @@
     outputData.insert(outputData.end(), outputImage.begin(), outputImage.end());
     outputData.insert(outputData.end(), outputImage.begin(), outputImage.end());
 
+    // Permute the expected output to NHWC at this point if the layout requires it.
+    if (layout.GetDataLayout() == armnn::DataLayout::NHWC)
+    {
+        std::vector<T> tmp(outputData.size());
+        armnnUtils::Permute(outputTensorInfo.GetShape(), NCHWToNHWC, outputData.data(), tmp.data());
+        outputData = tmp;
+    }
     ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, outputData);
 
     // Todo: nontrivial padding and strides.
@@ -158,7 +202,12 @@
     armnn::WorkloadInfo info;
     armnn::ScopedCpuTensorHandle weightsTensor(kernelDesc);
     armnn::ScopedCpuTensorHandle biasTensor(biasDesc);
-
+    // Permute the kernel if necessary
+    boost::multi_array<T, 4> kernel = boost::multi_array<T, 4>(originalKernel);
+    if (layout.GetDataLayout() == armnn::DataLayout::NHWC)
+    {
+        armnnUtils::Permute(kernelDesc.GetShape(), NCHWToNHWC, originalKernel.data(), kernel.data());
+    }
     AllocateAndCopyDataToITensorHandle(&weightsTensor, &kernel[0][0][0][0]);
 
     if(biasEnabled)
@@ -178,6 +227,7 @@
     data.m_Parameters.m_PadTop = padTop;
     data.m_Parameters.m_PadBottom = padBottom;
     data.m_Parameters.m_BiasEnabled = biasEnabled;
+    data.m_Parameters.m_DataLayout = layout.GetDataLayout();
 
     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateConvolution2d(data, info);
     inputHandle->Allocate();
diff --git a/src/backends/test/LayerTests.cpp b/src/backends/test/LayerTests.cpp
index e5a4258..e536cc9 100755
--- a/src/backends/test/LayerTests.cpp
+++ b/src/backends/test/LayerTests.cpp
@@ -85,7 +85,8 @@
 LayerTestResult<T, 4> SimpleConvolution2d3x5TestCommon(armnn::IWorkloadFactory& workloadFactory,
                                                        float                    qScale,
                                                        int32_t                  qOffset,
-                                                       bool                     biasEnabled)
+                                                       bool                     biasEnabled,
+                                                       const armnn::DataLayoutIndexed& layout)
 {
     // Use common single-batch 3-channel 16x8 image.
     armnn::TensorInfo inputDesc({1, 3, 8, 16}, armnn::GetDataType<T>());
@@ -156,7 +157,8 @@
       GetBias2<typename FullyConnectedBiasTypeForInputType<T>::Type>(biasEnabled, qScale, qOffset),
       expectedOutput,
       qScale,
-      qOffset);
+      qOffset,
+      layout);
 }
 
 template<typename T>
@@ -278,15 +280,17 @@
 }
 
 LayerTestResult<float, 4> SimpleConvolution2d3x5Test(armnn::IWorkloadFactory& workloadFactory,
-                                                     bool                     biasEnabled)
+                                                     bool                     biasEnabled,
+                                                     const armnn::DataLayoutIndexed& layout)
 {
-    return SimpleConvolution2d3x5TestCommon<float>(workloadFactory, 0.f, 0, biasEnabled);
+    return SimpleConvolution2d3x5TestCommon<float>(workloadFactory, 0.f, 0, biasEnabled, layout);
 }
 
 LayerTestResult<uint8_t, 4> SimpleConvolution2d3x5Uint8Test(armnn::IWorkloadFactory& workloadFactory,
-                                                         bool                     biasEnabled)
+                                                            bool                     biasEnabled,
+                                                            const armnn::DataLayoutIndexed& layout)
 {
-    return SimpleConvolution2d3x5TestCommon<uint8_t>(workloadFactory, 0.5f, 50, biasEnabled);
+    return SimpleConvolution2d3x5TestCommon<uint8_t>(workloadFactory, 0.5f, 50, biasEnabled, layout);
 }
 
 LayerTestResult<float, 4> SimpleConvolution2d3x3Test(armnn::IWorkloadFactory& workloadFactory,
@@ -359,6 +363,7 @@
       expectedOutput,
       qScale,
       qOffset,
+      armnn::DataLayout::NCHW,
       1,  // Padding left.
       2,  // Padding top.
       3,  // Padding right.
@@ -410,6 +415,7 @@
         expectedOutput,
         qScale,
         qOffset,
+        armnn::DataLayout::NCHW,
         1,  // Padding left.
         1,  // Padding top.
         2,  // Padding right.
diff --git a/src/backends/test/LayerTests.hpp b/src/backends/test/LayerTests.hpp
index ebd3841..6c3b9e1 100644
--- a/src/backends/test/LayerTests.hpp
+++ b/src/backends/test/LayerTests.hpp
@@ -50,7 +50,8 @@
 };
 
 LayerTestResult<float, 4> SimpleConvolution2d3x5Test(armnn::IWorkloadFactory& workloadFactory,
-                                                     bool biasEnabled);
+                                                     bool biasEnabled,
+                                                     const armnn::DataLayoutIndexed& layout);
 
 LayerTestResult<float, 4> SimpleConvolution2d3x3Test(armnn::IWorkloadFactory& workloadFactory,
                                                      bool biasEnabled);
@@ -312,7 +313,8 @@
 LayerTestResult<uint8_t, 4> DivisionBroadcast1DVectorUint8Test(armnn::IWorkloadFactory& workloadFactory);
 
 LayerTestResult<uint8_t, 4> SimpleConvolution2d3x5Uint8Test(armnn::IWorkloadFactory& workloadFactory,
-                                                            bool                     biasEnabled);
+                                                            bool                     biasEnabled,
+                                                            const armnn::DataLayoutIndexed& layout);
 
 LayerTestResult<uint8_t, 4> SimpleConvolution2d3x3Uint8Test(armnn::IWorkloadFactory& workloadFactory,
                                                             bool                     biasEnabled);