IVGCVSW-1974 - Update CreateWorkload test CL and Neon

Change-Id: Ie02ccbd5945cbacd609b3b5d8d746c202c8e9c69
diff --git a/src/armnn/test/CreateWorkload.hpp b/src/armnn/test/CreateWorkload.hpp
index b63e95d..ada6027 100644
--- a/src/armnn/test/CreateWorkload.hpp
+++ b/src/armnn/test/CreateWorkload.hpp
@@ -177,7 +177,8 @@
 
 template <typename Convolution2dWorkload, armnn::DataType DataType>
 std::unique_ptr<Convolution2dWorkload> CreateConvolution2dWorkloadTest(armnn::IWorkloadFactory& factory,
-                                                                              armnn::Graph&            graph)
+                                                                       armnn::Graph&            graph,
+                                                                       DataLayout dataLayout = DataLayout::NCHW)
 {
     // Creates the layer we're testing.
     Convolution2dDescriptor layerDesc;
@@ -188,10 +189,15 @@
     layerDesc.m_StrideX = 2;
     layerDesc.m_StrideY = 4;
     layerDesc.m_BiasEnabled = true;
+    layerDesc.m_DataLayout = dataLayout;
 
     Convolution2dLayer* const layer = graph.AddLayer<Convolution2dLayer>(layerDesc, "layer");
 
-    layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({2, 3, 5, 3}, DataType));
+    TensorShape weightShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 5, 3} : TensorShape{2, 5, 3, 3};
+    TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 8, 16} : TensorShape{2, 8, 16, 3};
+    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 2, 2, 10} : TensorShape{2, 2, 10, 2};
+
+    layer->m_Weight = std::make_unique<ScopedCpuTensorHandle>(TensorInfo(weightShape, DataType));
     layer->m_Bias   = std::make_unique<ScopedCpuTensorHandle>(TensorInfo({2}, GetBiasDataType(DataType)));
 
     layer->m_Weight->Allocate();
@@ -201,9 +207,9 @@
     Layer* const input = graph.AddLayer<InputLayer>(0, "input");
     Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
 
-    // Connecst up.
-    Connect(input, layer, TensorInfo({2, 3, 8, 16}, DataType));
-    Connect(layer, output, TensorInfo({2, 2, 2, 10}, DataType));
+    // Connects up.
+    Connect(input, layer, TensorInfo(inputShape, DataType));
+    Connect(layer, output, TensorInfo(outputShape, DataType));
     CreateTensorHandles(graph, factory);
 
     // Makes the workload and checks it.
@@ -216,11 +222,12 @@
     BOOST_TEST(queueDescriptor.m_Parameters.m_PadRight == 3);
     BOOST_TEST(queueDescriptor.m_Parameters.m_PadTop == 1);
     BOOST_TEST(queueDescriptor.m_Parameters.m_PadBottom == 1);
-    BOOST_TEST(queueDescriptor.m_Parameters.m_BiasEnabled == true);
+    BOOST_TEST(queueDescriptor.m_Parameters.m_BiasEnabled);
+    BOOST_TEST((queueDescriptor.m_Parameters.m_DataLayout == dataLayout));
 
     BOOST_TEST(queueDescriptor.m_Inputs.size() == 1);
     BOOST_TEST(queueDescriptor.m_Outputs.size() == 1);
-    BOOST_TEST((queueDescriptor.m_Weight->GetTensorInfo() == TensorInfo({2, 3, 5, 3}, DataType)));
+    BOOST_TEST((queueDescriptor.m_Weight->GetTensorInfo() == TensorInfo(weightShape, DataType)));
     BOOST_TEST((queueDescriptor.m_Bias->GetTensorInfo() ==
         TensorInfo({2}, GetBiasDataType(DataType))));
 
@@ -501,7 +508,7 @@
 
     NormalizationLayer* layer = graph.AddLayer<NormalizationLayer>(layerDesc, "layer");
 
-    // Creatse extra layers.
+    // Creates extra layers.
     Layer* const input = graph.AddLayer<InputLayer>(0, "input");
     Layer* const output = graph.AddLayer<OutputLayer>(0, "output");
 
diff --git a/src/backends/test/CreateWorkloadCl.cpp b/src/backends/test/CreateWorkloadCl.cpp
index 0314f6d..411f72b 100644
--- a/src/backends/test/CreateWorkloadCl.cpp
+++ b/src/backends/test/CreateWorkloadCl.cpp
@@ -199,32 +199,49 @@
     BOOST_TEST((outputHandle->GetTensor().info()->data_type() == arm_compute::DataType::F16));
 }
 
-template <typename armnn::DataType DataType>
-static void ClConvolution2dWorkloadTest()
+template <typename Convolution2dWorkloadType, typename armnn::DataType DataType>
+static void ClConvolution2dWorkloadTest(DataLayout dataLayout)
 {
     Graph graph;
     ClWorkloadFactory factory;
-    auto workload = CreateConvolution2dWorkloadTest<ClConvolution2dWorkload, DataType>(factory, graph);
+    auto workload = CreateConvolution2dWorkloadTest<ClConvolution2dWorkload, DataType>(factory,
+                                                                                       graph,
+                                                                                       dataLayout);
+
+    std::initializer_list<unsigned int> inputShape  = (dataLayout == DataLayout::NCHW) ?
+            std::initializer_list<unsigned int>({2, 3, 8, 16}) : std::initializer_list<unsigned int>({2, 8, 16, 3});
+    std::initializer_list<unsigned int> outputShape = (dataLayout == DataLayout::NCHW) ?
+            std::initializer_list<unsigned int>({2, 2, 2, 10}) : std::initializer_list<unsigned int>({2, 2, 10, 2});
 
     // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest).
     Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
     auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
     auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
-    BOOST_TEST(CompareIClTensorHandleShape(inputHandle, {2, 3, 8, 16}));
-    BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {2, 2, 2, 10}));
+    BOOST_TEST(CompareIClTensorHandleShape(inputHandle, inputShape));
+    BOOST_TEST(CompareIClTensorHandleShape(outputHandle, outputShape));
 }
 
-BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatWorkload)
+BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatNchwWorkload)
 {
-    ClConvolution2dWorkloadTest<armnn::DataType::Float32>();
+    ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
 }
 
-BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16Workload)
+BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatNhwcWorkload)
 {
-    ClConvolution2dWorkloadTest<armnn::DataType::Float16>();
+    ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
 }
 
-template <typename armnn::DataType DataType>
+BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16NchwWorkload)
+{
+    ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
+}
+
+BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16NhwcWorkload)
+{
+    ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
+}
+
+template <typename Convolution2dWorkloadType, typename armnn::DataType DataType>
 static void ClDirectConvolution2dWorkloadTest()
 {
     Graph graph;
@@ -241,17 +258,17 @@
 
 BOOST_AUTO_TEST_CASE(CreateDirectConvolution2dFloatWorkload)
 {
-    ClDirectConvolution2dWorkloadTest<armnn::DataType::Float32>();
+    ClDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>();
 }
 
 BOOST_AUTO_TEST_CASE(CreateDirectConvolution2dFloat16Workload)
 {
-    ClDirectConvolution2dWorkloadTest<armnn::DataType::Float16>();
+    ClDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>();
 }
 
 BOOST_AUTO_TEST_CASE(CreateDirectConvolution2dUint8Workload)
 {
-    ClDirectConvolution2dWorkloadTest<armnn::DataType::QuantisedAsymm8>();
+    ClDirectConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::QuantisedAsymm8>();
 }
 
 template <typename FullyConnectedWorkloadType, typename armnn::DataType DataType>
diff --git a/src/backends/test/CreateWorkloadNeon.cpp b/src/backends/test/CreateWorkloadNeon.cpp
index a67e68d..b2ec563 100644
--- a/src/backends/test/CreateWorkloadNeon.cpp
+++ b/src/backends/test/CreateWorkloadNeon.cpp
@@ -179,33 +179,46 @@
 }
 
 template <typename Convolution2dWorkloadType, typename armnn::DataType DataType>
-static void NeonCreateConvolution2dWorkloadTest()
+static void NeonCreateConvolution2dWorkloadTest(DataLayout dataLayout = DataLayout::NCHW)
 {
     Graph                graph;
     NeonWorkloadFactory  factory;
     auto                 workload = CreateConvolution2dWorkloadTest<Convolution2dWorkloadType,
-                                    DataType>(factory, graph);
+                                    DataType>(factory, graph, dataLayout);
+
+    TensorShape inputShape  = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 3, 8, 16} : TensorShape{2, 8, 16, 3};
+    TensorShape outputShape = (dataLayout == DataLayout::NCHW) ? TensorShape{2, 2, 2, 10} : TensorShape{2, 2, 10, 2};
 
     // Checks that outputs and inputs are as we expect them (see definition of CreateConvolution2dWorkloadTest).
     Convolution2dQueueDescriptor queueDescriptor = workload->GetData();
     auto inputHandle  = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]);
     auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]);
-    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({2, 3, 8, 16}, DataType)));
-    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle,  TensorInfo({2, 2, 2, 10}, DataType)));
+    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, DataType)));
+    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle,  TensorInfo(outputShape, DataType)));
 }
 
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16Workload)
+BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16NchwWorkload)
 {
     NeonCreateConvolution2dWorkloadTest<NeonConvolution2dFloatWorkload, DataType::Float16>();
 }
-#endif
 
-BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatWorkload)
+BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16NhwcWorkload)
+{
+    NeonCreateConvolution2dWorkloadTest<NeonConvolution2dFloatWorkload, DataType::Float16>(DataLayout::NHWC);
+}
+
+#endif
+BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatNchwWorkload)
 {
     NeonCreateConvolution2dWorkloadTest<NeonConvolution2dFloatWorkload, DataType::Float32>();
 }
 
+BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatNhwcWorkload)
+{
+    NeonCreateConvolution2dWorkloadTest<NeonConvolution2dFloatWorkload, DataType::Float32>(DataLayout::NHWC);
+}
+
 template <typename FullyConnectedWorkloadType, typename armnn::DataType DataType>
 static void NeonCreateFullyConnectedWorkloadTest()
 {