IVGCVSW-1920 Unit tests for NHWC Normalization Workloads and Layer

Change-Id: Iea941c1747454f5a4342351e4e82b10ffb9ccbbd
diff --git a/src/armnn/test/CreateWorkload.hpp b/src/armnn/test/CreateWorkload.hpp
index c111fe6..66f6282 100644
--- a/src/armnn/test/CreateWorkload.hpp
+++ b/src/armnn/test/CreateWorkload.hpp
@@ -485,9 +485,10 @@
     return workload;
 }
 
-template <typename NormalizationFloat32Workload, armnn::DataType DataType>
-std::unique_ptr<NormalizationFloat32Workload> CreateNormalizationWorkloadTest(armnn::IWorkloadFactory& factory,
-                                                                              armnn::Graph&            graph)
+template <typename NormalizationWorkload, armnn::DataType DataType>
+std::unique_ptr<NormalizationWorkload> CreateNormalizationWorkloadTest(armnn::IWorkloadFactory& factory,
+                                                                       armnn::Graph& graph,
+                                                                       DataLayout dataLayout = DataLayout::NCHW)
 {
     // Creates the layer we're testing.
     NormalizationDescriptor layerDesc;
@@ -497,6 +498,7 @@
     layerDesc.m_Alpha = 0.5f;
     layerDesc.m_Beta = -1.0f;
     layerDesc.m_K = 0.2f;
+    layerDesc.m_DataLayout = dataLayout;
 
     NormalizationLayer* layer = graph.AddLayer<NormalizationLayer>(layerDesc, "layer");
 
@@ -510,7 +512,7 @@
     CreateTensorHandles(graph, factory);
 
     // Makes the workload and checks it.
-    auto workload = MakeAndCheckWorkload<NormalizationFloat32Workload>(*layer, graph, factory);
+    auto workload = MakeAndCheckWorkload<NormalizationWorkload>(*layer, graph, factory);
 
     NormalizationQueueDescriptor queueDescriptor = workload->GetData();
     BOOST_TEST((queueDescriptor.m_Parameters.m_NormChannelType == NormalizationAlgorithmChannel::Across));
@@ -519,6 +521,7 @@
     BOOST_TEST(queueDescriptor.m_Parameters.m_Alpha == 0.5f);
     BOOST_TEST(queueDescriptor.m_Parameters.m_Beta == -1.0f);
     BOOST_TEST(queueDescriptor.m_Parameters.m_K == 0.2f);
+    BOOST_TEST((queueDescriptor.m_Parameters.m_DataLayout == dataLayout));
 
     BOOST_TEST(queueDescriptor.m_Inputs.size() == 1);
     BOOST_TEST(queueDescriptor.m_Outputs.size() == 1);
diff --git a/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp b/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp
index 969c9bb..f6c07e1 100644
--- a/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp
+++ b/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp
@@ -37,6 +37,10 @@
     arm_compute::ICLTensor& input  = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
     arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
+    arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
+    input.info()->set_data_layout(aclDataLayout);
+    output.info()->set_data_layout(aclDataLayout);
+
     arm_compute::NormalizationLayerInfo normalizationInfo = BuildArmComputeNormalizationLayerInfo(m_Data.m_Parameters);
 
     m_NormalizationLayer.configure(&input, &output, normalizationInfo);
diff --git a/src/backends/neon/workloads/NeonNormalizationFloatWorkload.cpp b/src/backends/neon/workloads/NeonNormalizationFloatWorkload.cpp
index 9cd315e..7019c82 100644
--- a/src/backends/neon/workloads/NeonNormalizationFloatWorkload.cpp
+++ b/src/backends/neon/workloads/NeonNormalizationFloatWorkload.cpp
@@ -49,6 +49,9 @@
 
     arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
     arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
+    input.info()->set_data_layout(aclDataLayout);
+    output.info()->set_data_layout(aclDataLayout);
 
     const arm_compute::NormType normType =
         ConvertNormalizationAlgorithmChannelToAclNormType(m_Data.m_Parameters.m_NormChannelType);
diff --git a/src/backends/test/ArmComputeCl.cpp b/src/backends/test/ArmComputeCl.cpp
index d83f812..a106c78 100644
--- a/src/backends/test/ArmComputeCl.cpp
+++ b/src/backends/test/ArmComputeCl.cpp
@@ -92,6 +92,11 @@
 ARMNN_AUTO_TEST_CASE(SimpleMerger, MergerTest)
 ARMNN_AUTO_TEST_CASE(MergerUint8, MergerUint8Test)
 
+// Normalization
+ARMNN_AUTO_TEST_CASE(SimpleNormalizationAcross, SimpleNormalizationAcrossTest)
+ARMNN_AUTO_TEST_CASE(SimpleNormalizationWithin, SimpleNormalizationWithinTest)
+ARMNN_AUTO_TEST_CASE(SimpleNormalizationAcrossNhwc, SimpleNormalizationAcrossNhwcTest)
+
 // Pooling
 ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize3x3Stride2x4, SimpleMaxPooling2dSize3x3Stride2x4Test, true)
 ARMNN_AUTO_TEST_CASE(SimpleMaxPooling2dSize3x3Stride2x4Uint8, SimpleMaxPooling2dSize3x3Stride2x4Uint8Test, true)
diff --git a/src/backends/test/ArmComputeNeon.cpp b/src/backends/test/ArmComputeNeon.cpp
index 4844cc2..66cce25 100644
--- a/src/backends/test/ArmComputeNeon.cpp
+++ b/src/backends/test/ArmComputeNeon.cpp
@@ -387,6 +387,11 @@
 ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet2, PermuteFloat32ValueSet2Test)
 ARMNN_AUTO_TEST_CASE(PermuteFloat32ValueSet3, PermuteFloat32ValueSet3Test)
 
+// Normalization
+ARMNN_AUTO_TEST_CASE(SimpleNormalizationAcross, SimpleNormalizationAcrossTest)
+ARMNN_AUTO_TEST_CASE(SimpleNormalizationWithin, SimpleNormalizationWithinTest)
+ARMNN_AUTO_TEST_CASE(SimpleNormalizationAcrossNhwc, SimpleNormalizationAcrossNhwcTest)
+
 // ============================================================================
 // COMPARE tests
 
diff --git a/src/backends/test/CreateWorkloadCl.cpp b/src/backends/test/CreateWorkloadCl.cpp
index 8599920..fb28ce1 100644
--- a/src/backends/test/CreateWorkloadCl.cpp
+++ b/src/backends/test/CreateWorkloadCl.cpp
@@ -285,13 +285,13 @@
 }
 
 template <typename NormalizationWorkloadType, typename armnn::DataType DataType>
-static void ClNormalizationWorkloadTest()
+static void ClNormalizationWorkloadTest(DataLayout dataLayout)
 {
     Graph graph;
     ClWorkloadFactory factory;
 
     auto workload = CreateNormalizationWorkloadTest<NormalizationWorkloadType, DataType>
-                    (factory, graph);
+                    (factory, graph, dataLayout);
 
     // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
     NormalizationQueueDescriptor queueDescriptor = workload->GetData();
@@ -302,14 +302,24 @@
     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, {3, 5, 5, 1}));
 }
 
-BOOST_AUTO_TEST_CASE(CreateNormalizationFloatWorkload)
+BOOST_AUTO_TEST_CASE(CreateNormalizationFloat32NchwWorkload)
 {
-    ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float32>();
+    ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
 }
 
-BOOST_AUTO_TEST_CASE(CreateNormalizationFloat16Workload)
+BOOST_AUTO_TEST_CASE(CreateNormalizationFloat16NchwWorkload)
 {
-    ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float16>();
+    ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
+}
+
+BOOST_AUTO_TEST_CASE(CreateNormalizationFloat32NhwcWorkload)
+{
+    ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
+}
+
+BOOST_AUTO_TEST_CASE(CreateNormalizationFloat16NhwcWorkload)
+{
+    ClNormalizationWorkloadTest<ClNormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
 }
 
 template <typename Pooling2dWorkloadType, typename armnn::DataType DataType>
diff --git a/src/backends/test/CreateWorkloadNeon.cpp b/src/backends/test/CreateWorkloadNeon.cpp
index e9fcb56..a6f3540 100644
--- a/src/backends/test/CreateWorkloadNeon.cpp
+++ b/src/backends/test/CreateWorkloadNeon.cpp
@@ -235,11 +235,11 @@
 }
 
 template <typename NormalizationWorkloadType, typename armnn::DataType DataType>
-static void NeonCreateNormalizationWorkloadTest()
+static void NeonCreateNormalizationWorkloadTest(DataLayout dataLayout)
 {
-    Graph               graph;
+    Graph graph;
     NeonWorkloadFactory factory;
-    auto                workload = CreateNormalizationWorkloadTest<NormalizationWorkloadType, DataType>(factory, graph);
+    auto workload = CreateNormalizationWorkloadTest<NormalizationWorkloadType, DataType>(factory, graph, dataLayout);
 
     // Checks that outputs and inputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
     NormalizationQueueDescriptor queueDescriptor = workload->GetData();
@@ -250,17 +250,28 @@
 }
 
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-BOOST_AUTO_TEST_CASE(CreateNormalizationFloat16Workload)
+BOOST_AUTO_TEST_CASE(CreateNormalizationFloat16NchwWorkload)
 {
-    NeonCreateNormalizationWorkloadTest<NeonNormalizationFloatWorkload, DataType::Float16>();
+    NeonCreateNormalizationWorkloadTest<NeonNormalizationFloatWorkload, DataType::Float16>(DataLayout::NCHW);
+}
+
+BOOST_AUTO_TEST_CASE(CreateNormalizationFloat16NhwcWorkload)
+{
+    NeonCreateNormalizationWorkloadTest<NeonNormalizationFloatWorkload, DataType::Float16>(DataLayout::NHWC);
 }
 #endif
 
-BOOST_AUTO_TEST_CASE(CreateNormalizationFloatWorkload)
+BOOST_AUTO_TEST_CASE(CreateNormalizationFloatNchwWorkload)
 {
-    NeonCreateNormalizationWorkloadTest<NeonNormalizationFloatWorkload, DataType::Float32>();
+    NeonCreateNormalizationWorkloadTest<NeonNormalizationFloatWorkload, DataType::Float32>(DataLayout::NCHW);
 }
 
+BOOST_AUTO_TEST_CASE(CreateNormalizationFloatNhwcWorkload)
+{
+    NeonCreateNormalizationWorkloadTest<NeonNormalizationFloatWorkload, DataType::Float32>(DataLayout::NHWC);
+}
+
+
 template <typename Pooling2dWorkloadType, typename armnn::DataType DataType>
 static void NeonCreatePooling2dWorkloadTest()
 {
diff --git a/src/backends/test/CreateWorkloadRef.cpp b/src/backends/test/CreateWorkloadRef.cpp
index 9313ee8..c30093d 100644
--- a/src/backends/test/CreateWorkloadRef.cpp
+++ b/src/backends/test/CreateWorkloadRef.cpp
@@ -227,17 +227,22 @@
     RefCreateFullyConnectedWorkloadTest<RefFullyConnectedUint8Workload, armnn::DataType::QuantisedAsymm8>();
 }
 
-BOOST_AUTO_TEST_CASE(CreateNormalizationWorkload)
+template <typename NormalizationWorkloadType, armnn::DataType DataType>
+static void RefCreateNormalizationWorkloadTest()
 {
-    Graph                graph;
+    Graph graph;
     RefWorkloadFactory factory;
-    auto                 workload = CreateNormalizationWorkloadTest<RefNormalizationFloat32Workload,
-                                    armnn::DataType::Float32>(factory, graph);
+    auto workload = CreateNormalizationWorkloadTest<NormalizationWorkloadType, DataType>(factory, graph);
 
     // Checks that outputs and inputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
     CheckInputOutput(std::move(workload),
-                     TensorInfo({3, 5, 5, 1}, DataType::Float32),
-                     TensorInfo({3, 5, 5, 1}, DataType::Float32));
+                     TensorInfo({3, 5, 5, 1}, DataType),
+                     TensorInfo({3, 5, 5, 1}, DataType));
+}
+
+BOOST_AUTO_TEST_CASE(CreateRefNormalizationNchwWorkload)
+{
+    RefCreateNormalizationWorkloadTest<RefNormalizationFloat32Workload, armnn::DataType::Float32>();
 }
 
 template <typename Pooling2dWorkloadType, armnn::DataType DataType>
diff --git a/src/backends/test/LayerTests.cpp b/src/backends/test/LayerTests.cpp
index 55f4a5c..267a8d6 100644
--- a/src/backends/test/LayerTests.cpp
+++ b/src/backends/test/LayerTests.cpp
@@ -520,6 +520,13 @@
     return SimpleNormalizationTestImpl(workloadFactory, normChannel, normMethod);
 }
 
+LayerTestResult<float,4> SimpleNormalizationAcrossNhwcTest(armnn::IWorkloadFactory& workloadFactory)
+{
+    // LocalBrightness normalization across channels, delegated to the NHWC test implementation.
+    return SimpleNormalizationNhwcClNeonTestImpl(workloadFactory,
+        armnn::NormalizationAlgorithmChannel::Across, armnn::NormalizationAlgorithmMethod::LocalBrightness);
+}
+
 LayerTestResult<float,2> SimpleSoftmaxTest(armnn::IWorkloadFactory& workloadFactory, float beta)
 {
     return SimpleSoftmaxTestImpl<float>(workloadFactory, beta);
diff --git a/src/backends/test/LayerTests.hpp b/src/backends/test/LayerTests.hpp
index 8939903..3e5bb3d 100644
--- a/src/backends/test/LayerTests.hpp
+++ b/src/backends/test/LayerTests.hpp
@@ -131,6 +131,7 @@
 
 LayerTestResult<float, 4> SimpleNormalizationAcrossTest(armnn::IWorkloadFactory& workloadFactory);
 LayerTestResult<float, 4> SimpleNormalizationWithinTest(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<float, 4> SimpleNormalizationAcrossNhwcTest(armnn::IWorkloadFactory& workloadFactory);
 
 LayerTestResult<float, 2> SimpleSoftmaxTest(armnn::IWorkloadFactory& workloadFactory, float beta);
 LayerTestResult<uint8_t, 2> SimpleSoftmaxUint8Test(armnn::IWorkloadFactory& workloadFactory, float beta);
diff --git a/src/backends/test/NormTestImpl.hpp b/src/backends/test/NormTestImpl.hpp
index dfa2f70..300eece 100644
--- a/src/backends/test/NormTestImpl.hpp
+++ b/src/backends/test/NormTestImpl.hpp
@@ -5,6 +5,7 @@
 
 #include <armnn/Exceptions.hpp>
 #include <armnn/LayerSupport.hpp>
+#include <armnn/Types.hpp>
 
 #include <backends/CpuTensorHandle.hpp>
 #include <backends/WorkloadFactory.hpp>
@@ -58,6 +59,7 @@
     data.m_Parameters.m_Alpha = alpha;
     data.m_Parameters.m_Beta = beta;
     data.m_Parameters.m_K = kappa;
+    data.m_Parameters.m_DataLayout = armnn::DataLayout::NCHW;
 
     armnn::PassthroughCpuTensorHandle refHandle(outputTensorInfo, &ret.outputExpected[0][0][0][0]);
     armnn::NormalizationQueueDescriptor refData = data;
@@ -150,6 +152,108 @@
     return ret;
 }
 
+// NHWC normalization test: runs a 2x2x2x1 (N,H,W,C) Float32 input through CreateNormalization and fills in
+// the expected output. Written for CL and NEON, which currently support only Across normalization for NHWC.
+LayerTestResult<float,4> SimpleNormalizationNhwcClNeonTestImpl(armnn::IWorkloadFactory& workloadFactory,
+                                                               armnn::NormalizationAlgorithmChannel normChannel,
+                                                               armnn::NormalizationAlgorithmMethod normMethod)
+{
+    const unsigned int inputHeight = 2;
+    const unsigned int inputWidth = 2;
+    const unsigned int inputChannels = 1;
+    const unsigned int inputNum = 2;
+
+    unsigned int outputHeight = inputHeight;
+    unsigned int outputWidth = inputWidth;
+    unsigned int outputChannels = inputChannels;
+    unsigned int outputNum = inputNum;
+
+    unsigned int inputShape[] = { inputNum, inputHeight, inputWidth, inputChannels }; // N, H, W, C order
+    unsigned int outputShape[] = { outputNum, outputHeight, outputWidth, outputChannels };
+
+    auto inputTensorInfo = armnn::TensorInfo(4, inputShape, armnn::DataType::Float32);
+    auto outputTensorInfo = armnn::TensorInfo(4, outputShape, armnn::DataType::Float32);
+
+    LayerTestResult<float,4> ret(outputTensorInfo);
+
+    auto input = MakeTensor<float, 4>(inputTensorInfo, std::vector<float>({
+        // Batch #0
+        1.0f, 2.0f,
+        3.0f, 4.0f,
+        // Batch #1
+        5.0f, 6.0f,
+        7.0f, 8.0f
+    }));
+
+    float alpha = 1.f;
+    float beta = 1.f;
+    float kappa = 1.f;
+    uint32_t normSize = 3;
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::NormalizationQueueDescriptor data;
+    armnn::WorkloadInfo info;
+    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
+    data.m_Parameters.m_NormChannelType = normChannel;
+    data.m_Parameters.m_NormMethodType = normMethod;
+    data.m_Parameters.m_NormSize = normSize;
+    data.m_Parameters.m_Alpha = alpha;
+    data.m_Parameters.m_Beta = beta;
+    data.m_Parameters.m_K = kappa;
+    data.m_Parameters.m_DataLayout = armnn::DataLayout::NHWC;
+
+    armnn::PassthroughCpuTensorHandle refHandle(outputTensorInfo, &ret.outputExpected[0][0][0][0]);
+    armnn::NormalizationQueueDescriptor refData = data;
+    armnn::WorkloadInfo refInfo = info; // NOTE(review): the ref* objects are only touched by SetWorkloadOutput below.
+    SetWorkloadOutput(refData, refInfo, 0, outputTensorInfo, &refHandle);
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateNormalization(data, info);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+
+    workloadFactory.Finalize();
+    workload->Execute();
+
+    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
+
+    switch (normMethod) // Only LocalBrightness + Across has expected values; anything else throws.
+    {
+        case armnn::NormalizationAlgorithmMethod::LocalBrightness:
+        {
+            switch (normChannel)
+            {
+                case armnn::NormalizationAlgorithmChannel::Across:
+                { // These values equal x / (kappa + alpha * x * x) for each input x (beta is 1).
+                    std::vector<float> expectedOutput{ 0.5f, 0.400000006f, 0.300000012f, 0.235294119f,
+                                                       0.192307696f, 0.16216217f, 0.140000001f, 0.123076923f };
+                    ret.outputExpected = MakeTensor<float, 4>(outputTensorInfo, expectedOutput);
+                    break;
+                }
+                default:
+                {
+                    throw armnn::UnimplementedException("Unsupported normalisation channel type, "
+                                                        "Only Cross-map is supported for NHWC layout");
+                }
+            }
+            break;
+        }
+        case armnn::NormalizationAlgorithmMethod::LocalContrast: // NOTE: intentional fallthrough.
+        default:
+        {
+            throw armnn::UnimplementedException("Unsupported normalisation method type, "
+                                                "only LocalBrightness is supported");
+        }
+    }
+
+    return ret;
+}
+
 LayerTestResult<float,4> CompareNormalizationTestImpl(armnn::IWorkloadFactory& workloadFactory,
                                                       armnn::IWorkloadFactory& refWorkloadFactory,
                                                       armnn::NormalizationAlgorithmChannel normChannel,