IVGCVSW-2032 + IVGCVSW-2033 Add NHWC implementation to the reference
Normalization workload

 * Enabled NHWC data layout support in RefNormalizationFloat32Workload
   for Across-channel normalization
 * Added unit test for the reference implementation

Change-Id: I0e1f319e76491d43b83b121c273fadb5b259d1a0
diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp
index 6cfa4a3..2137161 100644
--- a/src/backends/reference/test/RefLayerTests.cpp
+++ b/src/backends/reference/test/RefLayerTests.cpp
@@ -100,8 +100,10 @@
 ARMNN_AUTO_TEST_CASE(ConstantLinearActivation, ConstantLinearActivationTest)
 ARMNN_AUTO_TEST_CASE(ConstantLinearActivationUint8, ConstantLinearActivationUint8Test)
 
+// Normalization
 ARMNN_AUTO_TEST_CASE(SimpleNormalizationAcross, SimpleNormalizationAcrossTest)
 ARMNN_AUTO_TEST_CASE(SimpleNormalizationWithin, SimpleNormalizationWithinTest)
+ARMNN_AUTO_TEST_CASE(SimpleNormalizationAcrossNhwc, SimpleNormalizationAcrossNhwcTest)
 
 ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta1, SimpleSoftmaxTest, 1.0f)
 ARMNN_AUTO_TEST_CASE(SimpleSoftmaxBeta2, SimpleSoftmaxTest, 2.0f)
diff --git a/src/backends/reference/workloads/RefNormalizationFloat32Workload.cpp b/src/backends/reference/workloads/RefNormalizationFloat32Workload.cpp
index 5c24416..4cec023 100644
--- a/src/backends/reference/workloads/RefNormalizationFloat32Workload.cpp
+++ b/src/backends/reference/workloads/RefNormalizationFloat32Workload.cpp
@@ -6,6 +6,7 @@
 #include "RefNormalizationFloat32Workload.hpp"
 
 #include "RefWorkloadUtils.hpp"
+#include "TensorBufferArrayView.hpp"
 
 #include "Profiling.hpp"
 
@@ -87,12 +88,22 @@
                             uint32_t           norm_size,
                             float              alpha,
                             float              beta,
-                            float              kappa)
+                            float              kappa,
+                            DataLayout         dataLayout)
 {
+    TensorBufferArrayView<const float> input(tensorShape,
+                                             inputData,
+                                             dataLayout);
+    TensorBufferArrayView<float> output(tensorShape,
+                                        outputData,
+                                        dataLayout);
+
+    DataLayoutIndexed dataLayoutIndexed(dataLayout);
+
     const unsigned int batchSize = tensorShape[0];
-    const unsigned int depth     = tensorShape[1];
-    const unsigned int rows      = tensorShape[2];
-    const unsigned int cols      = tensorShape[3];
+    const unsigned int depth     = tensorShape[dataLayoutIndexed.GetChannelsIndex()];
+    const unsigned int rows      = tensorShape[dataLayoutIndexed.GetHeightIndex()];
+    const unsigned int cols      = tensorShape[dataLayoutIndexed.GetWidthIndex()];
 
     int radius = boost::numeric_cast<int>(norm_size / 2u); /* Strong Assumption on rounding Mode */
 
@@ -114,23 +125,15 @@
                             continue;
                         }
 
-                        float inval = inputData[n * cols * rows * depth +
-                                                boost::numeric_cast<unsigned int>(k) * cols * rows +
-                                                h * cols +
-                                                w];
+                        float inval = input.Get(n, boost::numeric_cast<unsigned int>(k), h, w);
 
-                        accumulated_scale += inval*inval;
+                        accumulated_scale += inval * inval;
                     }
+
                     float scale = kappa + (accumulated_scale * alpha);
                     scale = powf(scale, -beta);
-                    outputData[n * cols * rows * depth +
-                               c * cols * rows +
-                               h * cols +
-                               w] = scale *
-                                   inputData[n * cols * rows * depth +
-                                             c * cols * rows +
-                                             h * cols +
-                                             w];
+
+                    output.Get(n, c, h, w) = scale * input.Get(n, c, h, w);
                 }
             }
         }
@@ -146,7 +149,6 @@
     float*       outputData = GetOutputTensorDataFloat(0, m_Data);
     const float* inputData = GetInputTensorDataFloat(0, m_Data);
 
-
     if (NormalizationAlgorithmMethod::LocalBrightness == m_Data.m_Parameters.m_NormMethodType)
     {
         if (NormalizationAlgorithmChannel::Within == m_Data.m_Parameters.m_NormChannelType)
@@ -167,7 +169,8 @@
                                    m_Data.m_Parameters.m_NormSize,
                                    m_Data.m_Parameters.m_Alpha,
                                    m_Data.m_Parameters.m_Beta,
-                                   m_Data.m_Parameters.m_K);
+                                   m_Data.m_Parameters.m_K,
+                                   m_Data.m_Parameters.m_DataLayout);
         }
         else
         {
diff --git a/src/backends/test/LayerTests.cpp b/src/backends/test/LayerTests.cpp
index 1faacac..726cb7d 100755
--- a/src/backends/test/LayerTests.cpp
+++ b/src/backends/test/LayerTests.cpp
@@ -694,7 +694,7 @@
 {
     auto normMethod = armnn::NormalizationAlgorithmMethod::LocalBrightness;
     auto normChannel = armnn::NormalizationAlgorithmChannel::Across;
-    return SimpleNormalizationNhwcClNeonTestImpl(workloadFactory, normChannel, normMethod);
+    return SimpleNormalizationNhwcTestImpl(workloadFactory, normChannel, normMethod);
 }
 
 LayerTestResult<float,2> SimpleSoftmaxTest(armnn::IWorkloadFactory& workloadFactory, float beta)
diff --git a/src/backends/test/NormTestImpl.hpp b/src/backends/test/NormTestImpl.hpp
index 300eece..f4e6aea 100644
--- a/src/backends/test/NormTestImpl.hpp
+++ b/src/backends/test/NormTestImpl.hpp
@@ -152,11 +152,9 @@
     return ret;
 }
 
-// This is test implementation for CL and NEON,
-// as currently, only Across Normalization is supported on CL and NEON for NHWC.
-LayerTestResult<float,4> SimpleNormalizationNhwcClNeonTestImpl(armnn::IWorkloadFactory& workloadFactory,
-                                                               armnn::NormalizationAlgorithmChannel normChannel,
-                                                               armnn::NormalizationAlgorithmMethod normMethod)
+LayerTestResult<float,4> SimpleNormalizationNhwcTestImpl(armnn::IWorkloadFactory& workloadFactory,
+                                                         armnn::NormalizationAlgorithmChannel normChannel,
+                                                         armnn::NormalizationAlgorithmMethod normMethod)
 {
     const unsigned int inputHeight = 2;
     const unsigned int inputWidth = 2;