IVGCVSW-1863 Support NHWC for L2Normalization

 * Added L2NormalizationDescriptor struct with m_DataLayout member
 * Updated all IsL2NormalizationSupported calls to take a descriptor
   as an argument
 * Updated L2NormalizationLayer to take a descriptor as an argument

!android-nn-driver:150116

Change-Id: I0459352d19cfd269bc864a70cf73910bf44fdc01
diff --git a/include/armnn/Descriptors.hpp b/include/armnn/Descriptors.hpp
index bc1b59b..30c8144 100644
--- a/include/armnn/Descriptors.hpp
+++ b/include/armnn/Descriptors.hpp
@@ -274,6 +274,15 @@
     DataLayout                    m_DataLayout;
 };
 
+struct L2NormalizationDescriptor
+{
+    L2NormalizationDescriptor()
+        : m_DataLayout(DataLayout::NCHW)
+    {}
+
+    DataLayout m_DataLayout;
+};
+
 struct BatchNormalizationDescriptor
 {
     BatchNormalizationDescriptor()
diff --git a/include/armnn/DescriptorsFwd.hpp b/include/armnn/DescriptorsFwd.hpp
index 9cb3463..739c120 100644
--- a/include/armnn/DescriptorsFwd.hpp
+++ b/include/armnn/DescriptorsFwd.hpp
@@ -15,6 +15,7 @@
 struct LstmDescriptor;
 struct PermuteDescriptor;
 struct NormalizationDescriptor;
+struct L2NormalizationDescriptor;
 struct MeanDescriptor;
 struct PadDescriptor;
 struct Pooling2dDescriptor;
diff --git a/include/armnn/INetwork.hpp b/include/armnn/INetwork.hpp
index 1d2679a..2c83909 100644
--- a/include/armnn/INetwork.hpp
+++ b/include/armnn/INetwork.hpp
@@ -228,9 +228,11 @@
 
     /// Adds an L2 normalization layer to the network.
     /// Normalization is performed along dimension 1, but requires a 4d input.
+    /// @param desc - Parameters for the L2 normalization operation.
     /// @param name - Optional name for the layer.
     /// @return - Interface for configuring the layer.
-    virtual IConnectableLayer* AddL2NormalizationLayer(const char* name = nullptr) = 0;
+    virtual IConnectableLayer* AddL2NormalizationLayer(const L2NormalizationDescriptor& desc,
+                                                       const char* name = nullptr) = 0;
 
     /// Adds a layer with no inputs and a single output, which always corresponds to
     /// the passed in constant tensor.
diff --git a/include/armnn/LayerSupport.hpp b/include/armnn/LayerSupport.hpp
index 3c7cce5..25e888e 100644
--- a/include/armnn/LayerSupport.hpp
+++ b/include/armnn/LayerSupport.hpp
@@ -104,6 +104,7 @@
 bool IsL2NormalizationSupported(Compute compute,
                                 const TensorInfo& input,
                                 const TensorInfo& output,
+                                const L2NormalizationDescriptor& descriptor,
                                 char* reasonIfUnsupported = nullptr,
                                 size_t reasonIfUnsupportedMaxLength = 1024);
 
diff --git a/src/armnn/LayerSupport.cpp b/src/armnn/LayerSupport.cpp
index 6e7e3e1..74243df 100644
--- a/src/armnn/LayerSupport.cpp
+++ b/src/armnn/LayerSupport.cpp
@@ -196,10 +196,11 @@
 bool IsL2NormalizationSupported(Compute compute,
                                 const TensorInfo& input,
                                 const TensorInfo& output,
+                                const L2NormalizationDescriptor& descriptor,
                                 char* reasonIfUnsupported,
                                 size_t reasonIfUnsupportedMaxLength)
 {
-    FORWARD_LAYER_SUPPORT_FUNC(compute, IsL2NormalizationSupported, input, output);
+    FORWARD_LAYER_SUPPORT_FUNC(compute, IsL2NormalizationSupported, input, output, descriptor);
 }
 
 bool IsLstmSupported(Compute compute, const TensorInfo& input, const TensorInfo& outputStateIn,
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index 4f5e297..49e60e1 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -468,9 +468,10 @@
     return m_Graph->AddLayer<ResizeBilinearLayer>(resizeDescriptor,name);
 }
 
-IConnectableLayer* Network::AddL2NormalizationLayer(const char* name)
+IConnectableLayer* Network::AddL2NormalizationLayer(const L2NormalizationDescriptor& desc,
+                                                    const char* name)
 {
-    return m_Graph->AddLayer<L2NormalizationLayer>(name);
+    return m_Graph->AddLayer<L2NormalizationLayer>(desc, name);
 }
 
 IConnectableLayer* Network::AddConstantLayer(const ConstTensor& input, const char* name)
diff --git a/src/armnn/Network.hpp b/src/armnn/Network.hpp
index ea4284e..129513d 100644
--- a/src/armnn/Network.hpp
+++ b/src/armnn/Network.hpp
@@ -98,7 +98,8 @@
     IConnectableLayer* AddResizeBilinearLayer(const ResizeBilinearDescriptor& resizeDesc,
                                               const char* name = nullptr) override;
 
-    IConnectableLayer* AddL2NormalizationLayer(const char* name = nullptr) override;
+    IConnectableLayer* AddL2NormalizationLayer(const L2NormalizationDescriptor& desc,
+                                               const char* name = nullptr) override;
 
     IConnectableLayer* AddConstantLayer(const ConstTensor& input, const char* name = nullptr) override;
 
diff --git a/src/armnn/layers/L2NormalizationLayer.cpp b/src/armnn/layers/L2NormalizationLayer.cpp
index c114a80..683c7db 100644
--- a/src/armnn/layers/L2NormalizationLayer.cpp
+++ b/src/armnn/layers/L2NormalizationLayer.cpp
@@ -13,8 +13,8 @@
 namespace armnn
 {
 
-L2NormalizationLayer::L2NormalizationLayer(const char* name)
-    : Layer(1, 1, LayerType::L2Normalization, name)
+L2NormalizationLayer::L2NormalizationLayer(const L2NormalizationDescriptor& param, const char* name)
+    : LayerWithParameters(1, 1, LayerType::L2Normalization, param, name)
 {
 }
 
@@ -27,7 +27,7 @@
 
 L2NormalizationLayer* L2NormalizationLayer::Clone(Graph& graph) const
 {
-    return CloneBase<L2NormalizationLayer>(graph, GetName());
+    return CloneBase<L2NormalizationLayer>(graph, m_Param, GetName());
 }
 
 void L2NormalizationLayer::ValidateTensorShapesFromInputs()
diff --git a/src/armnn/layers/L2NormalizationLayer.hpp b/src/armnn/layers/L2NormalizationLayer.hpp
index fdf46eb..bf4d49e 100644
--- a/src/armnn/layers/L2NormalizationLayer.hpp
+++ b/src/armnn/layers/L2NormalizationLayer.hpp
@@ -4,12 +4,12 @@
 //
 #pragma once
 
-#include <Layer.hpp>
+#include "LayerWithParameters.hpp"
 
 namespace armnn
 {
 
-class L2NormalizationLayer : public Layer
+class L2NormalizationLayer : public LayerWithParameters<L2NormalizationDescriptor>
 {
 public:
     virtual std::unique_ptr<IWorkload> CreateWorkload(const Graph& graph,
@@ -20,7 +20,7 @@
     void ValidateTensorShapesFromInputs() override;
 
 protected:
-    L2NormalizationLayer(const char* name);
+    L2NormalizationLayer(const L2NormalizationDescriptor& param, const char* name);
     ~L2NormalizationLayer() = default;
 };
 
diff --git a/src/armnn/test/CreateWorkload.hpp b/src/armnn/test/CreateWorkload.hpp
index 52f0673..61f9f1c 100644
--- a/src/armnn/test/CreateWorkload.hpp
+++ b/src/armnn/test/CreateWorkload.hpp
@@ -836,10 +836,13 @@
 
 template <typename L2NormalizationWorkload, armnn::DataType DataType>
 std::unique_ptr<L2NormalizationWorkload> CreateL2NormalizationWorkloadTest(armnn::IWorkloadFactory& factory,
-    armnn::Graph& graph)
+    armnn::Graph& graph, DataLayout dataLayout = DataLayout::NCHW)
 {
     // Creates the layer we're testing.
-    Layer* const layer = graph.AddLayer<L2NormalizationLayer>("l2norm");
+    L2NormalizationDescriptor layerDesc;
+    layerDesc.m_DataLayout = dataLayout;
+
+    Layer* const layer = graph.AddLayer<L2NormalizationLayer>(layerDesc, "l2norm");
 
     // Creates extra layers.
     Layer* const input = graph.AddLayer<InputLayer>(0, "input");
@@ -856,6 +859,7 @@
     auto workload = MakeAndCheckWorkload<L2NormalizationWorkload>(*layer, graph, factory);
 
     L2NormalizationQueueDescriptor queueDescriptor = workload->GetData();
+    BOOST_TEST((queueDescriptor.m_Parameters.m_DataLayout == dataLayout));
     BOOST_TEST(queueDescriptor.m_Inputs.size() == 1);
     BOOST_TEST(queueDescriptor.m_Outputs.size() == 1);
 
diff --git a/src/backends/ClLayerSupport.cpp b/src/backends/ClLayerSupport.cpp
index 8c9ba6e..c003d55 100644
--- a/src/backends/ClLayerSupport.cpp
+++ b/src/backends/ClLayerSupport.cpp
@@ -290,9 +290,10 @@
 
 bool IsL2NormalizationSupportedCl(const TensorInfo& input,
                                   const TensorInfo& output,
+                                  const L2NormalizationDescriptor& descriptor,
                                   std::string* reasonIfUnsupported)
 {
-    FORWARD_WORKLOAD_VALIDATE_FUNC(ClL2NormalizationWorkloadValidate, reasonIfUnsupported, input, output);
+    FORWARD_WORKLOAD_VALIDATE_FUNC(ClL2NormalizationWorkloadValidate, reasonIfUnsupported, input, output, descriptor);
 }
 
 bool IsMergerSupportedCl(const std::vector<const TensorInfo*> inputs,
diff --git a/src/backends/ClLayerSupport.hpp b/src/backends/ClLayerSupport.hpp
index 69c9b64..700d718 100644
--- a/src/backends/ClLayerSupport.hpp
+++ b/src/backends/ClLayerSupport.hpp
@@ -76,6 +76,7 @@
 
 bool IsL2NormalizationSupportedCl(const TensorInfo& input,
                                   const TensorInfo& output,
+                                  const L2NormalizationDescriptor& descriptor,
                                   std::string* reasonIfUnsupported = nullptr);
 
 bool IsLstmSupportedCl(const TensorInfo& input, const TensorInfo& outputStateIn,
diff --git a/src/backends/ClWorkloads/ClL2NormalizationFloatWorkload.cpp b/src/backends/ClWorkloads/ClL2NormalizationFloatWorkload.cpp
index 8f5ed5f..0ca3343 100644
--- a/src/backends/ClWorkloads/ClL2NormalizationFloatWorkload.cpp
+++ b/src/backends/ClWorkloads/ClL2NormalizationFloatWorkload.cpp
@@ -15,10 +15,11 @@
 using namespace armcomputetensorutils;
 
 arm_compute::Status ClL2NormalizationWorkloadValidate(const TensorInfo& input,
-                                                      const TensorInfo& output)
+                                                      const TensorInfo& output,
+                                                      const L2NormalizationDescriptor& descriptor)
 {
-    const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
-    const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
+    const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
+    const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
 
     arm_compute::NormalizationLayerInfo normalizationInfo =
             CreateAclNormalizationLayerInfoForL2Normalization(input);
diff --git a/src/backends/ClWorkloads/ClL2NormalizationFloatWorkload.hpp b/src/backends/ClWorkloads/ClL2NormalizationFloatWorkload.hpp
index f3f7de1..20c0426 100644
--- a/src/backends/ClWorkloads/ClL2NormalizationFloatWorkload.hpp
+++ b/src/backends/ClWorkloads/ClL2NormalizationFloatWorkload.hpp
@@ -13,7 +13,8 @@
 {
 
 arm_compute::Status ClL2NormalizationWorkloadValidate(const TensorInfo& input,
-                                                      const TensorInfo& output);
+                                                      const TensorInfo& output,
+                                                      const L2NormalizationDescriptor& descriptor);
 
 class ClL2NormalizationFloatWorkload : public FloatWorkload<L2NormalizationQueueDescriptor>
 {
diff --git a/src/backends/WorkloadData.hpp b/src/backends/WorkloadData.hpp
index ff5916e..9fcc044 100644
--- a/src/backends/WorkloadData.hpp
+++ b/src/backends/WorkloadData.hpp
@@ -252,7 +252,7 @@
     void Validate(const WorkloadInfo& workloadInfo) const;
 };
 
-struct L2NormalizationQueueDescriptor : QueueDescriptor
+struct L2NormalizationQueueDescriptor : QueueDescriptorWithParameters<L2NormalizationDescriptor>
 {
     void Validate(const WorkloadInfo& workloadInfo) const;
 };
diff --git a/src/backends/WorkloadFactory.cpp b/src/backends/WorkloadFactory.cpp
index d23ac83..aaccabd 100644
--- a/src/backends/WorkloadFactory.cpp
+++ b/src/backends/WorkloadFactory.cpp
@@ -274,10 +274,18 @@
         }
         case LayerType::L2Normalization:
         {
+            auto cLayer = boost::polymorphic_downcast<const L2NormalizationLayer*>(&layer);
+            const L2NormalizationDescriptor& descriptor = cLayer->GetParameters();
+
             const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
             const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
-            result = IsL2NormalizationSupported(compute, OverrideDataType(input, dataType),
-                    OverrideDataType(output, dataType), reason, reasonCapacity);
+
+            result = IsL2NormalizationSupported(compute,
+                                                OverrideDataType(input, dataType),
+                                                OverrideDataType(output, dataType),
+                                                descriptor,
+                                                reason,
+                                                reasonCapacity);
             break;
         }
         case LayerType::Lstm:
diff --git a/src/backends/neon/NeonLayerSupport.cpp b/src/backends/neon/NeonLayerSupport.cpp
index dfaea5c..a79f4c0 100644
--- a/src/backends/neon/NeonLayerSupport.cpp
+++ b/src/backends/neon/NeonLayerSupport.cpp
@@ -279,9 +279,10 @@
 
 bool IsL2NormalizationSupportedNeon(const TensorInfo& input,
                                     const TensorInfo& output,
+                                    const L2NormalizationDescriptor& descriptor,
                                     std::string* reasonIfUnsupported)
 {
-    FORWARD_WORKLOAD_VALIDATE_FUNC(NeonL2NormalizationWorkloadValidate, reasonIfUnsupported, input, output);
+    FORWARD_WORKLOAD_VALIDATE_FUNC(NeonL2NormalizationWorkloadValidate, reasonIfUnsupported, input, output, descriptor);
 }
 
 bool IsMergerSupportedNeon(const std::vector<const TensorInfo*> inputs,
diff --git a/src/backends/neon/NeonLayerSupport.hpp b/src/backends/neon/NeonLayerSupport.hpp
index 95b14b3..419c226 100644
--- a/src/backends/neon/NeonLayerSupport.hpp
+++ b/src/backends/neon/NeonLayerSupport.hpp
@@ -81,6 +81,7 @@
 
 bool IsL2NormalizationSupportedNeon(const TensorInfo& input,
                                     const TensorInfo& output,
+                                    const L2NormalizationDescriptor& descriptor,
                                     std::string* reasonIfUnsupported = nullptr);
 
 bool IsMergerSupportedNeon(const std::vector<const TensorInfo*> inputs,
diff --git a/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.cpp b/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.cpp
index dee789a..4bddd9a 100644
--- a/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.cpp
+++ b/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.cpp
@@ -8,12 +8,14 @@
 
 namespace armnn
 {
+using namespace armcomputetensorutils;
 
 arm_compute::Status NeonL2NormalizationWorkloadValidate(const TensorInfo& input,
-                                                        const TensorInfo& output)
+                                                        const TensorInfo& output,
+                                                        const L2NormalizationDescriptor& descriptor)
 {
-    const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
-    const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+    const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
+    const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
 
     arm_compute::NormalizationLayerInfo normalizationInfo =
             CreateAclNormalizationLayerInfoForL2Normalization(input);
diff --git a/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.hpp b/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.hpp
index c1221fb..70ab385 100644
--- a/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.hpp
+++ b/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.hpp
@@ -14,7 +14,8 @@
 {
 
 arm_compute::Status NeonL2NormalizationWorkloadValidate(const TensorInfo& input,
-                                                        const TensorInfo& output);
+                                                        const TensorInfo& output,
+                                                        const L2NormalizationDescriptor& descriptor);
 
 class NeonL2NormalizationFloatWorkload : public FloatWorkload<L2NormalizationQueueDescriptor>
 {
diff --git a/src/backends/reference/RefLayerSupport.cpp b/src/backends/reference/RefLayerSupport.cpp
index 12a2817..536dd17 100644
--- a/src/backends/reference/RefLayerSupport.cpp
+++ b/src/backends/reference/RefLayerSupport.cpp
@@ -171,9 +171,11 @@
 
 bool IsL2NormalizationSupportedRef(const TensorInfo& input,
                                    const TensorInfo& output,
+                                   const L2NormalizationDescriptor& descriptor,
                                    std::string* reasonIfUnsupported)
 {
     ignore_unused(output);
+    ignore_unused(descriptor);
     return IsSupportedForDataTypeRef(reasonIfUnsupported,
                                      input.GetDataType(),
                                      &TrueFunc<>,
diff --git a/src/backends/reference/RefLayerSupport.hpp b/src/backends/reference/RefLayerSupport.hpp
index ff2e7e3..d29e956 100644
--- a/src/backends/reference/RefLayerSupport.hpp
+++ b/src/backends/reference/RefLayerSupport.hpp
@@ -73,6 +73,7 @@
 
 bool IsL2NormalizationSupportedRef(const TensorInfo& input,
                                    const TensorInfo& output,
+                                   const L2NormalizationDescriptor& descriptor,
                                    std::string* reasonIfUnsupported = nullptr);
 
 bool IsLstmSupportedRef(const TensorInfo& input, const TensorInfo& outputStateIn,
diff --git a/src/backends/test/CreateWorkloadCl.cpp b/src/backends/test/CreateWorkloadCl.cpp
index 39bc259..cc0e12d 100644
--- a/src/backends/test/CreateWorkloadCl.cpp
+++ b/src/backends/test/CreateWorkloadCl.cpp
@@ -524,13 +524,14 @@
     CreateMemCopyWorkloads<IClTensorHandle>(factory);
 }
 
-BOOST_AUTO_TEST_CASE(CreateL2NormalizationWorkload)
+template <typename L2NormalizationWorkloadType, typename armnn::DataType DataType>
+static void ClL2NormalizationWorkloadTest(DataLayout dataLayout)
 {
     Graph graph;
     ClWorkloadFactory factory;
 
-    auto workload = CreateL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float32>
-        (factory, graph);
+    auto workload = CreateL2NormalizationWorkloadTest<L2NormalizationWorkloadType, DataType>
+        (factory, graph, dataLayout);
 
     // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
     L2NormalizationQueueDescriptor queueDescriptor = workload->GetData();
@@ -541,6 +542,26 @@
     BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 5, 20, 50, 67 }));
 }
 
+BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloatNchwWorkload)
+{
+    ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
+}
+
+BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloatNhwcWorkload)
+{
+    ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
+}
+
+BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloat16NchwWorkload)
+{
+    ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NCHW);
+}
+
+BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloat16NhwcWorkload)
+{
+    ClL2NormalizationWorkloadTest<ClL2NormalizationFloatWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
+}
+
 template <typename LstmWorkloadType>
 static void ClCreateLstmWorkloadTest()
 {
diff --git a/src/backends/test/CreateWorkloadNeon.cpp b/src/backends/test/CreateWorkloadNeon.cpp
index 36ad9a4..e9fcb56 100644
--- a/src/backends/test/CreateWorkloadNeon.cpp
+++ b/src/backends/test/CreateWorkloadNeon.cpp
@@ -452,4 +452,42 @@
     CreateMemCopyWorkloads<INeonTensorHandle>(factory);
 }
 
+template <typename L2NormalizationWorkloadType, typename armnn::DataType DataType>
+static void NeonCreateL2NormalizationWorkloadTest(DataLayout dataLayout)
+{
+    Graph                graph;
+    NeonWorkloadFactory  factory;
+    auto                 workload = CreateL2NormalizationWorkloadTest<L2NormalizationWorkloadType,
+                                    DataType>(factory, graph, dataLayout);
+
+    // Checks that inputs/outputs are as we expect them (see definition of CreateNormalizationWorkloadTest).
+    L2NormalizationQueueDescriptor queueDescriptor = workload->GetData();
+    auto inputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo({ 5, 20, 50, 67 }, DataType)));
+    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo({ 5, 20, 50, 67 }, DataType)));
+}
+
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloat16NchwWorkload)
+{
+    NeonCreateL2NormalizationWorkloadTest<NeonL2NormalizationFloatWorkload, DataType::Float16>(DataLayout::NCHW);
+}
+
+BOOST_AUTO_TEST_CASE(CreateL2NormalizationFloat16NhwcWorkload)
+{
+    NeonCreateL2NormalizationWorkloadTest<NeonL2NormalizationFloatWorkload, DataType::Float16>(DataLayout::NHWC);
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(CreateL2NormalizationNchwWorkload)
+{
+    NeonCreateL2NormalizationWorkloadTest<NeonL2NormalizationFloatWorkload, DataType::Float32>(DataLayout::NCHW);
+}
+
+BOOST_AUTO_TEST_CASE(CreateL2NormalizationNhwcWorkload)
+{
+    NeonCreateL2NormalizationWorkloadTest<NeonL2NormalizationFloatWorkload, DataType::Float32>(DataLayout::NHWC);
+}
+
 BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/backends/test/IsLayerSupportedTestImpl.hpp b/src/backends/test/IsLayerSupportedTestImpl.hpp
index e166513..0f31c8e 100644
--- a/src/backends/test/IsLayerSupportedTestImpl.hpp
+++ b/src/backends/test/IsLayerSupportedTestImpl.hpp
@@ -324,7 +324,7 @@
 
 DECLARE_LAYER_POLICY_CUSTOM_PARAM(Input, armnn::LayerBindingId)
 
-DECLARE_LAYER_POLICY_1_PARAM(L2Normalization)
+DECLARE_LAYER_POLICY_2_PARAM(L2Normalization)
 
 DECLARE_LAYER_POLICY_2_PARAM(Lstm)