IVGCVSW-3537 Add support for L2 Normalization with < 4 dimensional tensors

* Fix reference L2 Normalization workload to support < 4 dimensional tensors
* Add unit test for L2 Normalization with 2d tensor to Reference, Neon and CL test suites
* Fix typo in StackLayer

Signed-off-by: Matthew Jackson <matthew.jackson@arm.com>
Change-Id: I48a6a1289bcb02955b24f261bc70b467bd1abc23
diff --git a/src/armnn/layers/StackLayer.cpp b/src/armnn/layers/StackLayer.cpp
index 59bc8d5..7f1dbec 100644
--- a/src/armnn/layers/StackLayer.cpp
+++ b/src/armnn/layers/StackLayer.cpp
@@ -73,7 +73,7 @@
         TensorShape inputShape = GetInputSlot(i).GetConnection()->GetTensorInfo().GetShape();
         if (inputShape != m_Param.m_InputShape)
         {
-            throw LayerValidationException("ConcatLayer: TensorShape set on InputSlot[" +
+            throw LayerValidationException("StackLayer: TensorShape set on InputSlot[" +
                                            std::to_string(i) +
                                            "] does not match defined input shape");
         }
diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp
index 3d4e27c..2000ce4 100644
--- a/src/backends/backendsCommon/WorkloadData.cpp
+++ b/src/backends/backendsCommon/WorkloadData.cpp
@@ -1120,8 +1120,10 @@
     const TensorInfo& inputTensorInfo  = workloadInfo.m_InputTensorInfos[0];
     const TensorInfo& outputTensorInfo = workloadInfo.m_OutputTensorInfos[0];
 
-    ValidateTensorNumDimensions(inputTensorInfo,  descriptorName, 4, "input");
-    ValidateTensorNumDimensions(outputTensorInfo, descriptorName, 4, "output");
+    if (inputTensorInfo.GetNumDimensions() > 4)
+    {
+        throw InvalidArgumentException(descriptorName + ": Input tensors with rank greater than 4 are not supported.");
+    }
 
     ValidateTensorShapesMatch(inputTensorInfo, outputTensorInfo, descriptorName, "input", "output");
 
diff --git a/src/backends/backendsCommon/test/LayerTests.cpp b/src/backends/backendsCommon/test/LayerTests.cpp
index f996eda..4606380 100644
--- a/src/backends/backendsCommon/test/LayerTests.cpp
+++ b/src/backends/backendsCommon/test/LayerTests.cpp
@@ -7179,6 +7179,71 @@
                                                                          1.f/128, 128, layout);
 }
 
+LayerTestResult<float, 2> L2Normalization2dShapeTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    const armnn::DataLayout layout = armnn::DataLayout::NHWC;
+    const armnn::TensorShape inputOutputTensorShape = armnn::TensorShape({ 5, 2 });
+
+    std::vector<float> inputData
+    {
+        1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f
+    };
+    std::vector<float> expectedOutputData
+    {
+        1.0f * CalcInvL2Norm({ 1.0f,  2.0f }),
+        2.0f * CalcInvL2Norm({ 1.0f,  2.0f }),
+        3.0f * CalcInvL2Norm({ 3.0f,  4.0f }),
+        4.0f * CalcInvL2Norm({ 3.0f,  4.0f }),
+        5.0f * CalcInvL2Norm({ 5.0f,  6.0f }),
+        6.0f * CalcInvL2Norm({ 5.0f,  6.0f }),
+        7.0f * CalcInvL2Norm({ 7.0f,  8.0f }),
+        8.0f * CalcInvL2Norm({ 7.0f,  8.0f }),
+        9.0f  * CalcInvL2Norm({ 9.0f, 10.0f }),
+        10.0f * CalcInvL2Norm({ 9.0f, 10.0f })
+    };
+
+    const armnn::TensorInfo inputTensorInfo(inputOutputTensorShape, armnn::DataType::Float32, 0.f, 0);
+    const armnn::TensorInfo outputTensorInfo(inputOutputTensorShape, armnn::DataType::Float32, 0.f, 0);
+
+    auto inputTensor = MakeTensor<float, 2>(inputTensorInfo, QuantizedVector<float>(
+                                                             inputTensorInfo.GetQuantizationScale(),
+                                                             inputTensorInfo.GetQuantizationOffset(),
+                                                             inputData));
+
+    LayerTestResult<float, 2> result(outputTensorInfo);
+    result.outputExpected = MakeTensor<float, 2>(outputTensorInfo, QuantizedVector<float>(
+                                                                   outputTensorInfo.GetQuantizationScale(),
+                                                                   outputTensorInfo.GetQuantizationOffset(),
+                                                                   expectedOutputData));
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::L2NormalizationQueueDescriptor descriptor;
+    descriptor.m_Parameters.m_Eps = 1e-12f;
+    descriptor.m_Parameters.m_DataLayout = layout;
+    armnn::WorkloadInfo info;
+
+    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateL2Normalization(descriptor, info);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0]);
+
+    workload->PostAllocationConfigure();
+    ExecuteWorkload(*workload, memoryManager);
+
+    CopyDataFromITensorHandle(&result.output[0][0], outputHandle.get());
+
+    return result;
+}
+
 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
 LayerTestResult<T, 4> L2Normalization3dTestCommon(
     armnn::IWorkloadFactory& workloadFactory,
diff --git a/src/backends/backendsCommon/test/LayerTests.hpp b/src/backends/backendsCommon/test/LayerTests.hpp
index 913c3a6..fb7ce92 100644
--- a/src/backends/backendsCommon/test/LayerTests.hpp
+++ b/src/backends/backendsCommon/test/LayerTests.hpp
@@ -1059,6 +1059,10 @@
         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
         const armnn::DataLayout layout);
 
+LayerTestResult<float, 2> L2Normalization2dShapeTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
 LayerTestResult<float, 4> L2Normalization3dTest(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
diff --git a/src/backends/cl/test/ClLayerTests.cpp b/src/backends/cl/test/ClLayerTests.cpp
index 160b3a9..37af471 100644
--- a/src/backends/cl/test/ClLayerTests.cpp
+++ b/src/backends/cl/test/ClLayerTests.cpp
@@ -302,6 +302,8 @@
 ARMNN_AUTO_TEST_CASE(L2Normalization3dNhwc, L2Normalization3dTest, armnn::DataLayout::NHWC)
 ARMNN_AUTO_TEST_CASE(L2Normalization4dNhwc, L2Normalization4dTest, armnn::DataLayout::NHWC)
 
+ARMNN_AUTO_TEST_CASE(L2Normalization2dShape, L2Normalization2dShapeTest)
+
 ARMNN_AUTO_TEST_CASE(L2NormalizationDefaultEpsilon, L2NormalizationDefaultEpsilonTest, armnn::DataLayout::NCHW)
 ARMNN_AUTO_TEST_CASE(L2NormalizationNonDefaultEpsilon, L2NormalizationNonDefaultEpsilonTest, armnn::DataLayout::NCHW)
 
diff --git a/src/backends/neon/test/NeonLayerTests.cpp b/src/backends/neon/test/NeonLayerTests.cpp
index d551431..9f7413c 100644
--- a/src/backends/neon/test/NeonLayerTests.cpp
+++ b/src/backends/neon/test/NeonLayerTests.cpp
@@ -473,6 +473,8 @@
 ARMNN_AUTO_TEST_CASE(L2Normalization3dNhwc, L2Normalization3dTest, armnn::DataLayout::NHWC)
 ARMNN_AUTO_TEST_CASE(L2Normalization4dNhwc, L2Normalization4dTest, armnn::DataLayout::NHWC)
 
+ARMNN_AUTO_TEST_CASE(L2Normalization2dShape, L2Normalization2dShapeTest)
+
 ARMNN_AUTO_TEST_CASE(L2NormalizationDefaultEpsilon, L2NormalizationDefaultEpsilonTest, armnn::DataLayout::NCHW)
 ARMNN_AUTO_TEST_CASE(L2NormalizationNonDefaultEpsilon, L2NormalizationNonDefaultEpsilonTest, armnn::DataLayout::NCHW)
 
diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp
index 5cb8042..f7fb78a 100644
--- a/src/backends/reference/test/RefLayerTests.cpp
+++ b/src/backends/reference/test/RefLayerTests.cpp
@@ -795,6 +795,8 @@
 ARMNN_AUTO_TEST_CASE(L2Normalization3dUint8Nhwc, L2Normalization3dUint8Test, armnn::DataLayout::NHWC)
 ARMNN_AUTO_TEST_CASE(L2Normalization4dUint8Nhwc, L2Normalization4dUint8Test, armnn::DataLayout::NHWC)
 
+ARMNN_AUTO_TEST_CASE(L2Normalization2dShape, L2Normalization2dShapeTest)
+
 ARMNN_AUTO_TEST_CASE(L2NormalizationDefaultEpsilon, L2NormalizationDefaultEpsilonTest, armnn::DataLayout::NCHW)
 ARMNN_AUTO_TEST_CASE(L2NormalizationNonDefaultEpsilon, L2NormalizationNonDefaultEpsilonTest, armnn::DataLayout::NCHW)
 
diff --git a/src/backends/reference/workloads/RefL2NormalizationWorkload.cpp b/src/backends/reference/workloads/RefL2NormalizationWorkload.cpp
index 3b2ab50..3764b9a 100644
--- a/src/backends/reference/workloads/RefL2NormalizationWorkload.cpp
+++ b/src/backends/reference/workloads/RefL2NormalizationWorkload.cpp
@@ -10,9 +10,10 @@
 #include "Encoders.hpp"
 #include "DataLayoutIndexed.hpp"
 
-
 #include "Profiling.hpp"
 
+#include <boost/numeric/conversion/cast.hpp>
+
 #include <cmath>
 
 using namespace armnnUtils;
@@ -36,10 +37,32 @@
 
         DataLayoutIndexed dataLayout(m_Data.m_Parameters.m_DataLayout);
 
-        const unsigned int batches  = inputInfo.GetShape()[0];
-        const unsigned int channels = inputInfo.GetShape()[dataLayout.GetChannelsIndex()];
-        const unsigned int height   = inputInfo.GetShape()[dataLayout.GetHeightIndex()];
-        const unsigned int width    = inputInfo.GetShape()[dataLayout.GetWidthIndex()];
+        const TensorShape& shape = inputInfo.GetShape();
+        unsigned int paddedShapeArray[4];
+        const int idxShift = 4 - boost::numeric_cast<int>(shape.GetNumDimensions());
+
+        const unsigned int batches = (idxShift == 0) ? shape[0] : 1;
+        paddedShapeArray[0] = batches;
+
+        const int channelsIdx = boost::numeric_cast<int>(dataLayout.GetChannelsIndex());
+        const unsigned int channels = (channelsIdx - idxShift >= 0)
+                                      ? shape[boost::numeric_cast<unsigned int>(channelsIdx - idxShift)]
+                                      : 1;
+        paddedShapeArray[channelsIdx] = channels;
+
+        const int heightIdx = boost::numeric_cast<int>(dataLayout.GetHeightIndex());
+        const unsigned int height = (heightIdx - idxShift >= 0)
+                                    ? shape[boost::numeric_cast<unsigned int>(heightIdx - idxShift)]
+                                    : 1;
+        paddedShapeArray[heightIdx] = height;
+
+        const int widthIdx = boost::numeric_cast<int>(dataLayout.GetWidthIndex());
+        const unsigned int width = (widthIdx - idxShift >= 0)
+                                   ? shape[boost::numeric_cast<unsigned int>(widthIdx - idxShift)]
+                                   : 1;
+        paddedShapeArray[widthIdx] = width;
+
+        const TensorShape paddedShape(4, paddedShapeArray);
 
         for (unsigned int n = 0; n < batches; ++n)
         {
@@ -52,14 +75,14 @@
                         float reduction = 0.0;
                         for (unsigned int d = 0; d < channels; ++d)
                         {
-                            unsigned int inputIndex = dataLayout.GetIndex(inputInfo.GetShape(), n, d, h, w);
+                            unsigned int inputIndex = dataLayout.GetIndex(paddedShape, n, d, h, w);
 
                             (*inputDecoder)[inputIndex];
                             const float value = inputDecoder->Get();
                             reduction += value * value;
                         }
 
-                        unsigned int index = dataLayout.GetIndex(inputInfo.GetShape(), n, c, h, w);
+                        unsigned int index = dataLayout.GetIndex(paddedShape, n, c, h, w);
 
                         float maximum = reduction < m_Data.m_Parameters.m_Eps ? m_Data.m_Parameters.m_Eps : reduction;