IVGCVSW-3226 Refactor the reference normalization workload

 * Refactored RefNormalizationFloat32Workload into RefNormalizationWorkload
 * Added ref support for Uint8 norm workloads
 * Added workload unit tests for Uint8

Change-Id: I063ce919c267e02a32e739848e49d75fd98a5eb6
Signed-off-by: Matteo Martincigh <matteo.martincigh@arm.com>
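
A note on the pattern: the core of this refactor is replacing raw float*
buffer access with the Decoder<float>/Encoder<float> abstraction, so a single
workload body serves Float32, Float16 and QuantisedAsymm8 tensors alike. The
sketch below illustrates the idea with simplified, hypothetical stand-in
types; the real interfaces live in Decoders.hpp/Encoders.hpp and differ in
detail.

    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Reads any element type as float; one subclass per tensor data type.
    struct FloatDecoder
    {
        virtual ~FloatDecoder() = default;
        virtual float Get(unsigned int index) const = 0;
    };

    // Writes float results back in the tensor's element type.
    struct FloatEncoder
    {
        virtual ~FloatEncoder() = default;
        virtual void Set(unsigned int index, float value) = 0;
    };

    struct Fp32Decoder final : FloatDecoder
    {
        explicit Fp32Decoder(const float* data) : m_Data(data) {}
        float Get(unsigned int i) const override { return m_Data[i]; }
        const float* m_Data;
    };

    struct QAsymm8Decoder final : FloatDecoder
    {
        QAsymm8Decoder(const uint8_t* data, float scale, int32_t offset)
            : m_Data(data), m_Scale(scale), m_Offset(offset) {}
        float Get(unsigned int i) const override
        {
            // Dequantize on every read.
            return (static_cast<int32_t>(m_Data[i]) - m_Offset) * m_Scale;
        }
        const uint8_t* m_Data;
        float m_Scale;
        int32_t m_Offset;
    };

    struct Fp32Encoder final : FloatEncoder
    {
        explicit Fp32Encoder(float* data) : m_Data(data) {}
        void Set(unsigned int i, float v) override { m_Data[i] = v; }
        float* m_Data;
    };

    // One kernel body, written once against the abstraction.
    void HalveAll(const FloatDecoder& in, FloatEncoder& out, unsigned int count)
    {
        for (unsigned int i = 0; i < count; ++i)
        {
            out.Set(i, in.Get(i) * 0.5f);
        }
    }

    int main()
    {
        std::vector<uint8_t> quantized = { 0, 128, 255 }; // scale 0.1, offset 0
        std::vector<float>   result(quantized.size());

        QAsymm8Decoder in(quantized.data(), 0.1f, 0);
        Fp32Encoder    out(result.data());
        HalveAll(in, out, 3); // the same body accepts an Fp32Decoder unchanged

        for (float f : result) { std::cout << f << ' '; } // 0 6.4 12.75
        std::cout << '\n';
    }
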
diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp
index a95abf1..9482136 100644
--- a/src/backends/backendsCommon/WorkloadData.cpp
+++ b/src/backends/backendsCommon/WorkloadData.cpp
@@ -608,6 +608,23 @@
 {
     ValidateNumInputs(workloadInfo, "NormalizationQueueDescriptor", 1);
     ValidateNumOutputs(workloadInfo, "NormalizationQueueDescriptor", 1);
+
+    // Check the supported data types
+    std::vector<DataType> supportedTypes =
+    {
+        DataType::Float16,
+        DataType::Float32,
+        DataType::QuantisedAsymm8
+    };
+
+    ValidateDataTypes(workloadInfo.m_InputTensorInfos[0],
+                      supportedTypes,
+                      "NormalizationQueueDescriptor");
+
+    ValidateDataTypes(workloadInfo.m_OutputTensorInfos[0],
+                      { workloadInfo.m_InputTensorInfos[0].GetDataType() },
+                      "NormalizationQueueDescriptor");
+
     ValidateTensorShapesMatch(workloadInfo.m_InputTensorInfos[0],
                               workloadInfo.m_OutputTensorInfos[0],
                               "NormalizationQueueDescriptor",
diff --git a/src/backends/backendsCommon/test/WorkloadDataValidation.cpp b/src/backends/backendsCommon/test/WorkloadDataValidation.cpp
index a2e049d..7c7af2d 100644
--- a/src/backends/backendsCommon/test/WorkloadDataValidation.cpp
+++ b/src/backends/backendsCommon/test/WorkloadDataValidation.cpp
@@ -173,7 +173,7 @@
     invalidData.m_Parameters.m_K               = kappa;
 
     //Invalid argument exception is expected, because input height != output height.
-    BOOST_CHECK_THROW(RefNormalizationFloat32Workload(invalidData, invalidInfo), armnn::InvalidArgumentException);
+    BOOST_CHECK_THROW(RefNormalizationWorkload(invalidData, invalidInfo), armnn::InvalidArgumentException);
 }
 
 BOOST_AUTO_TEST_CASE(SplitterQueueDescriptor_Validate_WrongWindow)
diff --git a/src/backends/reference/RefLayerSupport.cpp b/src/backends/reference/RefLayerSupport.cpp
index f177385..6053608 100644
--- a/src/backends/reference/RefLayerSupport.cpp
+++ b/src/backends/reference/RefLayerSupport.cpp
@@ -959,12 +959,29 @@
                                                const NormalizationDescriptor& descriptor,
                                                Optional<std::string&> reasonIfUnsupported) const
 {
-    ignore_unused(output);
     ignore_unused(descriptor);
-    return IsSupportedForDataTypeRef(reasonIfUnsupported,
-                                     input.GetDataType(),
-                                     &TrueFunc<>,
-                                     &FalseFuncU8<>);
+
+    // Define supported types
+    std::array<DataType, 3> supportedTypes =
+    {
+        DataType::Float16,
+        DataType::Float32,
+        DataType::QuantisedAsymm8
+    };
+
+    bool supported = true;
+
+    supported &= CheckSupportRule(TypeAnyOf(input, supportedTypes), reasonIfUnsupported,
+                                  "Reference normalization: input type not supported.");
+
+    supported &= CheckSupportRule(TypeAnyOf(output, supportedTypes), reasonIfUnsupported,
+                                  "Reference normalization: output type not supported.");
+
+    supported &= CheckSupportRule(ShapesAreSameTotalSize(input, output), reasonIfUnsupported,
+                                  "Reference normalization: input and output shapes have different "
+                                  "num total elements.");
+
+    return supported;
 }
 
 bool RefLayerSupport::IsOutputSupported(const TensorInfo& output,
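
The rewritten support check above uses the rule-accumulation pattern: every
rule is evaluated even after a failure (&= on a bool does not short-circuit),
so the caller receives all failure reasons at once rather than just the first.
A compact sketch of that pattern, with hypothetical stand-ins for armnn's
CheckSupportRule/TypeAnyOf helpers:

    #include <algorithm>
    #include <array>
    #include <iostream>
    #include <string>

    enum class DataType { Float16, Float32, QuantisedAsymm8, Signed32 };

    // Stand-in for TypeAnyOf: passes when the type is in the allowed set.
    template <std::size_t N>
    bool TypeAnyOf(DataType actual, const std::array<DataType, N>& allowed)
    {
        return std::find(allowed.begin(), allowed.end(), actual) != allowed.end();
    }

    // Stand-in for CheckSupportRule: records a reason when the rule fails.
    bool CheckSupportRule(bool passed, std::string& reasons, const char* message)
    {
        if (!passed)
        {
            reasons += message;
            reasons += '\n';
        }
        return passed;
    }

    int main()
    {
        const std::array<DataType, 3> supportedTypes =
            { DataType::Float16, DataType::Float32, DataType::QuantisedAsymm8 };

        std::string reasons;
        bool supported = true;

        // Both rules run even though the first one fails.
        supported &= CheckSupportRule(TypeAnyOf(DataType::Signed32, supportedTypes),
                                      reasons, "input type not supported.");
        supported &= CheckSupportRule(TypeAnyOf(DataType::Float32, supportedTypes),
                                      reasons, "output type not supported.");

        std::cout << (supported ? "supported\n" : reasons);
    }
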
diff --git a/src/backends/reference/RefWorkloadFactory.cpp b/src/backends/reference/RefWorkloadFactory.cpp
index 7613902..319a620 100644
--- a/src/backends/reference/RefWorkloadFactory.cpp
+++ b/src/backends/reference/RefWorkloadFactory.cpp
@@ -189,7 +189,7 @@
 std::unique_ptr<armnn::IWorkload> RefWorkloadFactory::CreateNormalization(
     const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const
 {
-    return MakeWorkload<RefNormalizationFloat32Workload, NullWorkload>(descriptor, info);
+    return std::make_unique<RefNormalizationWorkload>(descriptor, info);
 }
 
 std::unique_ptr<armnn::IWorkload> RefWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor,
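
Context for this change: the two-template MakeWorkload helper chose a workload
class per input data type at creation time, with NullWorkload standing in for
the slot that had no implementation, which is why Uint8 normalization used to
fail at workload creation. With one class now covering every supported type, a
plain std::make_unique is sufficient. A hypothetical sketch of the selection
scheme being retired here (not the actual MakeWorkload implementation):

    #include <iostream>
    #include <memory>

    enum class DataType { Float32, QuantisedAsymm8 };

    struct IWorkload
    {
        virtual ~IWorkload() = default;
        virtual const char* Name() const = 0;
    };

    struct Float32Normalization final : IWorkload
    {
        const char* Name() const override { return "Float32Normalization"; }
    };

    // Stood in for data types that had no implementation.
    struct NullWorkload final : IWorkload
    {
        const char* Name() const override { return "NullWorkload"; }
    };

    // Old scheme (sketch): pick a workload class per input data type.
    template <typename F32Workload, typename U8Workload>
    std::unique_ptr<IWorkload> MakeWorkload(DataType type)
    {
        switch (type)
        {
            case DataType::Float32:         return std::make_unique<F32Workload>();
            case DataType::QuantisedAsymm8: return std::make_unique<U8Workload>();
        }
        return nullptr;
    }

    int main()
    {
        // A Uint8 input used to land on NullWorkload; now one class handles both.
        auto workload = MakeWorkload<Float32Normalization, NullWorkload>(
            DataType::QuantisedAsymm8);
        std::cout << workload->Name() << '\n'; // prints: NullWorkload
    }
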
diff --git a/src/backends/reference/backend.mk b/src/backends/reference/backend.mk
index 6f95113..2822c30 100644
--- a/src/backends/reference/backend.mk
+++ b/src/backends/reference/backend.mk
@@ -47,7 +47,7 @@
         workloads/RefLstmWorkload.cpp \
         workloads/RefMeanFloat32Workload.cpp \
         workloads/RefMeanUint8Workload.cpp \
-        workloads/RefNormalizationFloat32Workload.cpp \
+        workloads/RefNormalizationWorkload.cpp \
         workloads/RefPadWorkload.cpp \
         workloads/RefPermuteWorkload.cpp \
         workloads/RefPooling2dWorkload.cpp \
diff --git a/src/backends/reference/test/RefCreateWorkloadTests.cpp b/src/backends/reference/test/RefCreateWorkloadTests.cpp
index 8216ed5..3da9de9 100644
--- a/src/backends/reference/test/RefCreateWorkloadTests.cpp
+++ b/src/backends/reference/test/RefCreateWorkloadTests.cpp
@@ -372,14 +372,24 @@
     CheckInputOutput(std::move(workload), TensorInfo(inputShape, DataType), TensorInfo(outputShape, DataType));
 }
 
-BOOST_AUTO_TEST_CASE(CreateRefNormalizationNchwWorkload)
+BOOST_AUTO_TEST_CASE(CreateRefNormalizationFloat32NchwWorkload)
 {
-    RefCreateNormalizationWorkloadTest<RefNormalizationFloat32Workload, armnn::DataType::Float32>(DataLayout::NCHW);
+    RefCreateNormalizationWorkloadTest<RefNormalizationWorkload, armnn::DataType::Float32>(DataLayout::NCHW);
 }
 
-BOOST_AUTO_TEST_CASE(CreateRefNormalizationNhwcWorkload)
+BOOST_AUTO_TEST_CASE(CreateRefNormalizationFloat32NhwcWorkload)
 {
-    RefCreateNormalizationWorkloadTest<RefNormalizationFloat32Workload, armnn::DataType::Float32>(DataLayout::NHWC);
+    RefCreateNormalizationWorkloadTest<RefNormalizationWorkload, armnn::DataType::Float32>(DataLayout::NHWC);
+}
+
+BOOST_AUTO_TEST_CASE(CreateRefNormalizationUint8NchwWorkload)
+{
+    RefCreateNormalizationWorkloadTest<RefNormalizationWorkload, armnn::DataType::QuantisedAsymm8>(DataLayout::NCHW);
+}
+
+BOOST_AUTO_TEST_CASE(CreateRefNormalizationUint8NhwcWorkload)
+{
+    RefCreateNormalizationWorkloadTest<RefNormalizationWorkload, armnn::DataType::QuantisedAsymm8>(DataLayout::NHWC);
 }
 
 template <typename Pooling2dWorkloadType, armnn::DataType DataType>
diff --git a/src/backends/reference/workloads/CMakeLists.txt b/src/backends/reference/workloads/CMakeLists.txt
index 82502c5..9d5c444 100644
--- a/src/backends/reference/workloads/CMakeLists.txt
+++ b/src/backends/reference/workloads/CMakeLists.txt
@@ -76,8 +76,8 @@
     RefLstmWorkload.hpp
     RefConcatWorkload.cpp
     RefConcatWorkload.hpp
-    RefNormalizationFloat32Workload.cpp
-    RefNormalizationFloat32Workload.hpp
+    RefNormalizationWorkload.cpp
+    RefNormalizationWorkload.hpp
     RefPadWorkload.cpp
     RefPadWorkload.hpp
     RefPermuteWorkload.cpp
diff --git a/src/backends/reference/workloads/RefNormalizationFloat32Workload.hpp b/src/backends/reference/workloads/RefNormalizationFloat32Workload.hpp
deleted file mode 100644
index 9dff187..0000000
--- a/src/backends/reference/workloads/RefNormalizationFloat32Workload.hpp
+++ /dev/null
@@ -1,21 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#pragma once
-
-#include <backendsCommon/Workload.hpp>
-#include <backendsCommon/WorkloadData.hpp>
-
-namespace armnn
-{
-
-class RefNormalizationFloat32Workload : public Float32Workload<NormalizationQueueDescriptor>
-{
-public:
-    using Float32Workload<NormalizationQueueDescriptor>::Float32Workload;
-    virtual void Execute() const override;
-};
-
-} //namespace armnn
diff --git a/src/backends/reference/workloads/RefNormalizationFloat32Workload.cpp b/src/backends/reference/workloads/RefNormalizationWorkload.cpp
similarity index 62%
rename from src/backends/reference/workloads/RefNormalizationFloat32Workload.cpp
rename to src/backends/reference/workloads/RefNormalizationWorkload.cpp
index 3a2f2b9..8ff2d9c 100644
--- a/src/backends/reference/workloads/RefNormalizationFloat32Workload.cpp
+++ b/src/backends/reference/workloads/RefNormalizationWorkload.cpp
@@ -3,31 +3,34 @@
 // SPDX-License-Identifier: MIT
 //
 
-#include "RefNormalizationFloat32Workload.hpp"
+#include "RefNormalizationWorkload.hpp"
 
 #include "RefWorkloadUtils.hpp"
-#include "TensorBufferArrayView.hpp"
-
-#include "Profiling.hpp"
+#include "Decoders.hpp"
+#include "Encoders.hpp"
 
 #include <armnn/Tensor.hpp>
 
+#include <DataLayoutIndexed.hpp>
+#include <Profiling.hpp>
+
 #include <boost/log/trivial.hpp>
 #include <boost/numeric/conversion/cast.hpp>
 
+using namespace armnn;
 using namespace armnnUtils;
 
-namespace armnn
+namespace
 {
 
 // Helper function to compute "Within" normalization using Krichevsky 2012: Local Brightness Normalization.
-static void NormalizeWithinUingLbr(const float*       inputData,
-                                   float*             outputData,
-                                   const TensorShape& tensorShape,
-                                   uint32_t           norm_size,
-                                   float              alpha,
-                                   float              beta,
-                                   float              kappa)
+void NormalizeWithinUingLbr(Decoder<float>&    inputData,
+                            Encoder<float>&    outputData,
+                            const TensorShape& tensorShape,
+                            uint32_t           norm_size,
+                            float              alpha,
+                            float              beta,
+                            float              kappa)
 {
     const unsigned int batchSize = tensorShape[0];
     const unsigned int depth = tensorShape[1];
@@ -62,21 +65,24 @@
                                 continue;
                             }
 
-                            float inval = inputData[n * cols * rows * depth +
-                                                    c * cols * rows +
-                                                    boost::numeric_cast<unsigned int>(j) * cols +
-                                                    boost::numeric_cast<unsigned int>(i)];
+                            unsigned int inputIndex = n * cols * rows * depth +
+                                                      c * cols * rows +
+                                                      boost::numeric_cast<unsigned int>(j) * cols +
+                                                      boost::numeric_cast<unsigned int>(i);
+                            inputData[inputIndex]; // Seek the decoder; Get() reads here
+                            float inval = inputData.Get();
 
                             accumulated_scale += inval*inval;
                         }
                     }
-                    outputData[n * cols * rows * depth +
-                               c * cols * rows +
-                               h * cols +
-                               w] = inputData[n * cols * rows * depth +
-                                              c * cols * rows +
-                                              h * cols +
-                                              w] / (powf((kappa + (accumulated_scale * alpha)), beta));
+
+                    unsigned int index = n * cols * rows * depth +
+                                         c * cols * rows +
+                                         h * cols +
+                                         w;
+                    inputData[index];
+                    outputData[index];
+                    outputData.Set(inputData.Get() / (powf((kappa + (accumulated_scale * alpha)), beta)));
                 }
             }
         }
@@ -84,8 +90,8 @@
 }
 
 // Helper function to compute "Across" normalization using Krichevsky 2012: Local Brightness Normalization.
-void NormalizeAcrossUingLbr(const float*       inputData,
-                            float*             outputData,
+void NormalizeAcrossUingLbr(Decoder<float>&    inputData,
+                            Encoder<float>&    outputData,
                             const TensorShape& tensorShape,
                             uint32_t           norm_size,
                             float              alpha,
@@ -93,13 +99,6 @@
                             float              kappa,
                             DataLayout         dataLayout)
 {
-    TensorBufferArrayView<const float> input(tensorShape,
-                                             inputData,
-                                             dataLayout);
-    TensorBufferArrayView<float> output(tensorShape,
-                                        outputData,
-                                        dataLayout);
-
     DataLayoutIndexed dataLayoutIndexed(dataLayout);
 
     const unsigned int batchSize = tensorShape[0];
@@ -127,7 +126,14 @@
                             continue;
                         }
 
-                        float inval = input.Get(n, boost::numeric_cast<unsigned int>(k), h, w);
+                        unsigned inputIndex = dataLayoutIndexed.GetIndex(tensorShape,
+                                                                         n,
+                                                                         boost::numeric_cast<unsigned int>(k),
+                                                                         h,
+                                                                         w);
+
+                        inputData[inputIndex];
+                        float inval = inputData.Get();
 
                         accumulated_scale += inval * inval;
                     }
@@ -135,28 +141,42 @@
                     float scale = kappa + (accumulated_scale * alpha);
                     scale = powf(scale, -beta);
 
-                    output.Get(n, c, h, w) = scale * input.Get(n, c, h, w);
+                    unsigned index = dataLayoutIndexed.GetIndex(tensorShape, n, c, h, w);
+
+                    inputData[index];
+                    outputData[index];
+                    outputData.Set(scale * inputData.Get());
                 }
             }
         }
     }
 }
 
-void RefNormalizationFloat32Workload::Execute() const
+} // Anonymous namespace
+
+namespace armnn
 {
-    ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefNormalizationFloat32Workload_Execute");
+
+RefNormalizationWorkload::RefNormalizationWorkload(const NormalizationQueueDescriptor& descriptor,
+                                                   const WorkloadInfo& info)
+    : BaseWorkload(descriptor, info)
+{}
+
+void RefNormalizationWorkload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefNormalizationWorkload_Execute");
 
     const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
 
-    float*       outputData = GetOutputTensorDataFloat(0, m_Data);
-    const float* inputData = GetInputTensorDataFloat(0, m_Data);
+    auto inputDecoder  = MakeDecoder<float>(inputInfo, m_Data.m_Inputs[0]->Map());
+    auto outputEncoder = MakeEncoder<float>(inputInfo, m_Data.m_Outputs[0]->Map());
 
     if (NormalizationAlgorithmMethod::LocalBrightness == m_Data.m_Parameters.m_NormMethodType)
     {
         if (NormalizationAlgorithmChannel::Within == m_Data.m_Parameters.m_NormChannelType)
         {
-            NormalizeWithinUingLbr(inputData,
-                                   outputData,
+            NormalizeWithinUingLbr(*inputDecoder,
+                                   *outputEncoder,
                                    inputInfo.GetShape(),
                                    m_Data.m_Parameters.m_NormSize,
                                    m_Data.m_Parameters.m_Alpha,
@@ -165,8 +185,8 @@
         }
         else if (NormalizationAlgorithmChannel::Across == m_Data.m_Parameters.m_NormChannelType)
         {
-            NormalizeAcrossUingLbr(inputData,
-                                   outputData,
+            NormalizeAcrossUingLbr(*inputDecoder,
+                                   *outputEncoder,
                                    inputInfo.GetShape(),
                                    m_Data.m_Parameters.m_NormSize,
                                    m_Data.m_Parameters.m_Alpha,
@@ -187,4 +207,4 @@
     }
 }
 
-} //namespace armnn
+} // namespace armnn
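
A note on the idiom above: statements such as "inputData[inputIndex];" are not
dead code. The decoder and encoder are stateful iterators whose operator[]
seeks to an element; the Get()/Set(...) call that follows then acts at that
position. A minimal sketch of that behaviour, assuming a simplified iterator
rather than armnn's actual BaseIterator hierarchy:

    #include <iostream>

    class FloatIterator
    {
    public:
        explicit FloatIterator(float* start) : m_Start(start), m_Current(start) {}

        // Seeking, not element access: repositions the iterator.
        FloatIterator& operator[](unsigned int index)
        {
            m_Current = m_Start + index;
            return *this;
        }

        float Get() const      { return *m_Current; }
        void  Set(float value) { *m_Current = value; }

    private:
        float* m_Start;
        float* m_Current;
    };

    int main()
    {
        float data[4] = { 1.0f, 2.0f, 3.0f, 4.0f };
        FloatIterator it(data);

        it[2];                         // the bare subscript only moves the iterator...
        std::cout << it.Get() << '\n'; // ...Get() then reads at that position: 3
    }
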
diff --git a/src/backends/reference/workloads/RefNormalizationWorkload.hpp b/src/backends/reference/workloads/RefNormalizationWorkload.hpp
new file mode 100644
index 0000000..6d33c8a
--- /dev/null
+++ b/src/backends/reference/workloads/RefNormalizationWorkload.hpp
@@ -0,0 +1,23 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backendsCommon/Workload.hpp>
+#include <backendsCommon/WorkloadData.hpp>
+
+namespace armnn
+{
+
+class RefNormalizationWorkload : public BaseWorkload<NormalizationQueueDescriptor>
+{
+public:
+    explicit RefNormalizationWorkload(const NormalizationQueueDescriptor& descriptor,
+                                      const WorkloadInfo& info);
+
+    virtual void Execute() const override;
+};
+
+} // namespace armnn
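
The base class change in this header is what actually lifts the type
restriction: the old class derived from Float32Workload, a typed wrapper that
pins a workload to one data type, whereas BaseWorkload imposes no such
constraint. A simplified sketch of the distinction, using stand-in types (the
real templates are in backendsCommon/Workload.hpp):

    #include <cassert>

    enum class DataType { Float32, QuantisedAsymm8 };

    struct QueueDescriptor
    {
        DataType m_InputType;
    };

    template <typename Descriptor>
    struct BaseWorkload
    {
        explicit BaseWorkload(const Descriptor& desc) : m_Data(desc) {}
        Descriptor m_Data;
    };

    // Typed wrapper: rejects tensors whose type differs from the pinned one.
    template <typename Descriptor, DataType Type>
    struct TypedWorkload : BaseWorkload<Descriptor>
    {
        explicit TypedWorkload(const Descriptor& desc)
            : BaseWorkload<Descriptor>(desc)
        {
            assert(desc.m_InputType == Type && "data type mismatch");
        }
    };

    template <typename Descriptor>
    using Float32Workload = TypedWorkload<Descriptor, DataType::Float32>;

    int main()
    {
        QueueDescriptor uint8Desc{ DataType::QuantisedAsymm8 };

        BaseWorkload<QueueDescriptor> ok(uint8Desc);        // accepted
        // Float32Workload<QueueDescriptor> bad(uint8Desc); // would assert
        (void)ok;
    }
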
diff --git a/src/backends/reference/workloads/RefWorkloads.hpp b/src/backends/reference/workloads/RefWorkloads.hpp
index ce1e688..96f98ee 100644
--- a/src/backends/reference/workloads/RefWorkloads.hpp
+++ b/src/backends/reference/workloads/RefWorkloads.hpp
@@ -30,7 +30,7 @@
 #include "RefSoftmaxWorkload.hpp"
 #include "RefResizeBilinearFloat32Workload.hpp"
 #include "ResizeBilinear.hpp"
-#include "RefNormalizationFloat32Workload.hpp"
+#include "RefNormalizationWorkload.hpp"
 #include "RefDetectionPostProcessWorkload.hpp"
 #include "BatchNormImpl.hpp"
 #include "Activation.hpp"