IVGCVSW-3212 Refactor the Reference BatchNormalization workloads to
handle Float32 and QAsymm8 types

 * Removed the type-specific workload implementations
 * Added type-independent RefBatchNormalizationWorkload implementation
 * Reworked BatchNormImpl to use decoders/encoders
 * Improved the validation of the BatchNorm queue descriptor
 * Fixed unit tests where necessary

Change-Id: Icf3fa1332292d38ec2fa0b1cb984cab78426034b
Signed-off-by: Matteo Martincigh <matteo.martincigh@arm.com>
diff --git a/src/backends/reference/RefLayerSupport.cpp b/src/backends/reference/RefLayerSupport.cpp
index edd552b..aeff51d 100644
--- a/src/backends/reference/RefLayerSupport.cpp
+++ b/src/backends/reference/RefLayerSupport.cpp
@@ -280,22 +280,44 @@
 bool RefLayerSupport::IsBatchNormalizationSupported(const TensorInfo& input,
                                                     const TensorInfo& output,
                                                     const TensorInfo& mean,
-                                                    const TensorInfo& var,
+                                                    const TensorInfo& variance,
                                                     const TensorInfo& beta,
                                                     const TensorInfo& gamma,
                                                     const BatchNormalizationDescriptor& descriptor,
                                                     Optional<std::string&> reasonIfUnsupported) const
 {
-    ignore_unused(output);
-    ignore_unused(mean);
-    ignore_unused(var);
-    ignore_unused(beta);
-    ignore_unused(gamma);
     ignore_unused(descriptor);
-    return IsSupportedForDataTypeRef(reasonIfUnsupported,
-                                     input.GetDataType(),
-                                     &TrueFunc<>,
-                                     &TrueFunc<>);
+
+    std::array<DataType, 2> supportedTypes =
+    {
+        DataType::Float32,
+        DataType::QuantisedAsymm8
+    };
+
+    bool supported = true;
+
+    supported &= CheckSupportRule(TypeAnyOf(input, supportedTypes), reasonIfUnsupported,
+                                  "Reference batch normalization: input is not a supported type.");
+
+    supported &= CheckSupportRule(TypeAnyOf(output, supportedTypes), reasonIfUnsupported,
+                                  "Reference batch normalization: output is not a supported type.");
+
+    supported &= CheckSupportRule(TypesAreEqual(input, output), reasonIfUnsupported,
+                                  "Reference batch normalization: input and output types are mismatched");
+
+    supported &= CheckSupportRule(TypeAnyOf(mean, supportedTypes), reasonIfUnsupported,
+                                  "Reference batch normalization: mean is not a supported type.");
+
+    supported &= CheckSupportRule(TypeAnyOf(variance, supportedTypes), reasonIfUnsupported,
+                                  "Reference batch normalization: variance is not a supported type.");
+
+    supported &= CheckSupportRule(TypeAnyOf(beta, supportedTypes), reasonIfUnsupported,
+                                  "Reference batch normalization: beta is not a supported type.");
+
+    supported &= CheckSupportRule(TypeAnyOf(gamma, supportedTypes), reasonIfUnsupported,
+                                  "Reference batch normalization: gamma is not a supported type.");
+
+    return supported;
 }
 
 bool RefLayerSupport::IsBatchToSpaceNdSupported(const TensorInfo& input,
diff --git a/src/backends/reference/RefWorkloadFactory.cpp b/src/backends/reference/RefWorkloadFactory.cpp
index 1610655..d103f56 100644
--- a/src/backends/reference/RefWorkloadFactory.cpp
+++ b/src/backends/reference/RefWorkloadFactory.cpp
@@ -220,7 +220,7 @@
 std::unique_ptr<armnn::IWorkload> RefWorkloadFactory::CreateBatchNormalization(
     const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const
 {
-    return MakeWorkload<RefBatchNormalizationFloat32Workload, RefBatchNormalizationUint8Workload>(descriptor, info);
+    return std::make_unique<RefBatchNormalizationWorkload>(descriptor, info);
 }
 
 std::unique_ptr<armnn::IWorkload> RefWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor,
diff --git a/src/backends/reference/backend.mk b/src/backends/reference/backend.mk
index f371c8b..81b6de1 100644
--- a/src/backends/reference/backend.mk
+++ b/src/backends/reference/backend.mk
@@ -12,6 +12,7 @@
         RefLayerSupport.cpp \
         RefWorkloadFactory.cpp \
         workloads/Activation.cpp \
+        workloads/BatchNormImpl.cpp \
         workloads/BatchToSpaceNd.cpp \
         workloads/Broadcast.cpp \
         workloads/ConvImpl.cpp \
@@ -25,8 +26,7 @@
         workloads/Pad.cpp \
         workloads/Pooling2d.cpp \
         workloads/RefActivationWorkload.cpp \
-        workloads/RefBatchNormalizationFloat32Workload.cpp \
-        workloads/RefBatchNormalizationUint8Workload.cpp \
+        workloads/RefBatchNormalizationWorkload.cpp \
         workloads/RefBatchToSpaceNdFloat32Workload.cpp \
         workloads/RefBatchToSpaceNdUint8Workload.cpp \
         workloads/RefConcatWorkload.cpp \
diff --git a/src/backends/reference/test/RefCreateWorkloadTests.cpp b/src/backends/reference/test/RefCreateWorkloadTests.cpp
index 7c5712b..a0c6145 100644
--- a/src/backends/reference/test/RefCreateWorkloadTests.cpp
+++ b/src/backends/reference/test/RefCreateWorkloadTests.cpp
@@ -181,8 +181,9 @@
 {
     Graph graph;
     RefWorkloadFactory factory;
-    auto workload =
-            CreateBatchNormalizationWorkloadTest<BatchNormalizationWorkloadType, DataType>(factory, graph, dataLayout);
+    auto workload = CreateBatchNormalizationWorkloadTest<BatchNormalizationWorkloadType, DataType>(factory,
+                                                                                                   graph,
+                                                                                                   dataLayout);
 
     TensorShape inputShape;
     TensorShape outputShape;
@@ -206,25 +207,25 @@
 
 BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloat32Workload)
 {
-    RefCreateBatchNormalizationWorkloadTest<RefBatchNormalizationFloat32Workload,armnn::DataType::Float32>
+    RefCreateBatchNormalizationWorkloadTest<RefBatchNormalizationWorkload,armnn::DataType::Float32>
             (DataLayout::NCHW);
 }
 
 BOOST_AUTO_TEST_CASE(CreateBatchNormalizationFloat32WorkloadNhwc)
 {
-    RefCreateBatchNormalizationWorkloadTest<RefBatchNormalizationFloat32Workload, armnn::DataType::Float32>
+    RefCreateBatchNormalizationWorkloadTest<RefBatchNormalizationWorkload, armnn::DataType::Float32>
             (DataLayout::NHWC);
 }
 
 BOOST_AUTO_TEST_CASE(CreateBatchNormalizationUint8Workload)
 {
-    RefCreateBatchNormalizationWorkloadTest<RefBatchNormalizationUint8Workload, armnn::DataType::QuantisedAsymm8>
+    RefCreateBatchNormalizationWorkloadTest<RefBatchNormalizationWorkload, armnn::DataType::QuantisedAsymm8>
             (DataLayout::NCHW);
 }
 
 BOOST_AUTO_TEST_CASE(CreateBatchNormalizationUint8WorkloadNhwc)
 {
-    RefCreateBatchNormalizationWorkloadTest<RefBatchNormalizationUint8Workload, armnn::DataType::QuantisedAsymm8>
+    RefCreateBatchNormalizationWorkloadTest<RefBatchNormalizationWorkload, armnn::DataType::QuantisedAsymm8>
             (DataLayout::NHWC);
 }
 
diff --git a/src/backends/reference/workloads/BatchNormImpl.cpp b/src/backends/reference/workloads/BatchNormImpl.cpp
new file mode 100644
index 0000000..36e96d3
--- /dev/null
+++ b/src/backends/reference/workloads/BatchNormImpl.cpp
@@ -0,0 +1,82 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "BatchNormImpl.hpp"
+#include "RefWorkloadUtils.hpp"
+
+#include <armnn/Tensor.hpp>
+
+#include <DataLayoutIndexed.hpp>
+
+#include <cmath>
+
+namespace armnn
+{
+
+void BatchNormImpl(const BatchNormalizationQueueDescriptor& data,
+                   Decoder<float>& meanDecoder,
+                   Decoder<float>& varianceDecoder,
+                   Decoder<float>& betaDecoder,
+                   Decoder<float>& gammaDecoder,
+                   Decoder<float>& inputDecoder,
+                   Encoder<float>& outputEncoder)
+{
+    const TensorInfo& inputInfo = GetTensorInfo(data.m_Inputs[0]);
+    const TensorShape inputShape = inputInfo.GetShape();
+
+    armnnUtils::DataLayoutIndexed dataLayout(data.m_Parameters.m_DataLayout);
+
+    unsigned int inputBatches  = inputShape[0];
+    unsigned int inputHeight   = inputShape[dataLayout.GetHeightIndex()];
+    unsigned int inputWidth    = inputShape[dataLayout.GetWidthIndex()];
+    unsigned int inputChannels = inputShape[dataLayout.GetChannelsIndex()];
+
+    for (unsigned int c = 0; c < inputChannels; c++)
+    {
+        meanDecoder[c];
+        varianceDecoder[c];
+        betaDecoder[c];
+        gammaDecoder[c];
+        float mean  = meanDecoder.Get();
+        float var   = varianceDecoder.Get();
+        float beta  = betaDecoder.Get();
+        float gamma = gammaDecoder.Get();
+
+        float mult = gamma / sqrtf(var + data.m_Parameters.m_Eps);
+        float add  = beta - mult * mean;
+
+        for (unsigned int n = 0; n < inputBatches; n++)
+        {
+            for (unsigned int h = 0; h < inputHeight; h++)
+            {
+                for (unsigned int w = 0; w < inputWidth; w++)
+                {
+                    unsigned int index = 0;
+
+                    if (dataLayout == DataLayout::NHWC)
+                    {
+                        index = n * inputHeight * inputWidth * inputChannels +
+                                h * inputWidth * inputChannels +
+                                w * inputChannels +
+                                c;
+                    }
+                    else // dataLayout == DataLayout::NCHW
+                    {
+                        index = n * inputHeight * inputWidth * inputChannels +
+                                c * inputHeight * inputWidth +
+                                h * inputWidth +
+                                w;
+                    }
+
+                    inputDecoder[index];
+                    outputEncoder[index];
+                    outputEncoder.Set(mult * inputDecoder.Get() + add);
+                }
+            }
+        }
+    }
+}
+
+} // namespace armnn
diff --git a/src/backends/reference/workloads/BatchNormImpl.hpp b/src/backends/reference/workloads/BatchNormImpl.hpp
index 799e7a3..c0250b9 100644
--- a/src/backends/reference/workloads/BatchNormImpl.hpp
+++ b/src/backends/reference/workloads/BatchNormImpl.hpp
@@ -5,60 +5,20 @@
 
 #pragma once
 
-#include "RefWorkloadUtils.hpp"
-#include "TensorBufferArrayView.hpp"
+#include "Encoders.hpp"
+#include "Decoders.hpp"
 
-#include <armnn/Tensor.hpp>
-
-#include <DataLayoutIndexed.hpp>
-
-#include <cmath>
+#include <backendsCommon/WorkloadData.hpp>
 
 namespace armnn
 {
 
-template<typename NormData>
-static void BatchNormImpl(NormData     data,
-                          const float* varIn,
-                          const float* meanIn,
-                          const float* gammaIn,
-                          const float* betaIn,
-                          float*       outputData,
-                          const float* inputData)
-{
-    const TensorInfo& inputInfo = GetTensorInfo(data.m_Inputs[0]);
-    const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[0]);
+void BatchNormImpl(const BatchNormalizationQueueDescriptor& data,
+                   Decoder<float>& meanIn,
+                   Decoder<float>& varIn,
+                   Decoder<float>& betaIn,
+                   Decoder<float>& gammaIn,
+                   Decoder<float>& inputData,
+                   Encoder<float>& outputData);
 
-    TensorBufferArrayView<const float> input(inputInfo.GetShape(),
-                                             inputData,
-                                             data.m_Parameters.m_DataLayout);
-    TensorBufferArrayView<float> output(outputInfo.GetShape(),
-                                        outputData,
-                                        data.m_Parameters.m_DataLayout);
-
-    armnnUtils::DataLayoutIndexed dataLayout(data.m_Parameters.m_DataLayout);
-
-    for (unsigned int c = 0; c < inputInfo.GetShape()[dataLayout.GetChannelsIndex()]; c++)
-    {
-        float var   = varIn[c];
-        float mean  = meanIn[c];
-        float gamma = gammaIn[c];
-        float beta  = betaIn[c];
-
-        float mult = gamma / sqrtf(var + data.m_Parameters.m_Eps);
-        float add  = beta - mult * mean;
-
-        for (unsigned int n = 0; n < inputInfo.GetShape()[0]; n++)
-        {
-            for (unsigned int h = 0; h < inputInfo.GetShape()[dataLayout.GetHeightIndex()]; h++)
-            {
-                for (unsigned int w = 0; w < inputInfo.GetShape()[dataLayout.GetWidthIndex()]; w++)
-                {
-                    output.Get(n, c, h, w) = mult * input.Get(n, c, h, w) + add;
-                }
-            }
-        }
-    }
-}
-
-} //namespace armnn
+} // namespace armnn
diff --git a/src/backends/reference/workloads/CMakeLists.txt b/src/backends/reference/workloads/CMakeLists.txt
index df126c4..cdca22d 100644
--- a/src/backends/reference/workloads/CMakeLists.txt
+++ b/src/backends/reference/workloads/CMakeLists.txt
@@ -7,6 +7,7 @@
     Activation.cpp
     Activation.hpp
     BaseIterator.hpp
+    BatchNormImpl.cpp
     BatchNormImpl.hpp
     BatchToSpaceNd.cpp
     BatchToSpaceNd.hpp
@@ -37,10 +38,8 @@
     Pooling2d.hpp
     RefActivationWorkload.cpp
     RefActivationWorkload.hpp
-    RefBatchNormalizationFloat32Workload.cpp
-    RefBatchNormalizationFloat32Workload.hpp
-    RefBatchNormalizationUint8Workload.cpp
-    RefBatchNormalizationUint8Workload.hpp
+    RefBatchNormalizationWorkload.cpp
+    RefBatchNormalizationWorkload.hpp
     RefBatchToSpaceNdFloat32Workload.cpp
     RefBatchToSpaceNdFloat32Workload.hpp
     RefBatchToSpaceNdUint8Workload.cpp
diff --git a/src/backends/reference/workloads/Encoders.hpp b/src/backends/reference/workloads/Encoders.hpp
index 547bead..af3b937 100644
--- a/src/backends/reference/workloads/Encoders.hpp
+++ b/src/backends/reference/workloads/Encoders.hpp
@@ -7,6 +7,8 @@
 
 #include "BaseIterator.hpp"
 
+#include <boost/assert.hpp>
+
 namespace armnn
 {
 
diff --git a/src/backends/reference/workloads/RefBatchNormalizationFloat32Workload.cpp b/src/backends/reference/workloads/RefBatchNormalizationFloat32Workload.cpp
deleted file mode 100644
index 313af9c..0000000
--- a/src/backends/reference/workloads/RefBatchNormalizationFloat32Workload.cpp
+++ /dev/null
@@ -1,38 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#include "RefBatchNormalizationFloat32Workload.hpp"
-
-#include "BatchNormImpl.hpp"
-#include "RefWorkloadUtils.hpp"
-
-#include "Profiling.hpp"
-
-namespace armnn
-{
-RefBatchNormalizationFloat32Workload::RefBatchNormalizationFloat32Workload(
-   const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info)
-      : Float32Workload<BatchNormalizationQueueDescriptor>(descriptor, info),
-        m_Mean(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Mean))),
-        m_Variance(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Variance))),
-        m_Beta(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Beta))),
-        m_Gamma(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Gamma))) {}
-
-void RefBatchNormalizationFloat32Workload::Execute() const
-{
-    ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefBatchNormalizationFloat32Workload_Execute");
-
-    const float* var   = m_Variance->GetConstTensor<float>();
-    const float* mean  = m_Mean->GetConstTensor<float>();
-    const float* gamma = m_Gamma->GetConstTensor<float>();
-    const float* beta  = m_Beta->GetConstTensor<float>();
-
-    auto inputData = GetInputTensorDataFloat(0, m_Data);
-    auto outputData = GetOutputTensorDataFloat(0, m_Data);
-
-    BatchNormImpl(m_Data, var, mean, gamma, beta, outputData, inputData);
-}
-
-} //namespace armnn
diff --git a/src/backends/reference/workloads/RefBatchNormalizationFloat32Workload.hpp b/src/backends/reference/workloads/RefBatchNormalizationFloat32Workload.hpp
deleted file mode 100644
index 9f92899..0000000
--- a/src/backends/reference/workloads/RefBatchNormalizationFloat32Workload.hpp
+++ /dev/null
@@ -1,28 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#pragma once
-
-#include <backendsCommon/Workload.hpp>
-#include <backendsCommon/WorkloadData.hpp>
-
-namespace armnn
-{
-
-class RefBatchNormalizationFloat32Workload : public Float32Workload<BatchNormalizationQueueDescriptor>
-{
-public:
-    explicit RefBatchNormalizationFloat32Workload(const BatchNormalizationQueueDescriptor& descriptor,
-                                                  const WorkloadInfo& info);
-    virtual void Execute() const override;
-
-private:
-    std::unique_ptr<ScopedCpuTensorHandle> m_Mean;
-    std::unique_ptr<ScopedCpuTensorHandle> m_Variance;
-    std::unique_ptr<ScopedCpuTensorHandle> m_Beta;
-    std::unique_ptr<ScopedCpuTensorHandle> m_Gamma;
-};
-
-} //namespace armnn
diff --git a/src/backends/reference/workloads/RefBatchNormalizationUint8Workload.cpp b/src/backends/reference/workloads/RefBatchNormalizationUint8Workload.cpp
deleted file mode 100644
index e248ad4..0000000
--- a/src/backends/reference/workloads/RefBatchNormalizationUint8Workload.cpp
+++ /dev/null
@@ -1,47 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#include "RefBatchNormalizationUint8Workload.hpp"
-
-#include "BatchNormImpl.hpp"
-#include "RefWorkloadUtils.hpp"
-
-#include "Profiling.hpp"
-
-#include <vector>
-
-namespace armnn
-{
-RefBatchNormalizationUint8Workload::RefBatchNormalizationUint8Workload(
-    const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info)
-       : Uint8Workload<BatchNormalizationQueueDescriptor>(descriptor, info),
-         m_Mean(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Mean))),
-         m_Variance(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Variance))),
-         m_Beta(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Beta))),
-         m_Gamma(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Gamma))) {}
-
-void RefBatchNormalizationUint8Workload::Execute() const
-{
-    ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefBatchNormalizationUint8Workload_Execute");
-
-    const TensorInfo& inputInfo0 = GetTensorInfo(m_Data.m_Inputs[0]);
-    const TensorInfo& varInfo = GetTensorInfo(m_Variance.get());
-    const TensorInfo& meanInfo = GetTensorInfo(m_Mean.get());
-    const TensorInfo& gammaInfo = GetTensorInfo(m_Gamma.get());
-    const TensorInfo& betaInfo = GetTensorInfo(m_Beta.get());
-    const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
-
-    auto input = Dequantize(GetInputTensorDataU8(0, m_Data), inputInfo0);
-    auto var = Dequantize(m_Variance->GetConstTensor<uint8_t>(), varInfo);
-    auto mean = Dequantize(m_Mean->GetConstTensor<uint8_t>(), meanInfo);
-    auto gamma = Dequantize(m_Gamma->GetConstTensor<uint8_t>(), gammaInfo);
-    auto beta = Dequantize(m_Beta->GetConstTensor<uint8_t>(), betaInfo);
-
-    std::vector<float> results(outputInfo.GetNumElements());
-    BatchNormImpl(m_Data, var.data(), mean.data(), gamma.data(), beta.data(), results.data(), input.data());
-    Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), outputInfo);
-}
-
-} //namespace armnn
diff --git a/src/backends/reference/workloads/RefBatchNormalizationWorkload.cpp b/src/backends/reference/workloads/RefBatchNormalizationWorkload.cpp
new file mode 100644
index 0000000..b43b104
--- /dev/null
+++ b/src/backends/reference/workloads/RefBatchNormalizationWorkload.cpp
@@ -0,0 +1,45 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefBatchNormalizationWorkload.hpp"
+
+#include "BatchNormImpl.hpp"
+#include "RefWorkloadUtils.hpp"
+
+#include "Profiling.hpp"
+
+namespace armnn
+{
+
+RefBatchNormalizationWorkload::RefBatchNormalizationWorkload(const BatchNormalizationQueueDescriptor& descriptor,
+                                                             const WorkloadInfo& info)
+    : BaseWorkload(descriptor, info)
+    , m_Mean    (std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Mean)))
+    , m_Variance(std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Variance)))
+    , m_Beta    (std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Beta)))
+    , m_Gamma   (std::make_unique<ScopedCpuTensorHandle>(*(descriptor.m_Gamma)))
+{}
+
+void RefBatchNormalizationWorkload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefBatchNormalizationWorkload_Execute");
+
+    std::unique_ptr<Decoder<float>> meanDecoder = MakeDecoder<float>(GetTensorInfo(m_Mean.get()),
+                                                                     m_Mean.get()->Map(true));
+    std::unique_ptr<Decoder<float>> varianceDecoder = MakeDecoder<float>(GetTensorInfo(m_Variance.get()),
+                                                                         m_Variance.get()->Map(true));
+    std::unique_ptr<Decoder<float>> gammaDecoder = MakeDecoder<float>(GetTensorInfo(m_Gamma.get()),
+                                                                      m_Gamma.get()->Map(true));
+    std::unique_ptr<Decoder<float>> betaDecoder = MakeDecoder<float>(GetTensorInfo(m_Beta.get()),
+                                                                     m_Beta.get()->Map(true));
+    std::unique_ptr<Decoder<float>> inputDecoder = MakeDecoder<float>(GetTensorInfo(m_Data.m_Inputs[0]),
+                                                                      m_Data.m_Inputs[0]->Map());
+    std::unique_ptr<Encoder<float>> outputEncoder = MakeEncoder<float>(GetTensorInfo(m_Data.m_Outputs[0]),
+                                                                       m_Data.m_Outputs[0]->Map());
+
+    BatchNormImpl(m_Data, *meanDecoder, *varianceDecoder, *betaDecoder, *gammaDecoder, *inputDecoder, *outputEncoder);
+}
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefBatchNormalizationUint8Workload.hpp b/src/backends/reference/workloads/RefBatchNormalizationWorkload.hpp
similarity index 65%
rename from src/backends/reference/workloads/RefBatchNormalizationUint8Workload.hpp
rename to src/backends/reference/workloads/RefBatchNormalizationWorkload.hpp
index 7c288a5..9e71e7b 100644
--- a/src/backends/reference/workloads/RefBatchNormalizationUint8Workload.hpp
+++ b/src/backends/reference/workloads/RefBatchNormalizationWorkload.hpp
@@ -11,11 +11,11 @@
 namespace armnn
 {
 
-class RefBatchNormalizationUint8Workload : public Uint8Workload<BatchNormalizationQueueDescriptor>
+class RefBatchNormalizationWorkload : public BaseWorkload<BatchNormalizationQueueDescriptor>
 {
 public:
-    explicit RefBatchNormalizationUint8Workload(const BatchNormalizationQueueDescriptor& descriptor,
-                                          const WorkloadInfo& info);
+    explicit RefBatchNormalizationWorkload(const BatchNormalizationQueueDescriptor& descriptor,
+                                           const WorkloadInfo& info);
     virtual void Execute() const override;
 
 private:
diff --git a/src/backends/reference/workloads/RefWorkloads.hpp b/src/backends/reference/workloads/RefWorkloads.hpp
index c8c26b0..7ccd4ef 100644
--- a/src/backends/reference/workloads/RefWorkloads.hpp
+++ b/src/backends/reference/workloads/RefWorkloads.hpp
@@ -21,7 +21,7 @@
 #include "RefGatherWorkload.hpp"
 #include "Softmax.hpp"
 #include "TensorBufferArrayView.hpp"
-#include "RefBatchNormalizationFloat32Workload.hpp"
+#include "RefBatchNormalizationWorkload.hpp"
 #include "Splitter.hpp"
 #include "RefDepthwiseConvolution2dWorkload.hpp"
 #include "FullyConnected.hpp"
@@ -29,7 +29,6 @@
 #include "RefFloorWorkload.hpp"
 #include "RefSoftmaxWorkload.hpp"
 #include "RefResizeBilinearFloat32Workload.hpp"
-#include "RefBatchNormalizationUint8Workload.hpp"
 #include "ResizeBilinear.hpp"
 #include "RefNormalizationFloat32Workload.hpp"
 #include "RefDetectionPostProcessFloat32Workload.hpp"