IVGCVSW-2202 Refactoring Arithmetic* names to Elementwise* names for workloads and workload functions

Change-Id: I6f3fce12a55f7d38ceafcdfcd6b5181bf56e2c09
diff --git a/src/armnn/test/CreateWorkload.hpp b/src/armnn/test/CreateWorkload.hpp
index b8ba72f..349c6a7 100644
--- a/src/armnn/test/CreateWorkload.hpp
+++ b/src/armnn/test/CreateWorkload.hpp
@@ -103,8 +103,8 @@
           typename DescriptorType,
           typename LayerType,
           armnn::DataType DataType>
-std::unique_ptr<WorkloadType> CreateArithmeticWorkloadTest(armnn::IWorkloadFactory& factory,
-                                                           armnn::Graph&            graph)
+std::unique_ptr<WorkloadType> CreateElementwiseWorkloadTest(armnn::IWorkloadFactory& factory,
+                                                            armnn::Graph&            graph)
 {
     // Creates the layer we're testing.
     Layer* const layer = graph.AddLayer<LayerType>("layer");
diff --git a/src/backends/README.md b/src/backends/README.md
index ddd1bb6..60e4d0b 100644
--- a/src/backends/README.md
+++ b/src/backends/README.md
@@ -68,7 +68,7 @@
         RefLayerSupport.cpp \
         RefWorkloadFactory.cpp \
         workloads/Activation.cpp \
-        workloads/ArithmeticFunction.cpp \
+        workloads/ElementwiseFunction.cpp \
         workloads/Broadcast.cpp \
         ...
 
diff --git a/src/backends/cl/test/ClCreateWorkloadTests.cpp b/src/backends/cl/test/ClCreateWorkloadTests.cpp
index 8cef9d7..c5f685d 100644
--- a/src/backends/cl/test/ClCreateWorkloadTests.cpp
+++ b/src/backends/cl/test/ClCreateWorkloadTests.cpp
@@ -55,15 +55,15 @@
           typename DescriptorType,
           typename LayerType,
           armnn::DataType DataType>
-static void ClCreateArithmethicWorkloadTest()
+static void ClCreateElementwiseWorkloadTest()
 {
     Graph graph;
     ClWorkloadFactory factory =
         ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
 
-    auto workload = CreateArithmeticWorkloadTest<WorkloadType, DescriptorType, LayerType, DataType>(factory, graph);
+    auto workload = CreateElementwiseWorkloadTest<WorkloadType, DescriptorType, LayerType, DataType>(factory, graph);
 
-    // Checks that inputs/outputs are as we expect them (see definition of CreateArithmeticWorkloadTest).
+    // Checks that inputs/outputs are as we expect them (see definition of CreateElementwiseWorkloadTest).
     DescriptorType queueDescriptor = workload->GetData();
     auto inputHandle1 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
     auto inputHandle2 = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
@@ -75,7 +75,7 @@
 
 BOOST_AUTO_TEST_CASE(CreateAdditionFloatWorkload)
 {
-    ClCreateArithmethicWorkloadTest<ClAdditionWorkload,
+    ClCreateElementwiseWorkloadTest<ClAdditionWorkload,
                                     AdditionQueueDescriptor,
                                     AdditionLayer,
                                     armnn::DataType::Float32>();
@@ -83,7 +83,7 @@
 
 BOOST_AUTO_TEST_CASE(CreateAdditionFloat16Workload)
 {
-    ClCreateArithmethicWorkloadTest<ClAdditionWorkload,
+    ClCreateElementwiseWorkloadTest<ClAdditionWorkload,
                                     AdditionQueueDescriptor,
                                     AdditionLayer,
                                     armnn::DataType::Float16>();
@@ -91,7 +91,7 @@
 
 BOOST_AUTO_TEST_CASE(CreateSubtractionFloatWorkload)
 {
-    ClCreateArithmethicWorkloadTest<ClSubtractionWorkload,
+    ClCreateElementwiseWorkloadTest<ClSubtractionWorkload,
                                     SubtractionQueueDescriptor,
                                     SubtractionLayer,
                                     armnn::DataType::Float32>();
@@ -99,7 +99,7 @@
 
 BOOST_AUTO_TEST_CASE(CreateSubtractionFloat16Workload)
 {
-    ClCreateArithmethicWorkloadTest<ClSubtractionWorkload,
+    ClCreateElementwiseWorkloadTest<ClSubtractionWorkload,
                                     SubtractionQueueDescriptor,
                                     SubtractionLayer,
                                     armnn::DataType::Float16>();
@@ -107,7 +107,7 @@
 
 BOOST_AUTO_TEST_CASE(CreateMultiplicationFloatWorkloadTest)
 {
-    ClCreateArithmethicWorkloadTest<ClMultiplicationWorkload,
+    ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload,
                                     MultiplicationQueueDescriptor,
                                     MultiplicationLayer,
                                     armnn::DataType::Float32>();
@@ -115,7 +115,7 @@
 
 BOOST_AUTO_TEST_CASE(CreateMultiplicationFloat16WorkloadTest)
 {
-    ClCreateArithmethicWorkloadTest<ClMultiplicationWorkload,
+    ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload,
                                     MultiplicationQueueDescriptor,
                                     MultiplicationLayer,
                                     armnn::DataType::Float16>();
@@ -123,7 +123,7 @@
 
 BOOST_AUTO_TEST_CASE(CreateMultiplicationUint8WorkloadTest)
 {
-    ClCreateArithmethicWorkloadTest<ClMultiplicationWorkload,
+    ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload,
                                     MultiplicationQueueDescriptor,
                                     MultiplicationLayer,
                                     armnn::DataType::QuantisedAsymm8>();
@@ -131,7 +131,7 @@
 
 BOOST_AUTO_TEST_CASE(CreateDivisionFloatWorkloadTest)
 {
-    ClCreateArithmethicWorkloadTest<ClDivisionFloatWorkload,
+    ClCreateElementwiseWorkloadTest<ClDivisionFloatWorkload,
                                     DivisionQueueDescriptor,
                                     DivisionLayer,
                                     armnn::DataType::Float32>();
@@ -139,7 +139,7 @@
 
 BOOST_AUTO_TEST_CASE(CreateDivisionFloat16WorkloadTest)
 {
-    ClCreateArithmethicWorkloadTest<ClDivisionFloatWorkload,
+    ClCreateElementwiseWorkloadTest<ClDivisionFloatWorkload,
                                     DivisionQueueDescriptor,
                                     DivisionLayer,
                                     armnn::DataType::Float16>();
diff --git a/src/backends/neon/test/NeonCreateWorkloadTests.cpp b/src/backends/neon/test/NeonCreateWorkloadTests.cpp
index 1201253..dc6ec16 100644
--- a/src/backends/neon/test/NeonCreateWorkloadTests.cpp
+++ b/src/backends/neon/test/NeonCreateWorkloadTests.cpp
@@ -87,13 +87,13 @@
           typename DescriptorType,
           typename LayerType,
           armnn::DataType DataType>
-static void NeonCreateArithmethicWorkloadTest()
+static void NeonCreateElementwiseWorkloadTest()
 {
     Graph graph;
     NeonWorkloadFactory factory =
         NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
 
-    auto workload = CreateArithmeticWorkloadTest<WorkloadType, DescriptorType, LayerType, DataType>(factory, graph);
+    auto workload = CreateElementwiseWorkloadTest<WorkloadType, DescriptorType, LayerType, DataType>(factory, graph);
 
     DescriptorType queueDescriptor = workload->GetData();
     auto inputHandle1 = boost::polymorphic_downcast<INeonTensorHandle*>(queueDescriptor.m_Inputs[0]);
@@ -107,7 +107,7 @@
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 BOOST_AUTO_TEST_CASE(CreateAdditionFloat16Workload)
 {
-    NeonCreateArithmethicWorkloadTest<NeonAdditionFloatWorkload,
+    NeonCreateElementwiseWorkloadTest<NeonAdditionFloatWorkload,
                                       AdditionQueueDescriptor,
                                       AdditionLayer,
                                       DataType::Float16>();
@@ -116,7 +116,7 @@
 
 BOOST_AUTO_TEST_CASE(CreateAdditionFloatWorkload)
 {
-    NeonCreateArithmethicWorkloadTest<NeonAdditionFloatWorkload,
+    NeonCreateElementwiseWorkloadTest<NeonAdditionFloatWorkload,
                                       AdditionQueueDescriptor,
                                       AdditionLayer,
                                       DataType::Float32>();
@@ -125,7 +125,7 @@
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 BOOST_AUTO_TEST_CASE(CreateSubtractionFloat16Workload)
 {
-    NeonCreateArithmethicWorkloadTest<NeonSubtractionFloatWorkload,
+    NeonCreateElementwiseWorkloadTest<NeonSubtractionFloatWorkload,
                                       SubtractionQueueDescriptor,
                                       SubtractionLayer,
                                       DataType::Float16>();
@@ -134,7 +134,7 @@
 
 BOOST_AUTO_TEST_CASE(CreateSubtractionFloatWorkload)
 {
-    NeonCreateArithmethicWorkloadTest<NeonSubtractionFloatWorkload,
+    NeonCreateElementwiseWorkloadTest<NeonSubtractionFloatWorkload,
                                       SubtractionQueueDescriptor,
                                       SubtractionLayer,
                                       DataType::Float32>();
@@ -143,7 +143,7 @@
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 BOOST_AUTO_TEST_CASE(CreateMultiplicationFloat16Workload)
 {
-    NeonCreateArithmethicWorkloadTest<NeonMultiplicationFloatWorkload,
+    NeonCreateElementwiseWorkloadTest<NeonMultiplicationFloatWorkload,
                                       MultiplicationQueueDescriptor,
                                       MultiplicationLayer,
                                       DataType::Float16>();
@@ -152,7 +152,7 @@
 
 BOOST_AUTO_TEST_CASE(CreateMultiplicationFloatWorkload)
 {
-    NeonCreateArithmethicWorkloadTest<NeonMultiplicationFloatWorkload,
+    NeonCreateElementwiseWorkloadTest<NeonMultiplicationFloatWorkload,
                                       MultiplicationQueueDescriptor,
                                       MultiplicationLayer,
                                       DataType::Float32>();
diff --git a/src/backends/reference/backend.mk b/src/backends/reference/backend.mk
index 7162d4a..66675bd 100644
--- a/src/backends/reference/backend.mk
+++ b/src/backends/reference/backend.mk
@@ -12,17 +12,16 @@
         RefLayerSupport.cpp \
         RefWorkloadFactory.cpp \
         workloads/Activation.cpp \
-        workloads/ArithmeticFunction.cpp \
         workloads/BatchToSpaceNd.cpp \
         workloads/Broadcast.cpp \
         workloads/ConvImpl.cpp \
+        workloads/ElementwiseFunction.cpp \
         workloads/FullyConnected.cpp \
         workloads/Mean.cpp \
         workloads/Pad.cpp \
         workloads/Pooling2d.cpp \
         workloads/RefActivationFloat32Workload.cpp \
         workloads/RefActivationUint8Workload.cpp \
-        workloads/RefArithmeticWorkload.cpp \
         workloads/RefBaseConstantWorkload.cpp \
         workloads/RefBatchNormalizationFloat32Workload.cpp \
         workloads/RefBatchNormalizationUint8Workload.cpp \
@@ -36,6 +35,7 @@
         workloads/RefConvolution2dUint8Workload.cpp \
         workloads/RefDepthwiseConvolution2dFloat32Workload.cpp \
         workloads/RefDepthwiseConvolution2dUint8Workload.cpp \
+        workloads/RefElementwiseWorkload.cpp \
         workloads/RefFakeQuantizationFloat32Workload.cpp \
         workloads/RefFloorFloat32Workload.cpp \
         workloads/RefFullyConnectedFloat32Workload.cpp \
diff --git a/src/backends/reference/test/RefCreateWorkloadTests.cpp b/src/backends/reference/test/RefCreateWorkloadTests.cpp
index 47f9d0e..8621122 100644
--- a/src/backends/reference/test/RefCreateWorkloadTests.cpp
+++ b/src/backends/reference/test/RefCreateWorkloadTests.cpp
@@ -67,11 +67,12 @@
           typename DescriptorType,
           typename LayerType,
           armnn::DataType DataType>
-static void RefCreateArithmethicWorkloadTest()
+static void RefCreateElementwiseWorkloadTest()
 {
     Graph graph;
     RefWorkloadFactory factory;
-    auto workload = CreateArithmeticWorkloadTest<WorkloadType, DescriptorType, LayerType, DataType>(factory, graph);
+    auto workload = CreateElementwiseWorkloadTest<WorkloadType, DescriptorType, LayerType, DataType>(
+        factory, graph);
 
     CheckInputsOutput(std::move(workload),
         TensorInfo({ 2, 3 }, DataType),
@@ -81,66 +82,66 @@
 
 BOOST_AUTO_TEST_CASE(CreateAdditionFloatWorkload)
 {
-    RefCreateArithmethicWorkloadTest<RefAdditionFloat32Workload,
-                                     AdditionQueueDescriptor,
-                                     AdditionLayer,
-                                     armnn::DataType::Float32>();
+    RefCreateElementwiseWorkloadTest<RefAdditionFloat32Workload,
+        AdditionQueueDescriptor,
+        AdditionLayer,
+        armnn::DataType::Float32>();
 }
 
 BOOST_AUTO_TEST_CASE(CreateAdditionUint8Workload)
 {
-    RefCreateArithmethicWorkloadTest<RefAdditionUint8Workload,
-                                     AdditionQueueDescriptor,
-                                     AdditionLayer,
-                                     armnn::DataType::QuantisedAsymm8>();
+    RefCreateElementwiseWorkloadTest<RefAdditionUint8Workload,
+        AdditionQueueDescriptor,
+        AdditionLayer,
+        armnn::DataType::QuantisedAsymm8>();
 }
 
 BOOST_AUTO_TEST_CASE(CreateSubtractionFloatWorkload)
 {
-    RefCreateArithmethicWorkloadTest<RefSubtractionFloat32Workload,
-                                     SubtractionQueueDescriptor,
-                                     SubtractionLayer,
-                                     armnn::DataType::Float32>();
+    RefCreateElementwiseWorkloadTest<RefSubtractionFloat32Workload,
+        SubtractionQueueDescriptor,
+        SubtractionLayer,
+        armnn::DataType::Float32>();
 }
 
 BOOST_AUTO_TEST_CASE(CreateSubtractionUint8Workload)
 {
-    RefCreateArithmethicWorkloadTest<RefSubtractionUint8Workload,
-                                     SubtractionQueueDescriptor,
-                                     SubtractionLayer,
-                                     armnn::DataType::QuantisedAsymm8>();
+    RefCreateElementwiseWorkloadTest<RefSubtractionUint8Workload,
+        SubtractionQueueDescriptor,
+        SubtractionLayer,
+        armnn::DataType::QuantisedAsymm8>();
 }
 
 BOOST_AUTO_TEST_CASE(CreateMultiplicationFloatWorkload)
 {
-    RefCreateArithmethicWorkloadTest<RefMultiplicationFloat32Workload,
-                                     MultiplicationQueueDescriptor,
-                                     MultiplicationLayer,
-                                     armnn::DataType::Float32>();
+    RefCreateElementwiseWorkloadTest<RefMultiplicationFloat32Workload,
+        MultiplicationQueueDescriptor,
+        MultiplicationLayer,
+        armnn::DataType::Float32>();
 }
 
 BOOST_AUTO_TEST_CASE(CreateMultiplicationUint8Workload)
 {
-    RefCreateArithmethicWorkloadTest<RefMultiplicationUint8Workload,
-                                     MultiplicationQueueDescriptor,
-                                     MultiplicationLayer,
-                                     armnn::DataType::QuantisedAsymm8>();
+    RefCreateElementwiseWorkloadTest<RefMultiplicationUint8Workload,
+        MultiplicationQueueDescriptor,
+        MultiplicationLayer,
+        armnn::DataType::QuantisedAsymm8>();
 }
 
 BOOST_AUTO_TEST_CASE(CreateDivisionFloatWorkload)
 {
-    RefCreateArithmethicWorkloadTest<RefDivisionFloat32Workload,
-                                     DivisionQueueDescriptor,
-                                     DivisionLayer,
-                                     armnn::DataType::Float32>();
+    RefCreateElementwiseWorkloadTest<RefDivisionFloat32Workload,
+        DivisionQueueDescriptor,
+        DivisionLayer,
+        armnn::DataType::Float32>();
 }
 
 BOOST_AUTO_TEST_CASE(CreateDivisionUint8Workload)
 {
-    RefCreateArithmethicWorkloadTest<RefDivisionUint8Workload,
-                                     DivisionQueueDescriptor,
-                                     DivisionLayer,
-                                     armnn::DataType::QuantisedAsymm8>();
+    RefCreateElementwiseWorkloadTest<RefDivisionUint8Workload,
+        DivisionQueueDescriptor,
+        DivisionLayer,
+        armnn::DataType::QuantisedAsymm8>();
 }
 
 template <typename BatchNormalizationWorkloadType, armnn::DataType DataType>
diff --git a/src/backends/reference/workloads/CMakeLists.txt b/src/backends/reference/workloads/CMakeLists.txt
index 2d9ad92..86c5f90 100644
--- a/src/backends/reference/workloads/CMakeLists.txt
+++ b/src/backends/reference/workloads/CMakeLists.txt
@@ -6,8 +6,6 @@
 list(APPEND armnnRefBackendWorkloads_sources
     Activation.cpp
     Activation.hpp
-    ArithmeticFunction.cpp
-    ArithmeticFunction.hpp
     BatchNormImpl.hpp
     BatchToSpaceNd.cpp
     BatchToSpaceNd.hpp
@@ -15,6 +13,8 @@
     Broadcast.hpp
     ConvImpl.cpp
     ConvImpl.hpp
+    ElementwiseFunction.cpp
+    ElementwiseFunction.hpp
     FullyConnected.cpp
     FullyConnected.hpp
     Merger.hpp
@@ -26,8 +26,6 @@
     RefActivationFloat32Workload.hpp
     RefActivationUint8Workload.cpp
     RefActivationUint8Workload.hpp
-    RefArithmeticWorkload.cpp
-    RefArithmeticWorkload.hpp
     RefBaseConstantWorkload.cpp
     RefBaseConstantWorkload.hpp
     RefBatchNormalizationFloat32Workload.cpp
@@ -50,6 +48,8 @@
     RefConvolution2dFloat32Workload.hpp
     RefConvolution2dUint8Workload.cpp
     RefConvolution2dUint8Workload.hpp
+    RefElementwiseWorkload.cpp
+    RefElementwiseWorkload.hpp
     RefDepthwiseConvolution2dFloat32Workload.cpp
     RefDepthwiseConvolution2dFloat32Workload.hpp
     RefDepthwiseConvolution2dUint8Workload.cpp
diff --git a/src/backends/reference/workloads/ArithmeticFunction.cpp b/src/backends/reference/workloads/ElementwiseFunction.cpp
similarity index 64%
rename from src/backends/reference/workloads/ArithmeticFunction.cpp
rename to src/backends/reference/workloads/ElementwiseFunction.cpp
index fede138..bea3d2f 100644
--- a/src/backends/reference/workloads/ArithmeticFunction.cpp
+++ b/src/backends/reference/workloads/ElementwiseFunction.cpp
@@ -3,7 +3,7 @@
 // SPDX-License-Identifier: MIT
 //
 
-#include "ArithmeticFunction.hpp"
+#include "ElementwiseFunction.hpp"
 #include "Broadcast.hpp"
 #include <functional>
 
@@ -11,7 +11,7 @@
 {
 
 template <typename Functor>
-ArithmeticFunction<Functor>::ArithmeticFunction(const TensorShape& inShape0,
+ElementwiseFunction<Functor>::ElementwiseFunction(const TensorShape& inShape0,
                                                 const TensorShape& inShape1,
                                                 const TensorShape& outShape,
                                                 const float* inData0,
@@ -23,7 +23,7 @@
 
 } //namespace armnn
 
-template struct armnn::ArithmeticFunction<std::plus<float>>;
-template struct armnn::ArithmeticFunction<std::minus<float>>;
-template struct armnn::ArithmeticFunction<std::multiplies<float>>;
-template struct armnn::ArithmeticFunction<std::divides<float>>;
+template struct armnn::ElementwiseFunction<std::plus<float>>;
+template struct armnn::ElementwiseFunction<std::minus<float>>;
+template struct armnn::ElementwiseFunction<std::multiplies<float>>;
+template struct armnn::ElementwiseFunction<std::divides<float>>;
diff --git a/src/backends/reference/workloads/ArithmeticFunction.hpp b/src/backends/reference/workloads/ElementwiseFunction.hpp
similarity index 84%
rename from src/backends/reference/workloads/ArithmeticFunction.hpp
rename to src/backends/reference/workloads/ElementwiseFunction.hpp
index eafb644..5011616 100644
--- a/src/backends/reference/workloads/ArithmeticFunction.hpp
+++ b/src/backends/reference/workloads/ElementwiseFunction.hpp
@@ -11,9 +11,9 @@
 {
 
 template <typename Functor>
-struct ArithmeticFunction
+struct ElementwiseFunction
 {
-    ArithmeticFunction(const TensorShape& inShape0,
+    ElementwiseFunction(const TensorShape& inShape0,
                        const TensorShape& inShape1,
                        const TensorShape& outShape,
                        const float* inData0,
diff --git a/src/backends/reference/workloads/RefArithmeticWorkload.cpp b/src/backends/reference/workloads/RefArithmeticWorkload.cpp
deleted file mode 100644
index 6c39fa1..0000000
--- a/src/backends/reference/workloads/RefArithmeticWorkload.cpp
+++ /dev/null
@@ -1,69 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#include "RefArithmeticWorkload.hpp"
-#include "ArithmeticFunction.hpp"
-#include "RefWorkloadUtils.hpp"
-#include "Profiling.hpp"
-#include <vector>
-
-namespace armnn
-{
-
-template <typename ParentDescriptor, typename Functor>
-void BaseFloat32ArithmeticWorkload<ParentDescriptor, Functor>::ExecuteImpl(const char * debugString) const
-{
-    ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, debugString);
-
-    auto data = Float32Workload<ParentDescriptor>::GetData();
-    const TensorShape& inShape0 = GetTensorInfo(data.m_Inputs[0]).GetShape();
-    const TensorShape& inShape1 = GetTensorInfo(data.m_Inputs[1]).GetShape();
-    const TensorShape& outShape = GetTensorInfo(data.m_Outputs[0]).GetShape();
-
-    const float* inData0 = GetInputTensorDataFloat(0, data);
-    const float* inData1 = GetInputTensorDataFloat(1, data);
-    float* outData = GetOutputTensorDataFloat(0, data);
-
-    ArithmeticFunction<Functor>(inShape0, inShape1, outShape, inData0, inData1, outData);
-}
-
-template <typename ParentDescriptor, typename Functor>
-void BaseUint8ArithmeticWorkload<ParentDescriptor, Functor>::ExecuteImpl(const char * debugString) const
-{
-    ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, debugString);
-
-    auto data = Uint8Workload<ParentDescriptor>::GetData();
-    const TensorInfo& inputInfo0 = GetTensorInfo(data.m_Inputs[0]);
-    const TensorInfo& inputInfo1 = GetTensorInfo(data.m_Inputs[1]);
-    const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[0]);
-
-    auto dequant0 = Dequantize(GetInputTensorDataU8(0, data), inputInfo0);
-    auto dequant1 = Dequantize(GetInputTensorDataU8(1, data), inputInfo1);
-
-    std::vector<float> results(outputInfo.GetNumElements());
-
-    ArithmeticFunction<Functor>(inputInfo0.GetShape(),
-                                inputInfo1.GetShape(),
-                                outputInfo.GetShape(),
-                                dequant0.data(),
-                                dequant1.data(),
-                                results.data());
-
-    Quantize(GetOutputTensorDataU8(0, data), results.data(), outputInfo);
-}
-
-}
-
-template class armnn::BaseFloat32ArithmeticWorkload<armnn::AdditionQueueDescriptor, std::plus<float>>;
-template class armnn::BaseUint8ArithmeticWorkload<armnn::AdditionQueueDescriptor, std::plus<float>>;
-
-template class armnn::BaseFloat32ArithmeticWorkload<armnn::SubtractionQueueDescriptor, std::minus<float>>;
-template class armnn::BaseUint8ArithmeticWorkload<armnn::SubtractionQueueDescriptor, std::minus<float>>;
-
-template class armnn::BaseFloat32ArithmeticWorkload<armnn::MultiplicationQueueDescriptor, std::multiplies<float>>;
-template class armnn::BaseUint8ArithmeticWorkload<armnn::MultiplicationQueueDescriptor, std::multiplies<float>>;
-
-template class armnn::BaseFloat32ArithmeticWorkload<armnn::DivisionQueueDescriptor, std::divides<float>>;
-template class armnn::BaseUint8ArithmeticWorkload<armnn::DivisionQueueDescriptor, std::divides<float>>;
diff --git a/src/backends/reference/workloads/RefElementwiseWorkload.cpp b/src/backends/reference/workloads/RefElementwiseWorkload.cpp
new file mode 100644
index 0000000..8e312a7
--- /dev/null
+++ b/src/backends/reference/workloads/RefElementwiseWorkload.cpp
@@ -0,0 +1,69 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefElementwiseWorkload.hpp"
+#include "ElementwiseFunction.hpp"
+#include "RefWorkloadUtils.hpp"
+#include "Profiling.hpp"
+#include <vector>
+
+namespace armnn
+{
+
+template <typename ParentDescriptor, typename Functor>
+void BaseFloat32ElementwiseWorkload<ParentDescriptor, Functor>::ExecuteImpl(const char * debugString) const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, debugString);
+
+    auto data = Float32Workload<ParentDescriptor>::GetData();
+    const TensorShape& inShape0 = GetTensorInfo(data.m_Inputs[0]).GetShape();
+    const TensorShape& inShape1 = GetTensorInfo(data.m_Inputs[1]).GetShape();
+    const TensorShape& outShape = GetTensorInfo(data.m_Outputs[0]).GetShape();
+
+    const float* inData0 = GetInputTensorDataFloat(0, data);
+    const float* inData1 = GetInputTensorDataFloat(1, data);
+    float* outData = GetOutputTensorDataFloat(0, data);
+
+    ElementwiseFunction<Functor>(inShape0, inShape1, outShape, inData0, inData1, outData);
+}
+
+template <typename ParentDescriptor, typename Functor>
+void BaseUint8ElementwiseWorkload<ParentDescriptor, Functor>::ExecuteImpl(const char * debugString) const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, debugString);
+
+    auto data = Uint8Workload<ParentDescriptor>::GetData();
+    const TensorInfo& inputInfo0 = GetTensorInfo(data.m_Inputs[0]);
+    const TensorInfo& inputInfo1 = GetTensorInfo(data.m_Inputs[1]);
+    const TensorInfo& outputInfo = GetTensorInfo(data.m_Outputs[0]);
+
+    auto dequant0 = Dequantize(GetInputTensorDataU8(0, data), inputInfo0);
+    auto dequant1 = Dequantize(GetInputTensorDataU8(1, data), inputInfo1);
+
+    std::vector<float> results(outputInfo.GetNumElements());
+
+    ElementwiseFunction<Functor>(inputInfo0.GetShape(),
+                                 inputInfo1.GetShape(),
+                                 outputInfo.GetShape(),
+                                 dequant0.data(),
+                                 dequant1.data(),
+                                 results.data());
+
+    Quantize(GetOutputTensorDataU8(0, data), results.data(), outputInfo);
+}
+
+}
+
+template class armnn::BaseFloat32ElementwiseWorkload<armnn::AdditionQueueDescriptor, std::plus<float>>;
+template class armnn::BaseUint8ElementwiseWorkload<armnn::AdditionQueueDescriptor, std::plus<float>>;
+
+template class armnn::BaseFloat32ElementwiseWorkload<armnn::SubtractionQueueDescriptor, std::minus<float>>;
+template class armnn::BaseUint8ElementwiseWorkload<armnn::SubtractionQueueDescriptor, std::minus<float>>;
+
+template class armnn::BaseFloat32ElementwiseWorkload<armnn::MultiplicationQueueDescriptor, std::multiplies<float>>;
+template class armnn::BaseUint8ElementwiseWorkload<armnn::MultiplicationQueueDescriptor, std::multiplies<float>>;
+
+template class armnn::BaseFloat32ElementwiseWorkload<armnn::DivisionQueueDescriptor, std::divides<float>>;
+template class armnn::BaseUint8ElementwiseWorkload<armnn::DivisionQueueDescriptor, std::divides<float>>;
diff --git a/src/backends/reference/workloads/RefArithmeticWorkload.hpp b/src/backends/reference/workloads/RefElementwiseWorkload.hpp
similarity index 71%
rename from src/backends/reference/workloads/RefArithmeticWorkload.hpp
rename to src/backends/reference/workloads/RefElementwiseWorkload.hpp
index 7560617..156613a 100644
--- a/src/backends/reference/workloads/RefArithmeticWorkload.hpp
+++ b/src/backends/reference/workloads/RefElementwiseWorkload.hpp
@@ -17,13 +17,13 @@
           typename armnn::DataType DataType,
           typename ParentDescriptor,
           typename armnn::StringMapping::Id DebugString>
-class RefArithmeticWorkload
+class RefElementwiseWorkload
 {
     // Needs specialization. The default is empty on purpose.
 };
 
 template <typename ParentDescriptor, typename Functor>
-class BaseFloat32ArithmeticWorkload : public Float32Workload<ParentDescriptor>
+class BaseFloat32ElementwiseWorkload : public Float32Workload<ParentDescriptor>
 {
 public:
     using Float32Workload<ParentDescriptor>::Float32Workload;
@@ -33,21 +33,21 @@
 template <typename Functor,
           typename ParentDescriptor,
           typename armnn::StringMapping::Id DebugString>
-class RefArithmeticWorkload<Functor, armnn::DataType::Float32, ParentDescriptor, DebugString>
-    : public BaseFloat32ArithmeticWorkload<ParentDescriptor, Functor>
+class RefElementwiseWorkload<Functor, armnn::DataType::Float32, ParentDescriptor, DebugString>
+    : public BaseFloat32ElementwiseWorkload<ParentDescriptor, Functor>
 {
 public:
-    using BaseFloat32ArithmeticWorkload<ParentDescriptor, Functor>::BaseFloat32ArithmeticWorkload;
+    using BaseFloat32ElementwiseWorkload<ParentDescriptor, Functor>::BaseFloat32ElementwiseWorkload;
 
     virtual void Execute() const override
     {
-        using Parent = BaseFloat32ArithmeticWorkload<ParentDescriptor, Functor>;
+        using Parent = BaseFloat32ElementwiseWorkload<ParentDescriptor, Functor>;
         Parent::ExecuteImpl(StringMapping::Instance().Get(DebugString));
     }
 };
 
 template <typename ParentDescriptor, typename Functor>
-class BaseUint8ArithmeticWorkload : public Uint8Workload<ParentDescriptor>
+class BaseUint8ElementwiseWorkload : public Uint8Workload<ParentDescriptor>
 {
 public:
     using Uint8Workload<ParentDescriptor>::Uint8Workload;
@@ -57,64 +57,64 @@
 template <typename Functor,
           typename ParentDescriptor,
           typename armnn::StringMapping::Id DebugString>
-class RefArithmeticWorkload<Functor, armnn::DataType::QuantisedAsymm8, ParentDescriptor, DebugString>
-    : public BaseUint8ArithmeticWorkload<ParentDescriptor, Functor>
+class RefElementwiseWorkload<Functor, armnn::DataType::QuantisedAsymm8, ParentDescriptor, DebugString>
+    : public BaseUint8ElementwiseWorkload<ParentDescriptor, Functor>
 {
 public:
-    using BaseUint8ArithmeticWorkload<ParentDescriptor, Functor>::BaseUint8ArithmeticWorkload;
+    using BaseUint8ElementwiseWorkload<ParentDescriptor, Functor>::BaseUint8ElementwiseWorkload;
 
     virtual void Execute() const override
     {
-        using Parent = BaseUint8ArithmeticWorkload<ParentDescriptor, Functor>;
+        using Parent = BaseUint8ElementwiseWorkload<ParentDescriptor, Functor>;
         Parent::ExecuteImpl(StringMapping::Instance().Get(DebugString));
     }
 };
 
 using RefAdditionFloat32Workload =
-    RefArithmeticWorkload<std::plus<float>,
+    RefElementwiseWorkload<std::plus<float>,
                           DataType::Float32,
                           AdditionQueueDescriptor,
                           StringMapping::RefAdditionWorkload_Execute>;
 
 using RefAdditionUint8Workload =
-    RefArithmeticWorkload<std::plus<float>,
+    RefElementwiseWorkload<std::plus<float>,
                           DataType::QuantisedAsymm8,
                           AdditionQueueDescriptor,
                           StringMapping::RefAdditionWorkload_Execute>;
 
 
 using RefSubtractionFloat32Workload =
-    RefArithmeticWorkload<std::minus<float>,
+    RefElementwiseWorkload<std::minus<float>,
                           DataType::Float32,
                           SubtractionQueueDescriptor,
                           StringMapping::RefSubtractionWorkload_Execute>;
 
 using RefSubtractionUint8Workload =
-    RefArithmeticWorkload<std::minus<float>,
+    RefElementwiseWorkload<std::minus<float>,
                           DataType::QuantisedAsymm8,
                           SubtractionQueueDescriptor,
                           StringMapping::RefSubtractionWorkload_Execute>;
 
 using RefMultiplicationFloat32Workload =
-    RefArithmeticWorkload<std::multiplies<float>,
+    RefElementwiseWorkload<std::multiplies<float>,
                           DataType::Float32,
                           MultiplicationQueueDescriptor,
                           StringMapping::RefMultiplicationWorkload_Execute>;
 
 using RefMultiplicationUint8Workload =
-    RefArithmeticWorkload<std::multiplies<float>,
+    RefElementwiseWorkload<std::multiplies<float>,
                           DataType::QuantisedAsymm8,
                           MultiplicationQueueDescriptor,
                           StringMapping::RefMultiplicationWorkload_Execute>;
 
 using RefDivisionFloat32Workload =
-    RefArithmeticWorkload<std::divides<float>,
+    RefElementwiseWorkload<std::divides<float>,
                           DataType::Float32,
                           DivisionQueueDescriptor,
                           StringMapping::RefDivisionWorkload_Execute>;
 
 using RefDivisionUint8Workload =
-    RefArithmeticWorkload<std::divides<float>,
+    RefElementwiseWorkload<std::divides<float>,
                           DataType::QuantisedAsymm8,
                           DivisionQueueDescriptor,
                           StringMapping::RefDivisionWorkload_Execute>;
diff --git a/src/backends/reference/workloads/RefWorkloads.hpp b/src/backends/reference/workloads/RefWorkloads.hpp
index 20e9a9f..86d8624 100644
--- a/src/backends/reference/workloads/RefWorkloads.hpp
+++ b/src/backends/reference/workloads/RefWorkloads.hpp
@@ -6,8 +6,8 @@
 #pragma once
 
 #include "RefConstantUint8Workload.hpp"
-#include "ArithmeticFunction.hpp"
-#include "RefArithmeticWorkload.hpp"
+#include "ElementwiseFunction.hpp"
+#include "RefElementwiseWorkload.hpp"
 #include "ConvImpl.hpp"
 #include "RefBaseConstantWorkload.hpp"
 #include "RefConvolution2dUint8Workload.hpp"