IVGCVSW-3808 Add ElementwiseBinaryLayer

!android-nn-driver:9329

 * Added ElementwiseBinaryLayer that can represent all ElementwiseBinary
   operations including Add, Div, Sub, Maximum, Mul and Minimum.
 * Updated Delegate to use ElementwiseBinaryLayer instead of the Add,
   Div, Sub, Maximum, Mul and Minimum layers.
 * Updated Deserializer to use ElementwiseBinaryLayer instead of the Add,
   Div, Sub, Maximum, Mul and Minimum layers.
 * Updated OnnxParser to use ElementwiseBinaryLayer instead of the Add
   layer.
 * Updated TfLiteParser to use ElementwiseBinaryLayer instead of the Add,
   Div, Sub, Maximum, Mul and Minimum layers.
 * Updated CL and Neon tests to use ElementwiseBinaryLayer.
 * Updated CL and Neon Backend Specific Optimizations to accept
   ElementwiseBinaryLayers as well as Add, Div, Mul, Sub, Maximum and Minimum
   layers.

Signed-off-by: Teresa Charlin <teresa.charlinreyes@arm.com>
Signed-off-by: Mike Kelly <mike.kelly@arm.com>
Change-Id: I7cbb96b60eb01f0e2b57b0541016d48a08b86c75
diff --git a/src/backends/reference/RefLayerSupport.cpp b/src/backends/reference/RefLayerSupport.cpp
index a5015a7..cbc6723 100644
--- a/src/backends/reference/RefLayerSupport.cpp
+++ b/src/backends/reference/RefLayerSupport.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2017,2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 
@@ -189,6 +189,36 @@
             return IsDequantizeSupported(infos[0], infos[1], reasonIfUnsupported);
         case LayerType::Division:
             return IsDivisionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
+        case LayerType::ElementwiseBinary:
+        {
+            // Support check used for every BinaryOperation. infos[0] and infos[1] are the two inputs and
+            // infos[2] is the output; each must be one of the types listed here and all three must match.
+            // The array is sized to its six initialisers so that no value-initialised DataType(0) slot is
+            // silently added to the supported set.
+            std::array<DataType, 6> supportedTypes =
+            {
+                DataType::Float32,  DataType::Float16,  DataType::QAsymmS8,
+                DataType::QAsymmU8, DataType::QSymmS16, DataType::Signed32
+            };
+
+            bool supported = true;
+            supported &= CheckSupportRule(TypeAnyOf(infos[0], supportedTypes), reasonIfUnsupported,
+                                          "Reference elementwise binary: input type not supported");
+
+            supported &= CheckSupportRule(TypeAnyOf(infos[1], supportedTypes), reasonIfUnsupported,
+                                          "Reference elementwise binary: input type not supported");
+
+            supported &= CheckSupportRule(TypeAnyOf(infos[2], supportedTypes), reasonIfUnsupported,
+                                          "Reference elementwise binary: output type not supported");
+
+            supported &= CheckSupportRule(TypesAreEqual(infos[0], infos[1]), reasonIfUnsupported,
+                                          "Reference elementwise binary: input types not matching");
+
+            supported &= CheckSupportRule(TypesAreEqual(infos[0], infos[2]), reasonIfUnsupported,
+                                          "Reference elementwise binary: input and output types not matching");
+
+            return supported;
+        }
         case LayerType::ElementwiseUnary:
             return IsElementwiseUnarySupported(infos[0],
                                                infos[1],
diff --git a/src/backends/reference/RefLayerSupport.hpp b/src/backends/reference/RefLayerSupport.hpp
index f0e9e35..8e1f68e 100644
--- a/src/backends/reference/RefLayerSupport.hpp
+++ b/src/backends/reference/RefLayerSupport.hpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 #pragma once
diff --git a/src/backends/reference/RefWorkloadFactory.cpp b/src/backends/reference/RefWorkloadFactory.cpp
index bfe37d7..10f623e 100644
--- a/src/backends/reference/RefWorkloadFactory.cpp
+++ b/src/backends/reference/RefWorkloadFactory.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 #include <Layer.hpp>
@@ -302,6 +302,12 @@
                 return std::make_unique<RefDivisionWorkload<float>>(*divisionQueueDescriptor, info);
             }
         }
+        case LayerType::ElementwiseBinary:
+        {
+            // A single workload class serves every BinaryOperation; the operation travels in the descriptor.
+            const auto* binaryDesc = PolymorphicDowncast<const ElementwiseBinaryQueueDescriptor*>(&descriptor);
+            return std::make_unique<RefElementwiseBinaryWorkload>(*binaryDesc, info);
+        }
         case LayerType::ElementwiseUnary:
         {
             auto elementwiseUnaryQueueDescriptor
diff --git a/src/backends/reference/backend.mk b/src/backends/reference/backend.mk
index eb2ec2d..c23984c 100644
--- a/src/backends/reference/backend.mk
+++ b/src/backends/reference/backend.mk
@@ -67,6 +67,7 @@
         workloads/RefDepthwiseConvolution2dWorkload.cpp \
         workloads/RefDequantizeWorkload.cpp \
         workloads/RefDetectionPostProcessWorkload.cpp \
+        workloads/RefElementwiseBinaryWorkload.cpp \
         workloads/RefElementwiseWorkload.cpp \
         workloads/RefElementwiseUnaryWorkload.cpp \
         workloads/RefFakeQuantizationFloat32Workload.cpp \
diff --git a/src/backends/reference/test/RefEndToEndTests.cpp b/src/backends/reference/test/RefEndToEndTests.cpp
index 6ff5771..8bf414f 100644
--- a/src/backends/reference/test/RefEndToEndTests.cpp
+++ b/src/backends/reference/test/RefEndToEndTests.cpp
@@ -18,6 +18,7 @@
 #include <backendsCommon/test/DepthToSpaceEndToEndTestImpl.hpp>
 #include <backendsCommon/test/DequantizeEndToEndTestImpl.hpp>
 #include <backendsCommon/test/DetectionPostProcessEndToEndTestImpl.hpp>
+#include <backendsCommon/test/ElementwiseBinaryEndToEndTestImpl.hpp>
 #include <backendsCommon/test/ElementwiseUnaryEndToEndTestImpl.hpp>
 #include <backendsCommon/test/FillEndToEndTestImpl.hpp>
 #include <backendsCommon/test/FullyConnectedEndToEndTestImpl.hpp>
@@ -185,7 +186,7 @@
 
     IConnectableLayer* input1 = net->AddInputLayer(0);
     IConnectableLayer* input2 = net->AddInputLayer(1);
-    IConnectableLayer* add    = net->AddAdditionLayer();
+    IConnectableLayer* add    = net->AddElementwiseBinaryLayer(ElementwiseBinaryDescriptor(BinaryOperation::Add));
     IConnectableLayer* output = net->AddOutputLayer(0);
 
     input1->GetOutputSlot(0).Connect(add->GetInputSlot(0));
@@ -347,7 +348,7 @@
 
     IConnectableLayer* input1 = net->AddInputLayer(0);
     IConnectableLayer* input2 = net->AddInputLayer(1);
-    IConnectableLayer* min    = net->AddMinimumLayer();
+    IConnectableLayer* min    = net->AddElementwiseBinaryLayer(ElementwiseBinaryDescriptor(BinaryOperation::Minimum));
     IConnectableLayer* output = net->AddOutputLayer(0);
 
     input1->GetOutputSlot(0).Connect(min->GetInputSlot(0));
@@ -1547,6 +1548,55 @@
 {
     armnn::experimental::StridedSlicedEndToEndTest<armnn::DataType::Float32>(defaultBackends, 3);
 }
+
+TEST_CASE("RefAddEndToEndTestFloat32") // each BinaryOperation below: a Float32 case, then a QAsymmU8 case
+{
+    ElementwiseBinarySimpleEndToEnd<armnn::DataType::Float32>(defaultBackends, BinaryOperation::Add);
+}
+TEST_CASE("RefAddEndToEndTestUint8")
+{
+    ElementwiseBinarySimpleEndToEnd<armnn::DataType::QAsymmU8>(defaultBackends, BinaryOperation::Add);
+}
+TEST_CASE("RefDivEndToEndTestFloat32")
+{
+    ElementwiseBinarySimpleEndToEnd<armnn::DataType::Float32>(defaultBackends, BinaryOperation::Div);
+}
+TEST_CASE("RefDivEndToEndTestUint8")
+{
+    ElementwiseBinarySimpleEndToEnd<armnn::DataType::QAsymmU8>(defaultBackends, BinaryOperation::Div);
+}
+TEST_CASE("RefMulEndToEndTestFloat32")
+{
+    ElementwiseBinarySimpleEndToEnd<armnn::DataType::Float32>(defaultBackends, BinaryOperation::Mul);
+}
+TEST_CASE("RefMulEndToEndTestUint8")
+{
+    ElementwiseBinarySimpleEndToEnd<armnn::DataType::QAsymmU8>(defaultBackends, BinaryOperation::Mul);
+}
+TEST_CASE("RefSubEndToEndTestFloat32")
+{
+    ElementwiseBinarySimpleEndToEnd<armnn::DataType::Float32>(defaultBackends, BinaryOperation::Sub);
+}
+TEST_CASE("RefSubEndToEndTestUint8")
+{
+    ElementwiseBinarySimpleEndToEnd<armnn::DataType::QAsymmU8>(defaultBackends, BinaryOperation::Sub);
+}
+TEST_CASE("RefMaximumEndToEndTestFloat32")
+{
+    ElementwiseBinarySimpleEndToEnd<armnn::DataType::Float32>(defaultBackends, BinaryOperation::Maximum);
+}
+TEST_CASE("RefMaximumEndToEndTestUint8")
+{
+    ElementwiseBinarySimpleEndToEnd<armnn::DataType::QAsymmU8>(defaultBackends, BinaryOperation::Maximum);
+}
+TEST_CASE("RefMinimumEndToEndTestFloat32")
+{
+    ElementwiseBinarySimpleEndToEnd<armnn::DataType::Float32>(defaultBackends, BinaryOperation::Minimum);
+}
+TEST_CASE("RefMinimumEndToEndTestUint8")
+{
+    ElementwiseBinarySimpleEndToEnd<armnn::DataType::QAsymmU8>(defaultBackends, BinaryOperation::Minimum);
+}
 #endif
 
 }
diff --git a/src/backends/reference/test/RefOptimizedNetworkTests.cpp b/src/backends/reference/test/RefOptimizedNetworkTests.cpp
index 7ca1e0e..7e8064f 100644
--- a/src/backends/reference/test/RefOptimizedNetworkTests.cpp
+++ b/src/backends/reference/test/RefOptimizedNetworkTests.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2017,2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 
@@ -53,7 +53,7 @@
     layer->GetOutputSlot(0).SetTensorInfo(desc);
 
     armnn::IConnectableLayer* prevLayer = layer;
-    layer = net->AddMultiplicationLayer("ml");
+    layer = net->AddElementwiseBinaryLayer(armnn::BinaryOperation::Mul, "ml");
 
     prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
     normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
diff --git a/src/backends/reference/workloads/CMakeLists.txt b/src/backends/reference/workloads/CMakeLists.txt
index de6c042..3592f22 100644
--- a/src/backends/reference/workloads/CMakeLists.txt
+++ b/src/backends/reference/workloads/CMakeLists.txt
@@ -1,5 +1,5 @@
 #
-# Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+# Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
 # SPDX-License-Identifier: MIT
 #
 
@@ -108,6 +108,8 @@
     RefDequantizeWorkload.hpp
     RefDetectionPostProcessWorkload.cpp
     RefDetectionPostProcessWorkload.hpp
+    RefElementwiseBinaryWorkload.cpp
+    RefElementwiseBinaryWorkload.hpp
     RefElementwiseUnaryWorkload.cpp
     RefElementwiseUnaryWorkload.hpp
     RefFakeQuantizationFloat32Workload.cpp
diff --git a/src/backends/reference/workloads/RefElementwiseBinaryWorkload.cpp b/src/backends/reference/workloads/RefElementwiseBinaryWorkload.cpp
new file mode 100644
index 0000000..5dc77f8
--- /dev/null
+++ b/src/backends/reference/workloads/RefElementwiseBinaryWorkload.cpp
@@ -0,0 +1,120 @@
+//
+// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefElementwiseBinaryWorkload.hpp"
+
+#include "Decoders.hpp"
+#include "ElementwiseFunction.hpp"
+#include "Encoders.hpp"
+#include "RefWorkloadUtils.hpp"
+#include "Maximum.hpp"
+#include "Minimum.hpp"
+
+#include <Profiling.hpp>
+
+#include <armnn/TypesUtils.hpp>
+
+#include <functional>
+
+namespace armnn
+{
+
+/// Applies one element-wise binary operation to two input tensors and writes the result to outputs[0].
+///
+/// @tparam DataType  Arithmetic type used by the Decoder, the Encoder and the functor.
+/// @param inputs     Tensor handles of the operands; inputs[0] and inputs[1] are mapped and read.
+/// @param outputs    Tensor handles of the results; only outputs[0] is mapped and written.
+/// @param operation  The BinaryOperation to evaluate.
+/// @throws InvalidArgumentException if operation is not Add, Div, Maximum, Minimum, Mul or Sub.
+///
+/// The handle vectors are taken by const reference because they are only indexed, never modified.
+template<typename DataType>
+void ExecuteFunction(const std::vector<ITensorHandle*>& inputs,
+                     const std::vector<ITensorHandle*>& outputs,
+                     BinaryOperation operation)
+{
+    const TensorInfo& inputInfo0 = GetTensorInfo(inputs[0]);
+    const TensorInfo& inputInfo1 = GetTensorInfo(inputs[1]);
+    const TensorInfo& outputInfo = GetTensorInfo(outputs[0]);
+
+    const TensorShape& inShape0 = inputInfo0.GetShape();
+    const TensorShape& inShape1 = inputInfo1.GetShape();
+    const TensorShape& outShape = outputInfo.GetShape();
+
+    // Each Decoder/Encoder is built from a tensor's info and its mapped buffer.
+    std::unique_ptr<Decoder<DataType>> input0 = MakeDecoder<DataType>(inputInfo0, inputs[0]->Map());
+    std::unique_ptr<Decoder<DataType>> input1 = MakeDecoder<DataType>(inputInfo1, inputs[1]->Map());
+    std::unique_ptr<Encoder<DataType>> output = MakeEncoder<DataType>(outputInfo, outputs[0]->Map());
+
+    // One alias per operation. The switch below constructs a temporary of the selected alias; nothing else
+    // is called on it, so the computation happens during that construction.
+    using AddFunction     = ElementwiseBinaryFunction<std::plus<DataType>>;
+    using DivFunction     = ElementwiseBinaryFunction<std::divides<DataType>>;
+    using MaximumFunction = ElementwiseBinaryFunction<armnn::maximum<DataType>>;
+    using MinimumFunction = ElementwiseBinaryFunction<armnn::minimum<DataType>>;
+    using MulFunction     = ElementwiseBinaryFunction<std::multiplies<DataType>>;
+    using SubFunction     = ElementwiseBinaryFunction<std::minus<DataType>>;
+
+    switch (operation)
+    {
+        case BinaryOperation::Add:
+            AddFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
+            break;
+        case BinaryOperation::Div:
+            DivFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
+            break;
+        case BinaryOperation::Maximum:
+            MaximumFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
+            break;
+        case BinaryOperation::Minimum:
+            MinimumFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
+            break;
+        case BinaryOperation::Mul:
+            MulFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
+            break;
+        case BinaryOperation::Sub:
+            SubFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
+            break;
+        default:
+        {
+            throw InvalidArgumentException(std::string("Unsupported binary operation ") +
+                                           GetBinaryOperationAsCString(operation), CHECK_LOCATION());
+        }
+    }
+}
+
+// Forwards the queue descriptor and workload info to RefBaseWorkload; nothing else is initialised here.
+RefElementwiseBinaryWorkload::RefElementwiseBinaryWorkload(const ElementwiseBinaryQueueDescriptor& desc,
+                                                         const WorkloadInfo& info)
+    : RefBaseWorkload<ElementwiseBinaryQueueDescriptor>(desc, info) {}
+
+void RefElementwiseBinaryWorkload::Execute() const
+{
+    Execute(m_Data.m_Inputs, m_Data.m_Outputs); // synchronous path: run on the tensor handles held in m_Data
+}
+
+void RefElementwiseBinaryWorkload::ExecuteAsync(ExecutionData& executionData)
+{
+    // Tensor handles come from the WorkingMemDescriptor behind executionData, not from m_Data.
+    auto* workingMem = static_cast<WorkingMemDescriptor*>(executionData.m_Data);
+    Execute(workingMem->m_Inputs, workingMem->m_Outputs);
+}
+
+// Selects the arithmetic type from input 0: Signed32 runs as int32_t, every other data type as float.
+void RefElementwiseBinaryWorkload::Execute(std::vector<ITensorHandle*> inputs,
+                                           std::vector<ITensorHandle*> outputs) const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefElementwiseBinaryWorkload_Execute");
+
+    const auto binaryOp = m_Data.m_Parameters.m_Operation;
+    if (GetTensorInfo(inputs[0]).GetDataType() != DataType::Signed32)
+    {
+        ExecuteFunction<float>(inputs, outputs, binaryOp);
+        return;
+    }
+    ExecuteFunction<int32_t>(inputs, outputs, binaryOp);
+}
+
+} // namespace armnn
diff --git a/src/backends/reference/workloads/RefElementwiseBinaryWorkload.hpp b/src/backends/reference/workloads/RefElementwiseBinaryWorkload.hpp
new file mode 100644
index 0000000..37458a1
--- /dev/null
+++ b/src/backends/reference/workloads/RefElementwiseBinaryWorkload.hpp
@@ -0,0 +1,29 @@
+//
+// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "BaseIterator.hpp"
+
+#include "RefBaseWorkload.hpp"
+#include <armnn/backends/WorkloadData.hpp>
+
+namespace armnn
+{
+
+class RefElementwiseBinaryWorkload : public RefBaseWorkload<ElementwiseBinaryQueueDescriptor>
+{   // Reference-backend workload for ElementwiseBinaryQueueDescriptor; the operation is read from m_Data.
+public:
+    using RefBaseWorkload<ElementwiseBinaryQueueDescriptor>::m_Data;
+
+    RefElementwiseBinaryWorkload(const ElementwiseBinaryQueueDescriptor& descriptor, const WorkloadInfo& info);
+    void Execute() const override;                             // runs on the handles held in m_Data
+    void ExecuteAsync(ExecutionData& executionData)  override; // runs on the handles in executionData
+
+private:
+    void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const; // shared body
+};
+
+} // namespace armnn
diff --git a/src/backends/reference/workloads/RefWorkloads.hpp b/src/backends/reference/workloads/RefWorkloads.hpp
index afed71b..dba880b 100644
--- a/src/backends/reference/workloads/RefWorkloads.hpp
+++ b/src/backends/reference/workloads/RefWorkloads.hpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 
@@ -26,6 +26,7 @@
 #include "RefDetectionPostProcessWorkload.hpp"
 #include "RefDequantizeWorkload.hpp"
 #include "RefElementwiseWorkload.hpp"
+#include "RefElementwiseBinaryWorkload.hpp"
 #include "RefElementwiseUnaryWorkload.hpp"
 #include "RefFakeQuantizationFloat32Workload.hpp"
 #include "RefFillWorkload.hpp"