IVGCVSW-5379 'TfLiteDelegate: Implement the ElementWiseBinary operators'

* Implemented ADD operator
* Implemented FP32 unit tests for ADD operator
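
A minimal usage sketch (illustrative only, not part of this patch), mirroring the
new ElementwiseBinaryTestHelper. 'modelBuffer' is assumed to be a std::vector<char>
holding a serialized TfLite flatbuffer that contains an ADD node; the variable
names below are placeholders:

    // Build an interpreter from the flatbuffer model.
    const tflite::Model* tfLiteModel = tflite::GetModel(modelBuffer.data());
    std::unique_ptr<tflite::Interpreter> interpreter;
    tflite::InterpreterBuilder(tfLiteModel, tflite::ops::builtin::BuiltinOpResolver())(&interpreter);
    interpreter->AllocateTensors();

    // Create the Arm NN delegate and hand the graph over to it.
    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuRef };
    armnnDelegate::DelegateOptions delegateOptions(backends);
    std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
        armnnDelegateHandle(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
                            armnnDelegate::TfLiteArmnnDelegateDelete);
    interpreter->ModifyGraphWithDelegate(armnnDelegateHandle.get());

    // Fill the input tensors via interpreter->typed_tensor<float>(...) and run.
    interpreter->Invoke();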

Signed-off-by: Sadik Armagan <sadik.armagan@arm.com>
Change-Id: Id7238749308855bd2b2118f4b6e60e765815c38f
diff --git a/delegate/CMakeLists.txt b/delegate/CMakeLists.txt
index aa48435..acce828 100644
--- a/delegate/CMakeLists.txt
+++ b/delegate/CMakeLists.txt
@@ -89,6 +89,8 @@
 set(armnnDelegate_unittest_sources)
 list(APPEND armnnDelegate_unittest_sources
         src/test/ArmnnDelegateTest.cpp
+        src/test/ElementwiseBinaryTest.cpp
+        src/test/ElementwiseBinaryTestHelper.hpp
         src/test/ElementwiseUnaryTest.cpp
         src/test/ElementwiseUnaryTestHelper.hpp)
 
diff --git a/delegate/src/DelegateUtils.hpp b/delegate/src/DelegateUtils.hpp
index 16dc8a8..fca6a6c 100644
--- a/delegate/src/DelegateUtils.hpp
+++ b/delegate/src/DelegateUtils.hpp
@@ -8,6 +8,7 @@
 #include <armnn/ArmNN.hpp>
 #include <armnn/BackendHelper.hpp>
 #include <armnn/utility/Assert.hpp>
+#include <armnn/utility/NumericCast.hpp>
 
 #include <tensorflow/lite/builtin_ops.h>
 #include <tensorflow/lite/c/builtin_op_data.h>
@@ -103,6 +104,198 @@
     return false;
 }
 
+TfLiteStatus Connect(armnn::IConnectableLayer* layer,
+                     TfLiteNode* tfLiteNode,
+                     armnnDelegate::DelegateData& data)
+{
+    ARMNN_ASSERT(tfLiteNode->inputs->size  == layer->GetNumInputSlots());
+    ARMNN_ASSERT(tfLiteNode->outputs->size == layer->GetNumOutputSlots());
+
+    // Connect the input slots
+    for (unsigned int inputIndex = 0; inputIndex < layer->GetNumInputSlots(); ++inputIndex)
+    {
+        data.m_OutputSlotForNode[tfLiteNode->inputs->data[inputIndex]]->Connect(layer->GetInputSlot(inputIndex));
+    }
+
+    // Prepare output slots
+    for (unsigned int outputIndex = 0; outputIndex < layer->GetNumOutputSlots(); ++outputIndex)
+    {
+        armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(outputIndex);
+        data.m_OutputSlotForNode[tfLiteNode->outputs->data[outputIndex]] = &outputSlot;
+    }
+    return kTfLiteOk;
+}
+
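+// Connects both inputs to startLayer, inserting a Reshape layer in front of the
+// lower-rank input (padding its shape with leading 1-dimensions) when the two
+// inputs have different ranks so that they can be broadcast against each other.
+// Returns nullptr if the connection or the required Reshape is not supported.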
+armnn::IConnectableLayer* BroadcastTensor(const armnn::TensorInfo& inputInfo0,
+                                          const armnn::TensorInfo& inputInfo1,
+                                          armnn::IConnectableLayer* startLayer,
+                                          TfLiteContext* tfLiteContext,
+                                          TfLiteNode* tfLiteNode,
+                                          armnnDelegate::DelegateData& delegateData)
+{
+    unsigned int inputDimensions0 = inputInfo0.GetNumDimensions();
+    unsigned int inputDimensions1 = inputInfo1.GetNumDimensions();
+
+    if (inputDimensions0 == inputDimensions1)
+    {
+        auto status = Connect(startLayer, tfLiteNode, delegateData);
+        if(status == kTfLiteOk)
+        {
+            return startLayer;
+        }
+        else
+        {
+            return nullptr;
+        }
+    }
+
+    unsigned int biggerInputDimensions = std::max(inputDimensions0, inputDimensions1);
+    unsigned int dimDifference =
+        std::abs(armnn::numeric_cast<int>(inputDimensions0) - armnn::numeric_cast<int>(inputDimensions1));
+
+    bool input0IsSmaller = inputDimensions0 < inputDimensions1;
+    const armnn::TensorInfo& smallInfo = input0IsSmaller ? inputInfo0 : inputInfo1;
+    const armnn::TensorShape& smallShape = smallInfo.GetShape();
+
+    std::vector<unsigned int> reshapedDimensions(biggerInputDimensions, 1);
+    for (unsigned int i = dimDifference; i < biggerInputDimensions; ++i)
+    {
+        reshapedDimensions[i] = smallShape[i - dimDifference];
+    }
+
+    armnn::TensorInfo reshapedInfo = smallInfo;
+    reshapedInfo.SetShape(armnn::TensorShape{ armnn::numeric_cast<unsigned int>(reshapedDimensions.size()),
+                                              reshapedDimensions.data() });
+
+    armnn::ReshapeDescriptor reshapeDescriptor;
+    bool isSupported = false;
+    FORWARD_LAYER_SUPPORT_FUNC(__func__,
+                               tfLiteContext,
+                               IsReshapeSupported,
+                               delegateData.m_Backends,
+                               isSupported,
+                               smallInfo,
+                               reshapedInfo,
+                               reshapeDescriptor);
+    if (!isSupported)
+    {
+        return nullptr;
+    }
+
+    ARMNN_ASSERT(delegateData.m_Network != nullptr);
+    // Add Reshape layer
+    reshapeDescriptor.m_TargetShape = reshapedInfo.GetShape();
+
+    armnn::IConnectableLayer* reshapeLayer = delegateData.m_Network->AddReshapeLayer(reshapeDescriptor);
+    ARMNN_ASSERT(reshapeLayer != nullptr);
+    reshapeLayer->GetOutputSlot(0).SetTensorInfo(reshapedInfo);
+
+    if (input0IsSmaller)
+    {
+        delegateData.m_OutputSlotForNode[tfLiteNode->inputs->data[0]]->Connect(reshapeLayer->GetInputSlot(0));
+        reshapeLayer->GetOutputSlot(0).Connect(startLayer->GetInputSlot(0));
+        delegateData.m_OutputSlotForNode[tfLiteNode->inputs->data[1]]->Connect(startLayer->GetInputSlot(1));
+    }
+    else
+    {
+        delegateData.m_OutputSlotForNode[tfLiteNode->inputs->data[1]]->Connect(reshapeLayer->GetInputSlot(0));
+        reshapeLayer->GetOutputSlot(0).Connect(startLayer->GetInputSlot(1));
+        delegateData.m_OutputSlotForNode[tfLiteNode->inputs->data[0]]->Connect(startLayer->GetInputSlot(0));
+    }
+
+    // Prepare output slots
+    for (unsigned int outputIndex = 0; outputIndex < startLayer->GetNumOutputSlots(); ++outputIndex)
+    {
+        armnn::IOutputSlot& outputSlot = startLayer->GetOutputSlot(outputIndex);
+        delegateData.m_OutputSlotForNode[tfLiteNode->outputs->data[outputIndex]] = &outputSlot;
+    }
+
+    return reshapeLayer;
+}
+
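+// Appends an Arm NN Activation layer after prevLayer when the TfLite node carries
+// a fused activation, and re-points the node's output slot at the activation layer.
+// Returns kTfLiteOk for kTfLiteActNone and kTfLiteError for unsupported activations.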
+TfLiteStatus FusedActivation(TfLiteContext* tfLiteContext,
+                             TfLiteNode* tfLiteNode,
+                             TfLiteFusedActivation activationType,
+                             armnn::IConnectableLayer* prevLayer,
+                             unsigned int outputSlotIndex,
+                             armnnDelegate::DelegateData& data)
+{
+
+    armnn::IOutputSlot& outputSlot = prevLayer->GetOutputSlot(outputSlotIndex);
+    const armnn::TensorInfo& activationOutputInfo = outputSlot.GetTensorInfo();
+
+    armnn::ActivationDescriptor activationDesc;
+
+    switch (activationType)
+    {
+        case kTfLiteActNone:
+        {
+            // No Activation
+            return kTfLiteOk;
+        }
+        case kTfLiteActRelu:
+        {
+            activationDesc.m_Function = armnn::ActivationFunction::ReLu;
+            break;
+        }
+        case kTfLiteActRelu1:
+        {
+            activationDesc.m_Function = armnn::ActivationFunction::BoundedReLu;
+            activationDesc.m_A = 1.0f;
+            activationDesc.m_B = -1.0f;
+            break;
+        }
+        case kTfLiteActRelu6:
+        {
+            activationDesc.m_Function = armnn::ActivationFunction::BoundedReLu;
+            activationDesc.m_A = 6.0f;
+            activationDesc.m_B = 0.0f;
+            break;
+        }
+        case kTfLiteActSigmoid:
+        {
+            activationDesc.m_Function = armnn::ActivationFunction::Sigmoid;
+            break;
+        }
+        case kTfLiteActTanh:
+        {
+            activationDesc.m_Function = armnn::ActivationFunction::TanH;
+            activationDesc.m_A = 1.0f;
+            activationDesc.m_B = 1.0f;
+            break;
+        }
+        default:
+            return kTfLiteError;
+    }
+
+    bool isSupported = false;
+    FORWARD_LAYER_SUPPORT_FUNC(__func__,
+                               tfLiteContext,
+                               IsActivationSupported,
+                               data.m_Backends,
+                               isSupported,
+                               prevLayer->GetOutputSlot(0).GetTensorInfo(),
+                               activationOutputInfo,
+                               activationDesc);
+    if (!isSupported)
+    {
+        return kTfLiteError;
+    }
+    armnn::IConnectableLayer* activationLayer = data.m_Network->AddActivationLayer(activationDesc);
+
+    ARMNN_ASSERT(activationLayer != nullptr);
+    activationLayer->GetOutputSlot(0).SetTensorInfo(activationOutputInfo);
+
+    // Connect and prepare output slots
+    for (unsigned int outputIndex = 0; outputIndex < activationLayer->GetNumOutputSlots(); ++outputIndex)
+    {
+        data.m_OutputSlotForNode[tfLiteNode->outputs->data[outputIndex]]->Connect(activationLayer->GetInputSlot(0));
+        armnn::IOutputSlot& outputSlot = activationLayer->GetOutputSlot(outputIndex);
+        data.m_OutputSlotForNode[tfLiteNode->outputs->data[outputIndex]] = &outputSlot;
+    }
+    return kTfLiteOk;
+}
+
 armnn::TensorInfo GetTensorInfoForTfLiteTensor(const TfLiteTensor& tfLiteTensor)
 {
     armnn::DataType type;
@@ -162,13 +355,21 @@
         // get per-channel quantization parameters
         const auto* affineQuantization =
             reinterpret_cast<TfLiteAffineQuantization*>(tfLiteTensor.quantization.params);
-        std::vector<float> quantizationScales;
-        for (unsigned int i = 1; i < affineQuantization->scale->size; ++i)
+        if (affineQuantization->scale->size > 1)
         {
-            quantizationScales.push_back(affineQuantization->scale->data[i]);
+            std::vector<float> quantizationScales;
+            for (unsigned int i = 0; i < affineQuantization->scale->size; ++i)
+            {
+                quantizationScales.push_back(affineQuantization->scale->data[i]);
+            }
+            ret.SetQuantizationScales(quantizationScales);
+            ret.SetQuantizationDim(armnn::MakeOptional<unsigned int>(affineQuantization->quantized_dimension));
         }
-        ret.SetQuantizationScales(quantizationScales);
-        ret.SetQuantizationDim(armnn::MakeOptional<unsigned int>(affineQuantization->quantized_dimension));
+        else
+        {
+            ret.SetQuantizationScale(affineQuantization->scale->data[0]);
+            ret.SetQuantizationOffset(affineQuantization->zero_point->data[0]);
+        }
     }
     else
     {
@@ -180,26 +381,4 @@
     return ret;
 }
 
-TfLiteStatus Connect(armnn::IConnectableLayer& layer,
-                     TfLiteNode* tfLiteNode,
-                     armnnDelegate::DelegateData& data)
-{
-    ARMNN_ASSERT(tfLiteNode->inputs->size  == layer.GetNumInputSlots());
-    ARMNN_ASSERT(tfLiteNode->outputs->size == layer.GetNumOutputSlots());
-
-    // connect the input slots
-    for (unsigned int inputIndex = 0; inputIndex < layer.GetNumInputSlots(); ++inputIndex)
-    {
-        data.m_OutputSlotForNode[tfLiteNode->inputs->data[inputIndex]]->Connect(layer.GetInputSlot(inputIndex));
-    }
-
-    // prepare output slots
-    for (unsigned int outputIndex = 0; outputIndex < layer.GetNumOutputSlots(); ++outputIndex)
-    {
-        armnn::IOutputSlot& outputSlot = layer.GetOutputSlot(outputIndex);
-        data.m_OutputSlotForNode[tfLiteNode->outputs->data[outputIndex]] = &outputSlot;
-    }
-    return kTfLiteOk;
-}
-
 } // namespace anonymous
diff --git a/delegate/src/ElementwiseBinary.hpp b/delegate/src/ElementwiseBinary.hpp
index ff24012..a22d9f5 100644
--- a/delegate/src/ElementwiseBinary.hpp
+++ b/delegate/src/ElementwiseBinary.hpp
@@ -5,6 +5,8 @@
 
 #pragma once
 
+#include "DelegateUtils.hpp"
+
 #include <tensorflow/lite/builtin_ops.h>
 #include <tensorflow/lite/c/builtin_op_data.h>
 #include <tensorflow/lite/c/common.h>
@@ -13,13 +15,134 @@
 namespace armnnDelegate
 {
 
+TfLiteStatus ValidateAddOperator(DelegateData& delegateData,
+                                 TfLiteContext* tfLiteContext,
+                                 const armnn::TensorInfo& inputInfo1,
+                                 const armnn::TensorInfo& inputInfo2,
+                                 const armnn::TensorInfo& outputInfo)
+{
+    bool isSupported = false;
+    auto validateFunc = [&](const armnn::TensorInfo& outputTensorInfo, bool& isSupported)
+    {
+        FORWARD_LAYER_SUPPORT_FUNC(__func__,
+                                   tfLiteContext,
+                                   IsAdditionSupported,
+                                   delegateData.m_Backends,
+                                   isSupported,
+                                   inputInfo1,
+                                   inputInfo2,
+                                   outputTensorInfo);
+    };
+
+    validateFunc(outputInfo, isSupported);
+    return isSupported ? kTfLiteOk : kTfLiteError;
+}
+
+armnn::IConnectableLayer* AddAdditionLayer(DelegateData& delegateData)
+{
+
+    if (!delegateData.m_Network)
+    {
+        return nullptr;
+    }
+
+    return delegateData.m_Network->AddAdditionLayer();
+}
+
 TfLiteStatus VisitElementwiseBinaryOperator(DelegateData& delegateData,
                                             TfLiteContext* tfLiteContext,
                                             TfLiteNode* tfLiteNode,
                                             int nodeIndex,
                                             int32_t elementwiseBinaryOperatorCode)
 {
-    return kTfLiteError;
+    TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 2, nodeIndex));
+    TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex));
+
+    const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors;
+    const TfLiteTensor& tfLiteInputTensor0 = tfLiteTensors[tfLiteNode->inputs->data[0]];
+    if (IsDynamicTensor(tfLiteInputTensor0))
+    {
+        TF_LITE_MAYBE_KERNEL_LOG(
+            tfLiteContext,
+            "TfLiteArmnnDelegate: Dynamic input tensors are not supported in operator #%d node #%d: ",
+            elementwiseBinaryOperatorCode, nodeIndex);
+        return kTfLiteError;
+    }
+
+    const TfLiteTensor& tfLiteInputTensor1 = tfLiteTensors[tfLiteNode->inputs->data[1]];
+    if (IsDynamicTensor(tfLiteInputTensor1))
+    {
+        TF_LITE_MAYBE_KERNEL_LOG(
+            tfLiteContext,
+            "TfLiteArmnnDelegate: Dynamic input tensors are not supported in operator #%d node #%d: ",
+            elementwiseBinaryOperatorCode, nodeIndex);
+        return kTfLiteError;
+    }
+
+    const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]];
+    if (IsDynamicTensor(tfLiteOutputTensor))
+    {
+        TF_LITE_MAYBE_KERNEL_LOG(
+            tfLiteContext,
+            "TfLiteArmnnDelegate: Dynamic output tensors are not supported in operator #%d node #%d: ",
+            elementwiseBinaryOperatorCode, nodeIndex);
+        return kTfLiteError;
+    }
+
+    const armnn::TensorInfo& inputTensorInfo0 = GetTensorInfoForTfLiteTensor(tfLiteInputTensor0);
+    const armnn::TensorInfo& inputTensorInfo1 = GetTensorInfoForTfLiteTensor(tfLiteInputTensor1);
+    const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor);
+
+    if (!delegateData.m_Network)
+    {
+        switch(elementwiseBinaryOperatorCode)
+        {
+            case kTfLiteBuiltinAdd:
+                return ValidateAddOperator(delegateData,
+                                           tfLiteContext,
+                                           inputTensorInfo0,
+                                           inputTensorInfo1,
+                                           outputTensorInfo);
+            default:
+                return kTfLiteError;
+        }
+    }
+
+    armnn::IConnectableLayer* elementwiseBinaryLayer = nullptr;
+
+    switch(elementwiseBinaryOperatorCode)
+    {
+        case kTfLiteBuiltinAdd:
+            elementwiseBinaryLayer = AddAdditionLayer(delegateData);
+            break;
+        default:
+            return kTfLiteError;
+    }
+    ARMNN_ASSERT(elementwiseBinaryLayer != nullptr);
+
+    armnn::IOutputSlot& outputSlot = elementwiseBinaryLayer->GetOutputSlot(0);
+    outputSlot.SetTensorInfo(outputTensorInfo);
+
+    auto reshapeLayer = BroadcastTensor(inputTensorInfo0,
+                                        inputTensorInfo1,
+                                        elementwiseBinaryLayer,
+                                        tfLiteContext,
+                                        tfLiteNode,
+                                        delegateData);
+    if (!reshapeLayer)
+    {
+        return kTfLiteError;
+    }
+
+    auto* tfLiteNodeParameters = reinterpret_cast<TfLiteAddParams*>(tfLiteNode->builtin_data);
+    if (!tfLiteNodeParameters)
+    {
+        // No Activation
+        return kTfLiteOk;
+    }
+    // Check activation
+    TfLiteFusedActivation activationType = tfLiteNodeParameters->activation;
+    return FusedActivation(tfLiteContext, tfLiteNode, activationType, reshapeLayer, 0, delegateData);
 }
 
 } // namespace armnnDelegate
diff --git a/delegate/src/ElementwiseUnary.hpp b/delegate/src/ElementwiseUnary.hpp
index 7527fa1..f2f5301 100644
--- a/delegate/src/ElementwiseUnary.hpp
+++ b/delegate/src/ElementwiseUnary.hpp
@@ -77,7 +77,7 @@
     outputSlot.SetTensorInfo(outputTensorInfo);
 
     // Connect
-    return Connect(*layer, tfLiteNode, delegateData);
+    return Connect(layer, tfLiteNode, delegateData);
 }
 
 } // namespace armnnDelegate
diff --git a/delegate/src/test/ArmnnDelegateTest.cpp b/delegate/src/test/ArmnnDelegateTest.cpp
index fdf786f..7cec70b 100644
--- a/delegate/src/test/ArmnnDelegateTest.cpp
+++ b/delegate/src/test/ArmnnDelegateTest.cpp
@@ -7,6 +7,7 @@
 #include <doctest/doctest.h>
 
 #include <armnn_delegate.hpp>
+#include "ElementwiseUnaryTestHelper.hpp"
 
 #include "tensorflow/lite/kernels/builtin_op_kernels.h"
 #include <tensorflow/lite/interpreter.h>
@@ -19,30 +20,31 @@
 
 TEST_CASE ("ArmnnDelegate Registered")
 {
-    std::unique_ptr<tflite::impl::Interpreter> tfLiteInterpreter;
-    tfLiteInterpreter.reset(new tflite::impl::Interpreter);
+    using namespace tflite;
+    auto tfLiteInterpreter = std::make_unique<Interpreter>();
 
-    // Create the network
     tfLiteInterpreter->AddTensors(3);
-    tfLiteInterpreter->SetInputs({0});
+    tfLiteInterpreter->SetInputs({0, 1});
     tfLiteInterpreter->SetOutputs({2});
 
-    TfLiteQuantizationParams quantizationParams;
-    tfLiteInterpreter->SetTensorParametersReadWrite(0, kTfLiteFloat32, "", {3}, quantizationParams);
-    tfLiteInterpreter->SetTensorParametersReadWrite(1, kTfLiteFloat32, "", {3}, quantizationParams);
-    tfLiteInterpreter->SetTensorParametersReadWrite(2, kTfLiteFloat32, "", {3}, quantizationParams);
-    TfLiteRegistration* nodeRegistration = tflite::ops::builtin::Register_ABS();
-    void* data = malloc(sizeof(int));
+    tfLiteInterpreter->SetTensorParametersReadWrite(0, kTfLiteFloat32, "input1", {1,2,2,1}, TfLiteQuantization());
+    tfLiteInterpreter->SetTensorParametersReadWrite(1, kTfLiteFloat32, "input2", {1,2,2,1}, TfLiteQuantization());
+    tfLiteInterpreter->SetTensorParametersReadWrite(2, kTfLiteFloat32, "output", {1,2,2,1}, TfLiteQuantization());
 
-    tfLiteInterpreter->AddNodeWithParameters({0}, {2}, nullptr, 0, data, nodeRegistration);
+    tflite::ops::builtin::BuiltinOpResolver opResolver;
+    const TfLiteRegistration* opRegister = opResolver.FindOp(BuiltinOperator_ADD, 1);
+    tfLiteInterpreter->AddNodeWithParameters({0, 1}, {2}, "", 0, nullptr, opRegister);
 
     // create the Armnn Delegate
-    auto delegateOptions = TfLiteArmnnDelegateOptionsDefault();
-    auto delegate = TfLiteArmnnDelegateCreate(delegateOptions);
-    auto status = tfLiteInterpreter->ModifyGraphWithDelegate(std::move(delegate));
+    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuRef };
+    armnnDelegate::DelegateOptions delegateOptions(backends);
+    std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
+                       theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
+                                        armnnDelegate::TfLiteArmnnDelegateDelete);
+
+    auto status = tfLiteInterpreter->ModifyGraphWithDelegate(std::move(theArmnnDelegate));
     CHECK(status == kTfLiteOk);
     CHECK(tfLiteInterpreter != nullptr);
-
 }
 
 }
diff --git a/delegate/src/test/ElementwiseBinaryTest.cpp b/delegate/src/test/ElementwiseBinaryTest.cpp
new file mode 100644
index 0000000..bd4019a
--- /dev/null
+++ b/delegate/src/test/ElementwiseBinaryTest.cpp
@@ -0,0 +1,169 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ElementwiseBinaryTestHelper.hpp"
+
+#include <armnn_delegate.hpp>
+
+#include <flatbuffers/flatbuffers.h>
+#include <tensorflow/lite/interpreter.h>
+#include <tensorflow/lite/kernels/register.h>
+#include <tensorflow/lite/model.h>
+#include <tensorflow/lite/schema/schema_generated.h>
+#include <tensorflow/lite/version.h>
+
+#include <doctest/doctest.h>
+
+namespace armnnDelegate
+{
+
+TEST_SUITE("ElementwiseBinaryTest")
+{
+
+TEST_CASE ("Add_Float32_GpuAcc_Test")
+{
+    // Set the backends to use with the Arm NN delegate
+    std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc,
+                                               armnn::Compute::CpuRef };
+    // Set input data
+    std::vector<int32_t> input0Shape { 2, 2, 2, 3 };
+    std::vector<int32_t> input1Shape { 2, 2, 2, 3 };
+    std::vector<int32_t> outputShape { 2, 2, 2, 3 };
+
+    std::vector<float> input0Values =
+    {
+        0.0f, 2.0f, 1.0f,
+        0.2f, 1.0f, 2.0f,
+
+        1.0f, 2.0f, 1.0f,
+        0.2f, 1.0f, 2.0f,
+
+        0.0f, 2.0f, 1.0f,
+        4.2f, 1.0f, 2.0f,
+
+        0.0f, 0.0f, 1.0f,
+        0.2f, 1.0f, 2.0f,
+
+    };
+
+    std::vector<float> input1Values =
+    {
+        1.0f, 2.0f,  1.0f,
+        0.0f, 1.0f,  2.0f,
+
+        1.0f, 2.0f, -2.0f,
+        0.2f, 1.0f,  2.0f,
+
+        0.0f, 2.0f,  1.0f,
+        4.2f, 0.0f, -3.0f,
+
+        0.0f, 0.0f,  1.0f,
+        0.7f, 1.0f,  5.0f,
+    };
+
+    std::vector<float> expectedOutputValues =
+    {
+        1.0f, 4.0f,  2.0f,
+        0.2f, 2.0f,  4.0f,
+
+        2.0f, 4.0f, -1.0f,
+        0.4f, 2.0f,  4.0f,
+
+        0.0f, 4.0f,  2.0f,
+        8.4f, 1.0f, -1.0f,
+
+        0.0f, 0.0f,  2.0f,
+        0.9f, 2.0f,  7.0f,
+    };
+
+
+    ElementwiseBinaryFP32Test(tflite::BuiltinOperator_ADD,
+                              tflite::ActivationFunctionType_NONE,
+                              backends,
+                              input0Shape,
+                              input1Shape,
+                              outputShape,
+                              input0Values,
+                              input1Values,
+                              expectedOutputValues);
+}
+
+TEST_CASE ("Add_Broadcast_Float32_GpuAcc_Test")
+{
+    // Set the backends to use with the Arm NN delegate
+    std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc,
+                                               armnn::Compute::CpuRef };
+    // Set input data
+    std::vector<int32_t> input0Shape { 1, 3, 2, 1 };
+    std::vector<int32_t> input1Shape { 1, 1, 2, 3 };
+    std::vector<int32_t> outputShape { 1, 3, 2, 3 };
+
+    std::vector<float> input0Values
+    {
+        0.0f,
+        1.0f,
+
+        2.0f,
+        3.0f,
+
+        4.0f,
+        5.0f,
+    };
+    std::vector<float> input1Values
+    {
+        0.5f, 1.5f, 2.5f,
+        3.5f, 4.5f, 5.5f,
+    };
+    // Set output data
+    std::vector<float> expectedOutputValues
+    {
+        0.5f, 1.5f, 2.5f,
+        4.5f, 5.5f, 6.5f,
+
+        2.5f, 3.5f, 4.5f,
+        6.5f, 7.5f, 8.5f,
+
+        4.5f, 5.5f, 6.5f,
+        8.5f, 9.5f, 10.5f,
+    };
+    ElementwiseBinaryFP32Test(tflite::BuiltinOperator_ADD,
+                              tflite::ActivationFunctionType_NONE,
+                              backends,
+                              input0Shape,
+                              input1Shape,
+                              outputShape,
+                              input0Values,
+                              input1Values,
+                              expectedOutputValues);
+}
+
+TEST_CASE ("Add_ActivationRELU_Float32_GpuAcc_Test")
+{
+    // Set the backends to use with the Arm NN delegate
+    std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc,
+                                               armnn::Compute::CpuRef };
+    // Set input data
+    std::vector<int32_t> input0Shape { 1, 2, 2, 1 };
+    std::vector<int32_t> input1Shape { 1, 2, 2, 1 };
+    std::vector<int32_t> outputShape { 1, 2, 2, 1 };
+
+    std::vector<float> input0Values { 4.0f, 0.8f, 0.7f, -0.8f };
+    std::vector<float> input1Values { 0.7f, -1.2f, 0.8f, 0.5f };
+    // Set output data
+    std::vector<float> expectedOutputValues { 4.7f, 0.0f, 1.5f, 0.0f };
+    ElementwiseBinaryFP32Test(tflite::BuiltinOperator_ADD,
+                              tflite::ActivationFunctionType_RELU,
+                              backends,
+                              input0Shape,
+                              input1Shape,
+                              outputShape,
+                              input0Values,
+                              input1Values,
+                              expectedOutputValues);
+}
+
+}
+
+} // namespace armnnDelegate
\ No newline at end of file
diff --git a/delegate/src/test/ElementwiseBinaryTestHelper.hpp b/delegate/src/test/ElementwiseBinaryTestHelper.hpp
new file mode 100644
index 0000000..72f9f85
--- /dev/null
+++ b/delegate/src/test/ElementwiseBinaryTestHelper.hpp
@@ -0,0 +1,211 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <armnn_delegate.hpp>
+
+#include <flatbuffers/flatbuffers.h>
+#include <tensorflow/lite/interpreter.h>
+#include <tensorflow/lite/kernels/register.h>
+#include <tensorflow/lite/model.h>
+#include <tensorflow/lite/schema/schema_generated.h>
+#include <tensorflow/lite/version.h>
+
+#include <doctest/doctest.h>
+
+namespace
+{
+
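+// Builds a minimal single-operator TfLite flatbuffer model (two input tensors and
+// one output tensor) for the requested elementwise binary operator and fused
+// activation, and returns it as a byte vector.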
+std::vector<char> CreateElementwiseBinaryTfLiteModel(tflite::BuiltinOperator binaryOperatorCode,
+                                                     tflite::ActivationFunctionType activationType,
+                                                     tflite::TensorType tensorType,
+                                                     const std::vector <int32_t>& input0TensorShape,
+                                                     const std::vector <int32_t>& input1TensorShape,
+                                                     const std::vector <int32_t>& outputTensorShape)
+{
+    using namespace tflite;
+    flatbuffers::FlatBufferBuilder flatBufferBuilder;
+
+    std::vector<flatbuffers::Offset<tflite::Buffer>> buffers;
+    buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({})));
+
+    std::array<flatbuffers::Offset<Tensor>, 3> tensors;
+    tensors[0] = CreateTensor(flatBufferBuilder,
+                              flatBufferBuilder.CreateVector<int32_t>(input0TensorShape.data(),
+                                                                      input0TensorShape.size()),
+                              tensorType, 0);
+    tensors[1] = CreateTensor(flatBufferBuilder,
+                              flatBufferBuilder.CreateVector<int32_t>(input1TensorShape.data(),
+                                                                      input1TensorShape.size()),
+                              tensorType, 0);
+    tensors[2] = CreateTensor(flatBufferBuilder,
+                              flatBufferBuilder.CreateVector<int32_t>(outputTensorShape.data(),
+                                                                      outputTensorShape.size()),
+                              tensorType);
+
+    // create operator
+    tflite::BuiltinOptions operatorBuiltinOptionsType = tflite::BuiltinOptions_NONE;
+    flatbuffers::Offset<void> operatorBuiltinOptions = 0;
+    switch (binaryOperatorCode)
+    {
+        case BuiltinOperator_ADD:
+        {
+            operatorBuiltinOptionsType = BuiltinOptions_AddOptions;
+            operatorBuiltinOptions = CreateAddOptions(flatBufferBuilder, activationType).Union();
+            break;
+        }
+        case BuiltinOperator_DIV:
+        {
+            operatorBuiltinOptionsType = BuiltinOptions_DivOptions;
+            operatorBuiltinOptions = CreateDivOptions(flatBufferBuilder, activationType).Union();
+            break;
+        }
+        case BuiltinOperator_MUL:
+        {
+            operatorBuiltinOptionsType = BuiltinOptions_MulOptions;
+            operatorBuiltinOptions = CreateMulOptions(flatBufferBuilder, activationType).Union();
+            break;
+        }
+        case BuiltinOperator_SUB:
+        {
+            operatorBuiltinOptionsType = BuiltinOptions_SubOptions;
+            operatorBuiltinOptions = CreateSubOptions(flatBufferBuilder, activationType).Union();
+            break;
+        }
+        default:
+            break;
+    }
+    const std::vector<int32_t> operatorInputs{ {0, 1} };
+    const std::vector<int32_t> operatorOutputs{{2}};
+    flatbuffers::Offset <Operator> elementwiseBinaryOperator =
+        CreateOperator(flatBufferBuilder,
+                       0,
+                       flatBufferBuilder.CreateVector<int32_t>(operatorInputs.data(), operatorInputs.size()),
+                       flatBufferBuilder.CreateVector<int32_t>(operatorOutputs.data(), operatorOutputs.size()),
+                       operatorBuiltinOptionsType,
+                       operatorBuiltinOptions);
+
+    const std::vector<int> subgraphInputs{ {0, 1} };
+    const std::vector<int> subgraphOutputs{{2}};
+    flatbuffers::Offset <SubGraph> subgraph =
+        CreateSubGraph(flatBufferBuilder,
+                       flatBufferBuilder.CreateVector(tensors.data(), tensors.size()),
+                       flatBufferBuilder.CreateVector<int32_t>(subgraphInputs.data(), subgraphInputs.size()),
+                       flatBufferBuilder.CreateVector<int32_t>(subgraphOutputs.data(), subgraphOutputs.size()),
+                       flatBufferBuilder.CreateVector(&elementwiseBinaryOperator, 1));
+
+    flatbuffers::Offset <flatbuffers::String> modelDescription =
+        flatBufferBuilder.CreateString("ArmnnDelegate: Elementwise Binary Operator Model");
+    flatbuffers::Offset <OperatorCode> operatorCode = CreateOperatorCode(flatBufferBuilder, binaryOperatorCode);
+
+    flatbuffers::Offset <Model> flatbufferModel =
+        CreateModel(flatBufferBuilder,
+                    TFLITE_SCHEMA_VERSION,
+                    flatBufferBuilder.CreateVector(&operatorCode, 1),
+                    flatBufferBuilder.CreateVector(&subgraph, 1),
+                    modelDescription,
+                    flatBufferBuilder.CreateVector(buffers.data(), buffers.size()));
+
+    flatBufferBuilder.Finish(flatbufferModel);
+
+    return std::vector<char>(flatBufferBuilder.GetBufferPointer(),
+                             flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize());
+}
+
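+// Runs the generated model through a plain TfLite interpreter and through an
+// interpreter using the Arm NN delegate, then checks both outputs against the
+// expected values and against each other.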
+void ElementwiseBinaryFP32Test(tflite::BuiltinOperator binaryOperatorCode,
+                               tflite::ActivationFunctionType activationType,
+                               std::vector<armnn::BackendId>& backends,
+                               std::vector<int32_t>& input0Shape,
+                               std::vector<int32_t>& input1Shape,
+                               std::vector<int32_t>& outputShape,
+                               std::vector<float>& input0Values,
+                               std::vector<float>& input1Values,
+                               std::vector<float>& expectedOutputValues)
+{
+    using namespace tflite;
+    std::vector<char> modelBuffer = CreateElementwiseBinaryTfLiteModel(binaryOperatorCode,
+                                                                       activationType,
+                                                                       ::tflite::TensorType_FLOAT32,
+                                                                       input0Shape,
+                                                                       input1Shape,
+                                                                       outputShape);
+
+    const Model* tfLiteModel = GetModel(modelBuffer.data());
+    // Create TfLite Interpreters
+    std::unique_ptr<Interpreter> armnnDelegateInterpreter;
+    CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver())
+              (&armnnDelegateInterpreter) == kTfLiteOk);
+    CHECK(armnnDelegateInterpreter != nullptr);
+    CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk);
+
+    std::unique_ptr<Interpreter> tfLiteInterpreter;
+    CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver())
+              (&tfLiteInterpreter) == kTfLiteOk);
+    CHECK(tfLiteInterpreter != nullptr);
+    CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk);
+
+    // Create the ArmNN Delegate
+    armnnDelegate::DelegateOptions delegateOptions(backends);
+    std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
+        theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
+                         armnnDelegate::TfLiteArmnnDelegateDelete);
+    CHECK(theArmnnDelegate != nullptr);
+    // Modify armnnDelegateInterpreter to use armnnDelegate
+    CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk);
+
+    // Set input data
+    auto tfLiteDelegateInput0Id = tfLiteInterpreter->inputs()[0];
+    auto tfLiteDelegateInput0Data = tfLiteInterpreter->typed_tensor<float>(tfLiteDelegateInput0Id);
+    for (unsigned int i = 0; i < input0Values.size(); ++i)
+    {
+        tfLiteDelegateInput0Data[i] = input0Values[i];
+    }
+
+    auto tfLiteDelegateInput1Id = tfLiteInterpreter->inputs()[1];
+    auto tfLiteDelegateInput1Data = tfLiteInterpreter->typed_tensor<float>(tfLiteDelegateInput1Id);
+    for (unsigned int i = 0; i < input1Values.size(); ++i)
+    {
+        tfLiteDelegateInput1Data[i] = input1Values[i];
+    }
+
+    auto armnnDelegateInput0Id = armnnDelegateInterpreter->inputs()[0];
+    auto armnnDelegateInput0Data = armnnDelegateInterpreter->typed_tensor<float>(armnnDelegateInput0Id);
+    for (unsigned int i = 0; i < input0Values.size(); ++i)
+    {
+        armnnDelegateInput0Data[i] = input0Values[i];
+    }
+
+    auto armnnDelegateInput1Id = armnnDelegateInterpreter->inputs()[1];
+    auto armnnDelegateInput1Data = armnnDelegateInterpreter->typed_tensor<float>(armnnDelegateInput1Id);
+    for (unsigned int i = 0; i < input1Values.size(); ++i)
+    {
+        armnnDelegateInput1Data[i] = input1Values[i];
+    }
+
+    // Run inference on both interpreters
+    CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk);
+    CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk);
+
+    // Compare output data
+    auto tfLiteDelegateOutputId = tfLiteInterpreter->outputs()[0];
+    auto tfLiteDelegateOutputData = tfLiteInterpreter->typed_tensor<float>(tfLiteDelegateOutputId);
+    auto armnnDelegateOutputId = armnnDelegateInterpreter->outputs()[0];
+    auto armnnDelegateOutputData = armnnDelegateInterpreter->typed_tensor<float>(armnnDelegateOutputId);
+    for (size_t i = 0; i < expectedOutputValues.size(); ++i)
+    {
+        CHECK(expectedOutputValues[i] == armnnDelegateOutputData[i]);
+        CHECK(tfLiteDelegateOutputData[i] == expectedOutputValues[i]);
+        CHECK(tfLiteDelegateOutputData[i] == armnnDelegateOutputData[i]);
+    }
+
+    armnnDelegateInterpreter.reset(nullptr);
+}
+
+} // anonymous namespace
diff --git a/delegate/src/test/ElementwiseUnaryTestHelper.hpp b/delegate/src/test/ElementwiseUnaryTestHelper.hpp
index 4d45f4e..b4a55cb 100644
--- a/delegate/src/test/ElementwiseUnaryTestHelper.hpp
+++ b/delegate/src/test/ElementwiseUnaryTestHelper.hpp
@@ -97,12 +97,15 @@
               (&tfLiteInterpreter) == kTfLiteOk);
     CHECK(tfLiteInterpreter != nullptr);
     CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk);
+
     // Create the ArmNN Delegate
     armnnDelegate::DelegateOptions delegateOptions(backends);
-    auto armnnDelegate = TfLiteArmnnDelegateCreate(delegateOptions);
-    CHECK(armnnDelegate != nullptr);
+    std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
+                        theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
+                                         armnnDelegate::TfLiteArmnnDelegateDelete);
+    CHECK(theArmnnDelegate != nullptr);
     // Modify armnnDelegateInterpreter to use armnnDelegate
-    CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(armnnDelegate) == kTfLiteOk);
+    CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk);
 
     // Set input data
     auto tfLiteDelegateInputId = tfLiteInterpreter->inputs()[0];