IVGCVSW-5486 TfLiteDelegate: Implement Concat and Mean operators

 * Implemented Concatenation & Mean operators.
 * Added unit tests for Concatenation & Mean operators.
 * Added CompareOutputData function to TestUtils.hpp.

Signed-off-by: Matthew Sloyan <matthew.sloyan@arm.com>
Change-Id: I31b7b1517a9ce041c3269f69f16a419f967d0fb0
diff --git a/delegate/CMakeLists.txt b/delegate/CMakeLists.txt
index 6c238fa..1ea0cdd 100644
--- a/delegate/CMakeLists.txt
+++ b/delegate/CMakeLists.txt
@@ -107,6 +107,8 @@
         src/test/ArmnnDelegateTest.cpp
         src/test/ComparisonTest.cpp
         src/test/ComparisonTestHelper.hpp
+        src/test/ControlTest.cpp
+        src/test/ControlTestHelper.hpp
         src/test/Convolution2dTest.cpp
         src/test/ConvolutionTestHelper.hpp
         src/test/DepthwiseConvolution2dTest.cpp
diff --git a/delegate/src/Control.hpp b/delegate/src/Control.hpp
index 437b224..a964514 100644
--- a/delegate/src/Control.hpp
+++ b/delegate/src/Control.hpp
@@ -10,24 +10,316 @@
 #include <tensorflow/lite/builtin_ops.h>
 #include <tensorflow/lite/c/builtin_op_data.h>
 #include <tensorflow/lite/c/common.h>
+#include <tensorflow/lite/kernels/internal/tensor_ctypes.h>
 #include <tensorflow/lite/minimal_logging.h>
 
+#include <algorithm>
+#include <iterator>
+#include <string>
+#include <vector>
+
 namespace armnnDelegate
 {
 
+void SetupConcatViewOrigin(const armnn::TensorInfo& inputTensorInfo,
+                           armnn::OriginsDescriptor& concatDescriptor,
+                           const unsigned int concatAxis,
+                           unsigned int inputIndex,
+                           unsigned int& mergeDimOrigin)
+{
+    // Writes the origin coordinates of view `inputIndex` into concatDescriptor.
+    // The input must have the same number of dimensions as the descriptor, otherwise ParseException is thrown.
+    const uint32_t expectedRank = concatDescriptor.GetNumDimensions();
+    if (inputTensorInfo.GetNumDimensions() != expectedRank)
+    {
+        throw armnn::ParseException("The number of dimensions for input tensors "
+                                    "of the concatenation operator should be: " + std::to_string(expectedRank));
+    }
+    // Dimensions in front of the concat axis start at coordinate 0.
+    for (unsigned int dim = 0; dim < concatAxis; ++dim)
+    {
+        concatDescriptor.SetViewOriginCoord(inputIndex, dim, 0);
+    }
+    // On the concat axis the view starts at the running offset, which then grows by this input's extent.
+    concatDescriptor.SetViewOriginCoord(inputIndex, concatAxis, mergeDimOrigin);
+    mergeDimOrigin += inputTensorInfo.GetShape()[concatAxis];
+    // Dimensions behind the concat axis start at coordinate 0 as well.
+    for (unsigned int dim = concatAxis + 1; dim < expectedRank; ++dim)
+    {
+        concatDescriptor.SetViewOriginCoord(inputIndex, dim, 0);
+    }
+}
+
+// Handles a TfLite CONCATENATION node. With delegateData.m_Network unset it only queries backend
+// support; otherwise it adds a Concat layer, connects it and applies any fused activation.
+// Returns kTfLiteError for too few inputs, failed output-count validation, invalid or dynamic
+// tensors, missing builtin data, a first input of rank 0 or an axis outside [-rank, rank).
+TfLiteStatus VisitConcatenationOperator(DelegateData& delegateData,
+                                        TfLiteContext* tfLiteContext,
+                                        TfLiteNode* tfLiteNode,
+                                        int nodeIndex,
+                                        int32_t tfLiteConcatOperatorCode)
+{
+    unsigned int numInputs = tfLiteNode->inputs->size;
+    if (numInputs < 2)
+    {
+        TF_LITE_MAYBE_KERNEL_LOG(
+            tfLiteContext, "TfLiteArmnnDelegate: Minimum number of inputs (%d != %d) in node #%d",
+            2, numInputs, nodeIndex);
+        return kTfLiteError;
+    }
+    TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex));
+
+    const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors;
+
+    std::vector<armnn::TensorInfo> inputTensorInfos;
+    for (unsigned int i = 0; i < numInputs; ++i)
+    {
+        const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[i]];
+        if(!IsValid(&tfLiteInputTensor))
+        {
+            TF_LITE_MAYBE_KERNEL_LOG(
+                tfLiteContext,
+                "TfLiteArmnnDelegate: Invalid input tensor in operator #%d node #%d: ",
+                tfLiteConcatOperatorCode, nodeIndex);
+            return kTfLiteError;
+        }
+        if (IsDynamicTensor(tfLiteInputTensor))
+        {
+            TF_LITE_MAYBE_KERNEL_LOG(
+                tfLiteContext,
+                "TfLiteArmnnDelegate: Dynamic input tensors are not supported in operator #%d node #%d: ",
+                tfLiteConcatOperatorCode, nodeIndex);
+            return kTfLiteError;
+        }
+        inputTensorInfos.emplace_back(GetTensorInfoForTfLiteTensor(tfLiteInputTensor));
+    }
+
+    // Convert input tensors to const armnn::TensorInfo* type for FORWARD_LAYER_SUPPORT_FUNC.
+    std::vector<const armnn::TensorInfo*> inputConstTensorInfos;
+    std::transform(inputTensorInfos.begin(),
+                   inputTensorInfos.end(),
+                   std::back_inserter(inputConstTensorInfos),
+                   [](armnn::TensorInfo& t)->const armnn::TensorInfo*{ return &t; });
+
+    const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]];
+    if(!IsValid(&tfLiteOutputTensor))
+    {
+        TF_LITE_MAYBE_KERNEL_LOG(
+            tfLiteContext,
+            "TfLiteArmnnDelegate: Invalid output tensor in operator #%d node #%d: ",
+            tfLiteConcatOperatorCode, nodeIndex);
+        return kTfLiteError;
+    }
+    if (IsDynamicTensor(tfLiteOutputTensor))
+    {
+        TF_LITE_MAYBE_KERNEL_LOG(
+            tfLiteContext,
+            "TfLiteArmnnDelegate: Dynamic output tensors are not supported in operator #%d node #%d: ",
+            tfLiteConcatOperatorCode, nodeIndex);
+        return kTfLiteError;
+    }
+
+    // builtin_data carries the axis and activation, so it is checked before anything dereferences it.
+    auto* concatenationParameters = reinterpret_cast<TfLiteConcatenationParams*>(tfLiteNode->builtin_data);
+    const int inputRank = tfLiteTensors[tfLiteNode->inputs->data[0]].dims->size;
+    if (!concatenationParameters || inputRank <= 0 ||
+        concatenationParameters->axis < -inputRank || concatenationParameters->axis >= inputRank)
+    {
+        TF_LITE_MAYBE_KERNEL_LOG(
+            tfLiteContext,
+            "TfLiteArmnnDelegate: Missing parameters or invalid axis in operator #%d node #%d: ",
+            tfLiteConcatOperatorCode, nodeIndex);
+        return kTfLiteError;
+    }
+
+    // Setup OriginsDescriptor, axis and view origin. A negative axis counts back from the last dimension.
+    const int axis = concatenationParameters->axis;
+    const unsigned int concatDimInput = static_cast<unsigned int>(axis < 0 ? axis + inputRank : axis);
+
+    armnn::OriginsDescriptor concatDescriptor(numInputs, static_cast<uint32_t>(inputRank));
+    concatDescriptor.SetConcatAxis(concatDimInput);
+
+    unsigned int mergeDimOrigin = 0;
+    for (unsigned int viewIndex = 0; viewIndex < numInputs; ++viewIndex)
+    {
+        SetupConcatViewOrigin(inputTensorInfos[viewIndex], concatDescriptor, concatDimInput, viewIndex, mergeDimOrigin);
+    }
+
+    const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor);
+    // Check if supported
+    bool isSupported = false;
+    auto validateFunc = [&](const armnn::TensorInfo& outputTensorInfo, bool& isSupported)
+    {
+        FORWARD_LAYER_SUPPORT_FUNC(__func__,
+                                   tfLiteContext,
+                                   IsConcatSupported,
+                                   delegateData.m_Backends,
+                                   isSupported,
+                                   inputConstTensorInfos,
+                                   outputTensorInfo,
+                                   concatDescriptor);
+    };
+
+    if (!delegateData.m_Network)
+    {
+        validateFunc(outputTensorInfo, isSupported);
+        return isSupported ? kTfLiteOk : kTfLiteError;
+    }
+
+    // Setup layer and connect.
+    armnn::IConnectableLayer* concatenationLayer = delegateData.m_Network->AddConcatLayer(concatDescriptor);
+    ARMNN_ASSERT(concatenationLayer != nullptr);
+
+    armnn::IOutputSlot& outputSlot = concatenationLayer->GetOutputSlot(0);
+    outputSlot.SetTensorInfo(outputTensorInfo);
+    TF_LITE_ENSURE_STATUS(Connect(concatenationLayer, tfLiteNode, delegateData));
+
+    // Check activation
+    TfLiteFusedActivation activationType = concatenationParameters->activation;
+    return FusedActivation(tfLiteContext, tfLiteNode, activationType, concatenationLayer, 0, delegateData);
+}
+
+// Handles a TfLite MEAN node (input 0: data, input 1: axes). If delegateData.m_Network is unset only backend
+// support is queried; otherwise a Mean layer is added and connected. Returns kTfLiteError on invalid input.
+TfLiteStatus VisitMeanOperator(DelegateData& delegateData,
+                               TfLiteContext* tfLiteContext,
+                               TfLiteNode* tfLiteNode,
+                               int nodeIndex,
+                               int32_t tfLiteMeanOperatorCode)
+{
+    TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 2, nodeIndex));
+    TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex));
+
+    const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors;
+    const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]];
+    if(!IsValid(&tfLiteInputTensor))
+    {
+        TF_LITE_MAYBE_KERNEL_LOG(
+            tfLiteContext,
+            "TfLiteArmnnDelegate: Invalid input tensor in operator #%d node #%d: ",
+            tfLiteMeanOperatorCode, nodeIndex);
+        return kTfLiteError;
+    }
+    if (IsDynamicTensor(tfLiteInputTensor))
+    {
+        TF_LITE_MAYBE_KERNEL_LOG(
+            tfLiteContext,
+            "TfLiteArmnnDelegate: Dynamic input tensors are not supported in operator #%d node #%d: ",
+            tfLiteMeanOperatorCode, nodeIndex);
+        return kTfLiteError;
+    }
+
+    const TfLiteTensor& tfLiteAxisTensor = tfLiteTensors[tfLiteNode->inputs->data[1]];
+    if(!IsValid(&tfLiteAxisTensor))
+    {
+        TF_LITE_MAYBE_KERNEL_LOG(
+            tfLiteContext,
+            "TfLiteArmnnDelegate: Invalid axis tensor in operator #%d node #%d: ",
+            tfLiteMeanOperatorCode, nodeIndex);
+        return kTfLiteError;
+    }
+    if (IsDynamicTensor(tfLiteAxisTensor))
+    {
+        TF_LITE_MAYBE_KERNEL_LOG(
+            tfLiteContext,
+            "TfLiteArmnnDelegate: Dynamic axis tensors are not supported in operator #%d node #%d: ",
+            tfLiteMeanOperatorCode, nodeIndex);
+        return kTfLiteError;
+    }
+
+    const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]];
+    if(!IsValid(&tfLiteOutputTensor))
+    {
+        TF_LITE_MAYBE_KERNEL_LOG(
+            tfLiteContext,
+            "TfLiteArmnnDelegate: Invalid output tensor in operator #%d node #%d: ",
+            tfLiteMeanOperatorCode, nodeIndex);
+        return kTfLiteError;
+    }
+    if (IsDynamicTensor(tfLiteOutputTensor))
+    {
+        TF_LITE_MAYBE_KERNEL_LOG(
+            tfLiteContext,
+            "TfLiteArmnnDelegate: Dynamic output tensors are not supported in operator #%d node #%d: ",
+            tfLiteMeanOperatorCode, nodeIndex);
+        return kTfLiteError;
+    }
+
+    const armnn::TensorInfo& inputTensorInfo =  GetTensorInfoForTfLiteTensor(tfLiteInputTensor);
+    const armnn::TensorInfo& axisTensorInfo =   GetTensorInfoForTfLiteTensor(tfLiteAxisTensor);
+    const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor);
+    // Axis values are read below so their data must exist; a rank of 0 would make the modulo divide by zero.
+    const int32_t* axisTensorData = tflite::GetTensorData<int32_t>(&tfLiteAxisTensor);
+    const unsigned int rank = inputTensorInfo.GetNumDimensions();
+    if (axisTensorData == nullptr || rank == 0)
+    {
+        TF_LITE_MAYBE_KERNEL_LOG(
+            tfLiteContext, "TfLiteArmnnDelegate: Missing axis data or rank 0 input in operator #%d node #%d: ",
+            tfLiteMeanOperatorCode, nodeIndex);
+        return kTfLiteError;
+    }
+
+    // Setup MeanDescriptor: wrap each axis into [0, rank), sort, drop duplicates, then derive keepDims.
+    armnn::MeanDescriptor desc;
+    for (unsigned int i = 0; i < axisTensorInfo.GetNumElements(); ++i)
+    {
+        desc.m_Axis.push_back((axisTensorData[i] + rank) % rank);
+    }
+    std::sort(desc.m_Axis.begin(), desc.m_Axis.end());
+    desc.m_Axis.erase(std::unique(desc.m_Axis.begin(), desc.m_Axis.end()), desc.m_Axis.end());
+    desc.m_KeepDims = inputTensorInfo.GetNumDimensions() == outputTensorInfo.GetNumDimensions();
+
+    // Check if supported
+    bool isSupported = false;
+    auto validateFunc = [&](const armnn::TensorInfo& outputTensorInfo, bool& isSupported)
+    {
+        FORWARD_LAYER_SUPPORT_FUNC(__func__,
+                                   tfLiteContext,
+                                   IsMeanSupported,
+                                   delegateData.m_Backends,
+                                   isSupported,
+                                   inputTensorInfo,
+                                   outputTensorInfo,
+                                   desc);
+    };
+
+    if (!delegateData.m_Network)
+    {
+        validateFunc(outputTensorInfo, isSupported);
+        return isSupported ? kTfLiteOk : kTfLiteError;
+    }
+
+    // Setup layer and connect.
+    armnn::IConnectableLayer* meanLayer = delegateData.m_Network->AddMeanLayer(desc);
+    ARMNN_ASSERT(meanLayer != nullptr);
+
+    armnn::IOutputSlot& outputSlot = meanLayer->GetOutputSlot(0);
+    outputSlot.SetTensorInfo(outputTensorInfo);
+    return Connect(meanLayer, tfLiteNode, delegateData);
+}
+
 TfLiteStatus VisitControlOperator(DelegateData& delegateData,
                                   TfLiteContext* tfLiteContext,
                                   TfLiteNode* tfLiteNode,
                                   int nodeIndex,
-                                  int32_t controlOperatorCode)
+                                  int32_t operatorCode)
 {
     armnn::IgnoreUnused(delegateData,
                         tfLiteContext,
                         tfLiteNode,
                         nodeIndex,
-                        controlOperatorCode);
-
-    return kTfLiteError;
+                        operatorCode);
+    // Forward to the visitor matching operatorCode; any other operator code yields kTfLiteError.
+    switch(operatorCode)
+    {
+        case kTfLiteBuiltinConcatenation:
+            return VisitConcatenationOperator(delegateData, tfLiteContext, tfLiteNode, nodeIndex, operatorCode);
+        case kTfLiteBuiltinMean:
+            return VisitMeanOperator(delegateData, tfLiteContext, tfLiteNode, nodeIndex, operatorCode);
+        default:
+            return kTfLiteError;
+    }
 }
 
 } // namespace armnnDelegate
diff --git a/delegate/src/test/ControlTest.cpp b/delegate/src/test/ControlTest.cpp
new file mode 100644
index 0000000..43491be
--- /dev/null
+++ b/delegate/src/test/ControlTest.cpp
@@ -0,0 +1,420 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ControlTestHelper.hpp"
+
+#include <armnn_delegate.hpp>
+
+#include <flatbuffers/flatbuffers.h>
+#include <tensorflow/lite/schema/schema_generated.h>
+
+#include <doctest/doctest.h>
+
+namespace armnnDelegate
+{
+
+// CONCATENATION Operator
+void ConcatUint8TwoInputsTest(std::vector<armnn::BackendId>& backends)
+{
+    // Two 2x2 uint8 inputs; the expected 4x2 output is the first input's values followed by the second's.
+    std::vector<int32_t> inputShape { 2, 2 };
+    std::vector<int32_t> expectedOutputShape { 4, 2 };
+
+    // First input holds values at the lower bound of uint8, second input values at the upper bound.
+    std::vector<std::vector<uint8_t>> inputValues
+    {
+        { 0, 1, 2, 3 },
+        { 252, 253, 254, 255 }
+    };
+    std::vector<uint8_t> expectedOutputValues { 0, 1, 2, 3, 252, 253, 254, 255 };
+
+    ConcatenationTest<uint8_t>(tflite::BuiltinOperator_CONCATENATION,
+                               ::tflite::TensorType_UINT8,
+                               backends,
+                               inputShape,
+                               expectedOutputShape,
+                               inputValues,
+                               expectedOutputValues);
+}
+
+void ConcatInt16TwoInputsTest(std::vector<armnn::BackendId>& backends)
+{
+    // Two 2x2 int16 inputs spanning the int16 range; the expected 4x2 output is input one followed by input two.
+    std::vector<int32_t> inputShape { 2, 2 };
+    std::vector<int32_t> expectedOutputShape { 4, 2 };
+
+    std::vector<std::vector<int16_t>> inputValues
+    {
+        { -32768, -16384, -1, 0 },
+        { 1, 2, 16384, 32767 }
+    };
+    std::vector<int16_t> expectedOutputValues { -32768, -16384, -1, 0, 1, 2, 16384, 32767};
+
+    ConcatenationTest<int16_t>(tflite::BuiltinOperator_CONCATENATION,
+                               ::tflite::TensorType_INT16,
+                               backends,
+                               inputShape,
+                               expectedOutputShape,
+                               inputValues,
+                               expectedOutputValues);
+}
+
+void ConcatFloat32TwoInputsTest(std::vector<armnn::BackendId>& backends)
+{
+    // Two 2x2 float inputs; the expected 4x2 output is the first input's values followed by the second's.
+    std::vector<int32_t> inputShape { 2, 2 };
+    std::vector<int32_t> expectedOutputShape { 4, 2 };
+
+    std::vector<std::vector<float>> inputValues
+    {
+        { -127.f, -126.f, -1.f, 0.f },
+        { 1.f, 2.f, 126.f, 127.f }
+    };
+    std::vector<float> expectedOutputValues { -127.f, -126.f, -1.f, 0.f, 1.f, 2.f, 126.f, 127.f };
+
+    ConcatenationTest<float>(tflite::BuiltinOperator_CONCATENATION,
+                             ::tflite::TensorType_FLOAT32,
+                             backends,
+                             inputShape,
+                             expectedOutputShape,
+                             inputValues,
+                             expectedOutputValues);
+}
+
+void ConcatThreeInputsTest(std::vector<armnn::BackendId>& backends)
+{
+    // Three 2x2 uint8 inputs; the expected 6x2 output lists the inputs' values one after another.
+    std::vector<int32_t> inputShape { 2, 2 };
+    std::vector<int32_t> expectedOutputShape { 6, 2 };
+
+    // One row of values per input tensor.
+    std::vector<std::vector<uint8_t>> inputValues
+    {
+        { 0, 1, 2, 3 },
+        { 125, 126, 127, 128 },
+        { 252, 253, 254, 255 }
+    };
+    std::vector<uint8_t> expectedOutputValues { 0, 1, 2, 3, 125, 126, 127, 128, 252, 253, 254, 255 };
+
+    ConcatenationTest<uint8_t>(tflite::BuiltinOperator_CONCATENATION,
+                               ::tflite::TensorType_UINT8,
+                               backends,
+                               inputShape,
+                               expectedOutputShape,
+                               inputValues,
+                               expectedOutputValues);
+}
+
+void ConcatAxisTest(std::vector<armnn::BackendId>& backends)
+{
+    // Two 1x2x2 inputs, 1x2x4 expected; trailing argument 2 is presumably the concat axis (confirm in helper).
+    std::vector<int32_t> inputShape { 1, 2, 2 };
+    std::vector<int32_t> expectedOutputShape { 1, 2, 4 };
+
+    std::vector<std::vector<uint8_t>> inputValues
+    {
+        { 0, 1, 2, 3 },
+        { 252, 253, 254, 255 }
+    };
+    std::vector<uint8_t> expectedOutputValues { 0, 1, 252, 253, 2, 3, 254, 255 };
+
+    ConcatenationTest<uint8_t>(tflite::BuiltinOperator_CONCATENATION,
+                               ::tflite::TensorType_UINT8,
+                               backends,
+                               inputShape,
+                               expectedOutputShape,
+                               inputValues,
+                               expectedOutputValues,
+                               2);
+}
+
+// MEAN Operator
+void MeanUint8KeepDimsTest(std::vector<armnn::BackendId>& backends)
+{
+    // Mean over axis 1 of a 1x3 uint8 tensor { 5, 10, 15 }; the 1x1 output is expected to hold 10.
+    std::vector<int32_t> dataShape { 1, 3 };
+    std::vector<int32_t> axisShape { 1 };
+    std::vector<int32_t> expectedOutputShape { 1, 1 };
+
+    std::vector<uint8_t> dataValues { 5, 10, 15 };
+    std::vector<int32_t> axisValues { 1 };
+    std::vector<uint8_t> expectedOutputValues { 10 };
+
+    MeanTest<uint8_t>(tflite::BuiltinOperator_MEAN,
+                      ::tflite::TensorType_UINT8,
+                      backends,
+                      dataShape,
+                      axisShape,
+                      expectedOutputShape,
+                      dataValues,
+                      axisValues,
+                      expectedOutputValues,
+                      true);
+}
+
+void MeanUint8Test(std::vector<armnn::BackendId>& backends)
+{
+    // Mean over axis 0 (extent 1) of a 1x2x2 uint8 tensor; the 2x2 output is expected to equal the input values.
+    std::vector<int32_t> dataShape { 1, 2, 2 };
+    std::vector<int32_t> axisShape { 1 };
+    std::vector<int32_t> expectedOutputShape { 2, 2 };
+
+    std::vector<uint8_t> dataValues { 5, 10, 15, 20 };
+    std::vector<int32_t> axisValues { 0 };
+    std::vector<uint8_t> expectedOutputValues { 5, 10, 15, 20 };
+
+    MeanTest<uint8_t>(tflite::BuiltinOperator_MEAN,
+                      ::tflite::TensorType_UINT8,
+                      backends,
+                      dataShape,
+                      axisShape,
+                      expectedOutputShape,
+                      dataValues,
+                      axisValues,
+                      expectedOutputValues,
+                      false);
+}
+
+void MeanFp32KeepDimsTest(std::vector<armnn::BackendId>& backends)
+{
+    // Mean over axis 1 of a 1x2x2 float tensor; the 1x1x2 output is expected to be { 1.5, 2.0 }.
+    std::vector<int32_t> dataShape { 1, 2, 2 };
+    std::vector<int32_t> axisShape { 1 };
+    std::vector<int32_t> expectedOutputShape { 1, 1, 2 };
+
+    std::vector<float> dataValues { 1.0f, 1.5f, 2.0f, 2.5f };
+    std::vector<int32_t> axisValues { 1 };
+    std::vector<float> expectedOutputValues { 1.5f, 2.0f };
+
+    MeanTest<float>(tflite::BuiltinOperator_MEAN,
+                    ::tflite::TensorType_FLOAT32,
+                    backends,
+                    dataShape,
+                    axisShape,
+                    expectedOutputShape,
+                    dataValues,
+                    axisValues,
+                    expectedOutputValues,
+                    true);
+}
+
+void MeanFp32Test(std::vector<armnn::BackendId>& backends)
+{
+    // Mean over axis 2 of a 1x2x2x1 float tensor; the 1x2x1 output is expected to be { 1.25, 2.25 }.
+    std::vector<int32_t> dataShape { 1, 2, 2, 1 };
+    std::vector<int32_t> axisShape { 1 };
+    std::vector<int32_t> expectedOutputShape { 1, 2, 1 };
+
+    std::vector<float> dataValues { 1.0f, 1.5f, 2.0f, 2.5f };
+    std::vector<int32_t> axisValues { 2 };
+    std::vector<float> expectedOutputValues { 1.25f, 2.25f };
+
+    MeanTest<float>(tflite::BuiltinOperator_MEAN,
+                    ::tflite::TensorType_FLOAT32,
+                    backends,
+                    dataShape,
+                    axisShape,
+                    expectedOutputShape,
+                    dataValues,
+                    axisValues,
+                    expectedOutputValues,
+                    false);
+}
+
+// CONCATENATION Tests.
+TEST_SUITE("Concatenation_CpuAccTests")
+{
+// Runs each concatenation scenario defined above on the CpuAcc backend.
+TEST_CASE ("Concatenation_Uint8_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends { armnn::Compute::CpuAcc };
+    ConcatUint8TwoInputsTest(backends);
+}
+
+TEST_CASE ("Concatenation_Int16_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends { armnn::Compute::CpuAcc };
+    ConcatInt16TwoInputsTest(backends);
+}
+
+TEST_CASE ("Concatenation_Float32_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends { armnn::Compute::CpuAcc };
+    ConcatFloat32TwoInputsTest(backends);
+}
+
+TEST_CASE ("Concatenation_Three_Inputs_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends { armnn::Compute::CpuAcc };
+    ConcatThreeInputsTest(backends);
+}
+
+TEST_CASE ("Concatenation_Axis_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends { armnn::Compute::CpuAcc };
+    ConcatAxisTest(backends);
+}
+
+}
+
+TEST_SUITE("Concatenation_GpuAccTests")
+{
+// Runs each concatenation scenario defined above on the GpuAcc backend.
+TEST_CASE ("Concatenation_Uint8_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
+    ConcatUint8TwoInputsTest(backends);
+}
+
+TEST_CASE ("Concatenation_Int16_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
+    ConcatInt16TwoInputsTest(backends);
+}
+
+TEST_CASE ("Concatenation_Float32_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
+    ConcatFloat32TwoInputsTest(backends);
+}
+
+TEST_CASE ("Concatenation_Three_Inputs_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
+    ConcatThreeInputsTest(backends);
+}
+
+TEST_CASE ("Concatenation_Axis_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
+    ConcatAxisTest(backends);
+}
+
+}
+
+TEST_SUITE("Concatenation_CpuRefTests")
+{
+// Runs each concatenation scenario defined above on the CpuRef backend.
+TEST_CASE ("Concatenation_Uint8_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends { armnn::Compute::CpuRef };
+    ConcatUint8TwoInputsTest(backends);
+}
+
+TEST_CASE ("Concatenation_Int16_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends { armnn::Compute::CpuRef };
+    ConcatInt16TwoInputsTest(backends);
+}
+
+TEST_CASE ("Concatenation_Float32_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends { armnn::Compute::CpuRef };
+    ConcatFloat32TwoInputsTest(backends);
+}
+
+TEST_CASE ("Concatenation_Three_Inputs_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends { armnn::Compute::CpuRef };
+    ConcatThreeInputsTest(backends);
+}
+
+TEST_CASE ("Concatenation_Axis_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends { armnn::Compute::CpuRef };
+    ConcatAxisTest(backends);
+}
+
+}
+
+// MEAN Tests
+TEST_SUITE("Mean_CpuAccTests")
+{
+// Runs each mean scenario defined above on the CpuAcc backend.
+TEST_CASE ("Mean_Uint8_KeepDims_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends { armnn::Compute::CpuAcc };
+    MeanUint8KeepDimsTest(backends);
+}
+
+TEST_CASE ("Mean_Uint8_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends { armnn::Compute::CpuAcc };
+    MeanUint8Test(backends);
+}
+
+TEST_CASE ("Mean_Fp32_KeepDims_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends { armnn::Compute::CpuAcc };
+    MeanFp32KeepDimsTest(backends);
+}
+
+TEST_CASE ("Mean_Fp32_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends { armnn::Compute::CpuAcc };
+    MeanFp32Test(backends);
+}
+
+}
+
+TEST_SUITE("Mean_GpuAccTests")
+{
+// Runs each mean scenario defined above on the GpuAcc backend.
+TEST_CASE ("Mean_Uint8_KeepDims_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends { armnn::Compute::GpuAcc };
+    MeanUint8KeepDimsTest(backends);
+}
+
+TEST_CASE ("Mean_Uint8_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends { armnn::Compute::GpuAcc };
+    MeanUint8Test(backends);
+}
+
+TEST_CASE ("Mean_Fp32_KeepDims_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends { armnn::Compute::GpuAcc };
+    MeanFp32KeepDimsTest(backends);
+}
+
+TEST_CASE ("Mean_Fp32_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends { armnn::Compute::GpuAcc };
+    MeanFp32Test(backends);
+}
+
+}
+
+TEST_SUITE("Mean_CpuRefTests")
+{
+// Runs each mean scenario defined above on the CpuRef backend.
+TEST_CASE ("Mean_Uint8_KeepDims_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends { armnn::Compute::CpuRef };
+    MeanUint8KeepDimsTest(backends);
+}
+
+TEST_CASE ("Mean_Uint8_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends { armnn::Compute::CpuRef };
+    MeanUint8Test(backends);
+}
+
+TEST_CASE ("Mean_Fp32_KeepDims_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends { armnn::Compute::CpuRef };
+    MeanFp32KeepDimsTest(backends);
+}
+
+TEST_CASE ("Mean_Fp32_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends { armnn::Compute::CpuRef };
+    MeanFp32Test(backends);
+}
+
+}
+
+} // namespace armnnDelegate
\ No newline at end of file
diff --git a/delegate/src/test/ControlTestHelper.hpp b/delegate/src/test/ControlTestHelper.hpp
new file mode 100644
index 0000000..0c97961
--- /dev/null
+++ b/delegate/src/test/ControlTestHelper.hpp
@@ -0,0 +1,344 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "TestUtils.hpp"
+
+#include <armnn_delegate.hpp>
+
+#include <flatbuffers/flatbuffers.h>
+#include <tensorflow/lite/interpreter.h>
+#include <tensorflow/lite/kernels/register.h>
+#include <tensorflow/lite/model.h>
+#include <tensorflow/lite/schema/schema_generated.h>
+#include <tensorflow/lite/version.h>
+
+#include <doctest/doctest.h>
+
+#include <string>
+
+namespace
+{
+
+// Serialises a TfLite flatbuffer model that contains one operator configured with
+// ConcatenationOptions(axis).
+//
+// The model has inputTensorNum input tensors, each of shape inputTensorShape, and a single
+// output tensor of shape outputTensorShape. All tensors use tensorType and share the same
+// quantization parameters built from quantScale and quantOffset. controlOperatorCode is
+// stored as the model's only operator code.
+//
+// Returns a copy of the finished flatbuffer bytes.
+std::vector<char> CreateConcatTfLiteModel(tflite::BuiltinOperator controlOperatorCode,
+                                          tflite::TensorType tensorType,
+                                          std::vector<int32_t>& inputTensorShape,
+                                          const std::vector <int32_t>& outputTensorShape,
+                                          const int32_t inputTensorNum,
+                                          int32_t axis = 0,
+                                          float quantScale = 1.0f,
+                                          int quantOffset  = 0)
+{
+    using namespace tflite;
+    flatbuffers::FlatBufferBuilder builder;
+
+    // The model carries a single, empty buffer; every tensor below refers to buffer index 0.
+    std::vector<flatbuffers::Offset<tflite::Buffer>> modelBuffers;
+    modelBuffers.push_back(CreateBuffer(builder, builder.CreateVector({})));
+
+    // One set of quantization parameters is reused for all inputs and for the output.
+    auto sharedQuantParams =
+            CreateQuantizationParameters(builder,
+                                         0,
+                                         0,
+                                         builder.CreateVector<float>({ quantScale }),
+                                         builder.CreateVector<int64_t>({ quantOffset }));
+
+    // Tensor ids [0, inputTensorNum) are the inputs; id inputTensorNum is the output.
+    const std::vector<int32_t> opOutputIds{inputTensorNum};
+    const std::vector<int> graphOutputIds{inputTensorNum};
+    std::vector<int32_t> opInputIds{};
+    std::vector<int> graphInputIds{};
+
+    std::vector<flatbuffers::Offset<Tensor>> tensorOffsets(inputTensorNum + 1);
+    for (int tensorId = 0; tensorId < inputTensorNum; ++tensorId)
+    {
+        // Each input tensor feeds both the operator and the subgraph.
+        opInputIds.push_back(tensorId);
+        graphInputIds.push_back(tensorId);
+
+        tensorOffsets[tensorId] = CreateTensor(builder,
+                                               builder.CreateVector<int32_t>(inputTensorShape.data(),
+                                                                             inputTensorShape.size()),
+                                               tensorType,
+                                               0,
+                                               builder.CreateString("input" + std::to_string(tensorId)),
+                                               sharedQuantParams);
+    }
+
+    // The output tensor occupies the last slot.
+    tensorOffsets[inputTensorNum] = CreateTensor(builder,
+                                                 builder.CreateVector<int32_t>(outputTensorShape.data(),
+                                                                               outputTensorShape.size()),
+                                                 tensorType,
+                                                 0,
+                                                 builder.CreateString("output"),
+                                                 sharedQuantParams);
+
+    // Operator: all inputs -> output, using ConcatenationOptions with the requested axis.
+    tflite::BuiltinOptions optionsType = tflite::BuiltinOptions_ConcatenationOptions;
+    flatbuffers::Offset<void> optionsOffset = CreateConcatenationOptions(builder, axis).Union();
+
+    flatbuffers::Offset <Operator> concatOperator =
+            CreateOperator(builder,
+                           0,
+                           builder.CreateVector<int32_t>(opInputIds.data(), opInputIds.size()),
+                           builder.CreateVector<int32_t>(opOutputIds.data(), opOutputIds.size()),
+                           optionsType,
+                           optionsOffset);
+
+    // Subgraph wrapping the tensors and the single operator.
+    flatbuffers::Offset <SubGraph> graphOffset =
+            CreateSubGraph(builder,
+                           builder.CreateVector(tensorOffsets.data(), tensorOffsets.size()),
+                           builder.CreateVector<int32_t>(graphInputIds.data(), graphInputIds.size()),
+                           builder.CreateVector<int32_t>(graphOutputIds.data(), graphOutputIds.size()),
+                           builder.CreateVector(&concatOperator, 1));
+
+    flatbuffers::Offset <flatbuffers::String> descriptionOffset =
+            builder.CreateString("ArmnnDelegate: Concatenation Operator Model");
+    flatbuffers::Offset <OperatorCode> opCodeOffset = CreateOperatorCode(builder, controlOperatorCode);
+
+    // Top-level model: one operator code, one subgraph, the description and the buffers.
+    flatbuffers::Offset <Model> modelOffset =
+            CreateModel(builder,
+                        TFLITE_SCHEMA_VERSION,
+                        builder.CreateVector(&opCodeOffset, 1),
+                        builder.CreateVector(&graphOffset, 1),
+                        descriptionOffset,
+                        builder.CreateVector(modelBuffers.data(), modelBuffers.size()));
+
+    builder.Finish(modelOffset);
+
+    // Copy the serialised bytes out of the builder so they outlive it.
+    const auto* serialisedBegin = builder.GetBufferPointer();
+    return std::vector<char>(serialisedBegin, serialisedBegin + builder.GetSize());
+}
+
+// Serialises a TfLite flatbuffer model that contains one operator configured with
+// ReducerOptions(keepDims).
+//
+// Tensor 0 ("input") has shape input0TensorShape and type tensorType. Tensor 1 ("axis") is
+// an INT32 tensor of shape input1TensorShape backed by buffer 1, which holds the raw bytes
+// of axisData. Tensor 2 ("output") has shape outputTensorShape and type tensorType. All
+// three tensors are created with the same quantization parameters built from quantScale and
+// quantOffset. controlOperatorCode is stored as the model's only operator code.
+//
+// Returns a copy of the finished flatbuffer bytes.
+std::vector<char> CreateMeanTfLiteModel(tflite::BuiltinOperator controlOperatorCode,
+                                        tflite::TensorType tensorType,
+                                        std::vector<int32_t>& input0TensorShape,
+                                        std::vector<int32_t>& input1TensorShape,
+                                        const std::vector <int32_t>& outputTensorShape,
+                                        std::vector<int32_t>& axisData,
+                                        const bool keepDims,
+                                        float quantScale = 1.0f,
+                                        int quantOffset  = 0)
+{
+    using namespace tflite;
+    flatbuffers::FlatBufferBuilder builder;
+
+    // Buffer 0 is empty; buffer 1 stores the axis values as a byte vector.
+    std::array<flatbuffers::Offset<tflite::Buffer>, 2> modelBuffers;
+    modelBuffers[0] = CreateBuffer(builder, builder.CreateVector({}));
+    modelBuffers[1] = CreateBuffer(builder,
+                                   builder.CreateVector(reinterpret_cast<const uint8_t*>(axisData.data()),
+                                                        sizeof(int32_t) * axisData.size()));
+
+    // One set of quantization parameters is reused for every tensor.
+    auto sharedQuantParams =
+            CreateQuantizationParameters(builder,
+                                         0,
+                                         0,
+                                         builder.CreateVector<float>({ quantScale }),
+                                         builder.CreateVector<int64_t>({ quantOffset }));
+
+    std::array<flatbuffers::Offset<Tensor>, 3> tensorOffsets;
+
+    // Tensor 0: the data to be reduced.
+    tensorOffsets[0] = CreateTensor(builder,
+                                    builder.CreateVector<int32_t>(input0TensorShape.data(),
+                                                                  input0TensorShape.size()),
+                                    tensorType,
+                                    0,
+                                    builder.CreateString("input"),
+                                    sharedQuantParams);
+
+    // Tensor 1: the axis tensor, always INT32 and referring to buffer 1.
+    tensorOffsets[1] = CreateTensor(builder,
+                                    builder.CreateVector<int32_t>(input1TensorShape.data(),
+                                                                  input1TensorShape.size()),
+                                    ::tflite::TensorType_INT32,
+                                    1,
+                                    builder.CreateString("axis"),
+                                    sharedQuantParams);
+
+    // Tensor 2: the output.
+    tensorOffsets[2] = CreateTensor(builder,
+                                    builder.CreateVector<int32_t>(outputTensorShape.data(),
+                                                                  outputTensorShape.size()),
+                                    tensorType,
+                                    0,
+                                    builder.CreateString("output"),
+                                    sharedQuantParams);
+
+    // Operator: (input, axis) -> output. Mean uses ReducerOptions.
+    tflite::BuiltinOptions optionsType = tflite::BuiltinOptions_ReducerOptions;
+    flatbuffers::Offset<void> optionsOffset = CreateReducerOptions(builder, keepDims).Union();
+
+    const std::vector<int> opInputIds{ 0, 1 };
+    const std::vector<int> opOutputIds{ 2 };
+    flatbuffers::Offset <Operator> meanOperator =
+            CreateOperator(builder,
+                           0,
+                           builder.CreateVector<int32_t>(opInputIds.data(), opInputIds.size()),
+                           builder.CreateVector<int32_t>(opOutputIds.data(), opOutputIds.size()),
+                           optionsType,
+                           optionsOffset);
+
+    // Subgraph: the same tensor ids are exposed as the subgraph inputs and output.
+    const std::vector<int> graphInputIds{ 0, 1 };
+    const std::vector<int> graphOutputIds{ 2 };
+    flatbuffers::Offset <SubGraph> graphOffset =
+            CreateSubGraph(builder,
+                           builder.CreateVector(tensorOffsets.data(), tensorOffsets.size()),
+                           builder.CreateVector<int32_t>(graphInputIds.data(), graphInputIds.size()),
+                           builder.CreateVector<int32_t>(graphOutputIds.data(), graphOutputIds.size()),
+                           builder.CreateVector(&meanOperator, 1));
+
+    flatbuffers::Offset <flatbuffers::String> descriptionOffset =
+            builder.CreateString("ArmnnDelegate: Mean Operator Model");
+    flatbuffers::Offset <OperatorCode> opCodeOffset = CreateOperatorCode(builder, controlOperatorCode);
+
+    // Top-level model: one operator code, one subgraph, the description and the buffers.
+    flatbuffers::Offset <Model> modelOffset =
+            CreateModel(builder,
+                        TFLITE_SCHEMA_VERSION,
+                        builder.CreateVector(&opCodeOffset, 1),
+                        builder.CreateVector(&graphOffset, 1),
+                        descriptionOffset,
+                        builder.CreateVector(modelBuffers.data(), modelBuffers.size()));
+
+    builder.Finish(modelOffset);
+
+    // Copy the serialised bytes out of the builder so they outlive it.
+    const auto* serialisedBegin = builder.GetBufferPointer();
+    return std::vector<char>(serialisedBegin, serialisedBegin + builder.GetSize());
+}
+
+// Builds a single-operator concatenation model, runs it once on a plain TfLite interpreter
+// and once on an interpreter modified with the ArmNN delegate, then compares both outputs
+// with each other and with the expected shape and values.
+//
+// controlOperatorCode  operator code written into the model.
+// tensorType           TfLite type of every input tensor and of the output tensor.
+// backends             ArmNN backends handed to the delegate options.
+// inputShapes          shape shared by all input tensors.
+// expectedOutputShape  shape of the output tensor, also checked after inference.
+// inputValues          one vector of values per input tensor; its size is the input count.
+// expectedOutputValues values the output of both interpreters must match.
+// axis                 concatenation axis stored in the operator options.
+// quantScale, quantOffset  quantization parameters applied to every tensor.
+template <typename T>
+void ConcatenationTest(tflite::BuiltinOperator controlOperatorCode,
+                       tflite::TensorType tensorType,
+                       std::vector<armnn::BackendId>& backends,
+                       std::vector<int32_t>& inputShapes,
+                       std::vector<int32_t>& expectedOutputShape,
+                       std::vector<std::vector<T>>& inputValues,
+                       std::vector<T>& expectedOutputValues,
+                       int32_t axis = 0,
+                       float quantScale = 1.0f,
+                       int quantOffset  = 0)
+{
+    using namespace tflite;
+    // The model builder takes the input count as int32_t; make the size_t conversion explicit.
+    std::vector<char> modelBuffer = CreateConcatTfLiteModel(controlOperatorCode,
+                                                            tensorType,
+                                                            inputShapes,
+                                                            expectedOutputShape,
+                                                            static_cast<int32_t>(inputValues.size()),
+                                                            axis,
+                                                            quantScale,
+                                                            quantOffset);
+
+    const Model* tfLiteModel = GetModel(modelBuffer.data());
+
+    // Create TfLite Interpreters.
+    // REQUIRE (not CHECK) is used for the setup steps: CHECK would carry on after a failure
+    // and the following statements would dereference a null interpreter or write into
+    // unallocated tensors. No delegate is attached yet, so aborting the test case here
+    // cannot disturb the interpreter/delegate destruction order handled at the end.
+    std::unique_ptr<Interpreter> armnnDelegateInterpreter;
+    REQUIRE(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver())
+                    (&armnnDelegateInterpreter) == kTfLiteOk);
+    REQUIRE(armnnDelegateInterpreter != nullptr);
+    REQUIRE(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk);
+
+    std::unique_ptr<Interpreter> tfLiteInterpreter;
+    REQUIRE(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver())
+                    (&tfLiteInterpreter) == kTfLiteOk);
+    REQUIRE(tfLiteInterpreter != nullptr);
+    REQUIRE(tfLiteInterpreter->AllocateTensors() == kTfLiteOk);
+
+    // Create the ArmNN Delegate
+    armnnDelegate::DelegateOptions delegateOptions(backends);
+    std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
+            theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
+                             armnnDelegate::TfLiteArmnnDelegateDelete);
+    REQUIRE(theArmnnDelegate != nullptr);
+
+    // Modify armnnDelegateInterpreter to use armnnDelegate
+    CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk);
+
+    // Set input data for all input tensors; the same values go to both interpreters.
+    for (unsigned int i = 0; i < inputValues.size(); ++i)
+    {
+        // Pass the stored vector directly instead of copying it on every iteration.
+        armnnDelegate::FillInput<T>(tfLiteInterpreter, static_cast<int>(i), inputValues[i]);
+        armnnDelegate::FillInput<T>(armnnDelegateInterpreter, static_cast<int>(i), inputValues[i]);
+    }
+
+    // Run inference on both interpreters
+    CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk);
+    CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk);
+
+    // Compare output data
+    armnnDelegate::CompareOutputData<T>(tfLiteInterpreter,
+                                        armnnDelegateInterpreter,
+                                        expectedOutputShape,
+                                        expectedOutputValues);
+
+    // Destroy the delegated interpreter explicitly: theArmnnDelegate is declared after it,
+    // so at scope exit the delegate would otherwise be destroyed before the interpreter.
+    armnnDelegateInterpreter.reset(nullptr);
+}
+
+// Builds a single-operator Mean model, runs it once on a plain TfLite interpreter and once
+// on an interpreter modified with the ArmNN delegate, then compares both outputs with each
+// other and with the expected shape and values.
+//
+// controlOperatorCode  operator code written into the model.
+// tensorType           TfLite type of the data input tensor and of the output tensor.
+// backends             ArmNN backends handed to the delegate options.
+// input0Shape          shape of the data input tensor.
+// input1Shape          shape of the axis tensor.
+// expectedOutputShape  shape of the output tensor, also checked after inference.
+// input0Values         values written into input 0 of both interpreters.
+// input1Values         axis values; they are passed to the model builder and are not
+//                      filled in at run time.
+// expectedOutputValues values the output of both interpreters must match.
+// keepDims             keep-dims flag passed to the model builder.
+// quantScale, quantOffset  quantization parameters passed to the model builder.
+template <typename T>
+void MeanTest(tflite::BuiltinOperator controlOperatorCode,
+              tflite::TensorType tensorType,
+              std::vector<armnn::BackendId>& backends,
+              std::vector<int32_t>& input0Shape,
+              std::vector<int32_t>& input1Shape,
+              std::vector<int32_t>& expectedOutputShape,
+              std::vector<T>& input0Values,
+              std::vector<int32_t>& input1Values,
+              std::vector<T>& expectedOutputValues,
+              const bool keepDims,
+              float quantScale = 1.0f,
+              int quantOffset  = 0)
+{
+    using namespace tflite;
+    std::vector<char> modelBuffer = CreateMeanTfLiteModel(controlOperatorCode,
+                                                          tensorType,
+                                                          input0Shape,
+                                                          input1Shape,
+                                                          expectedOutputShape,
+                                                          input1Values,
+                                                          keepDims,
+                                                          quantScale,
+                                                          quantOffset);
+
+    const Model* tfLiteModel = GetModel(modelBuffer.data());
+
+    // Create TfLite Interpreters.
+    // REQUIRE (not CHECK) is used for the setup steps: CHECK would carry on after a failure
+    // and the following statements would dereference a null interpreter or write into
+    // unallocated tensors. No delegate is attached yet, so aborting the test case here
+    // cannot disturb the interpreter/delegate destruction order handled at the end.
+    std::unique_ptr<Interpreter> armnnDelegateInterpreter;
+    REQUIRE(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver())
+                    (&armnnDelegateInterpreter) == kTfLiteOk);
+    REQUIRE(armnnDelegateInterpreter != nullptr);
+    REQUIRE(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk);
+
+    std::unique_ptr<Interpreter> tfLiteInterpreter;
+    REQUIRE(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver())
+                    (&tfLiteInterpreter) == kTfLiteOk);
+    REQUIRE(tfLiteInterpreter != nullptr);
+    REQUIRE(tfLiteInterpreter->AllocateTensors() == kTfLiteOk);
+
+    // Create the ArmNN Delegate
+    armnnDelegate::DelegateOptions delegateOptions(backends);
+    std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
+            theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
+                             armnnDelegate::TfLiteArmnnDelegateDelete);
+    REQUIRE(theArmnnDelegate != nullptr);
+
+    // Modify armnnDelegateInterpreter to use armnnDelegate
+    CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk);
+
+    // Set input data; only input 0 is filled here.
+    armnnDelegate::FillInput<T>(tfLiteInterpreter, 0, input0Values);
+    armnnDelegate::FillInput<T>(armnnDelegateInterpreter, 0, input0Values);
+
+    // Run inference on both interpreters
+    CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk);
+    CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk);
+
+    // Compare output data
+    armnnDelegate::CompareOutputData<T>(tfLiteInterpreter,
+                                        armnnDelegateInterpreter,
+                                        expectedOutputShape,
+                                        expectedOutputValues);
+
+    // Destroy the delegated interpreter explicitly: theArmnnDelegate is declared after it,
+    // so at scope exit the delegate would otherwise be destroyed before the interpreter.
+    armnnDelegateInterpreter.reset(nullptr);
+}
+
+} // anonymous namespace
\ No newline at end of file
diff --git a/delegate/src/test/TestUtils.hpp b/delegate/src/test/TestUtils.hpp
index 162d62f..9bbab8f 100644
--- a/delegate/src/test/TestUtils.hpp
+++ b/delegate/src/test/TestUtils.hpp
@@ -7,6 +7,8 @@
 
 #include <tensorflow/lite/interpreter.h>
 
+#include <doctest/doctest.h>
+
 namespace armnnDelegate
 {
 
@@ -23,4 +25,35 @@
     }
 }
 
+// Compares the first output tensor of tfLiteInterpreter and armnnDelegateInterpreter.
+// The rank, every dimension and every value are checked against expectedOutputShape and
+// expectedOutputValues, and the two interpreters are checked against each other.
+// Values are compared with operator==.
+//
+// Failures are reported with CHECK and the function then returns early instead of
+// aborting the test case, so the caller's own clean-up code still runs.
+// Example usage can be found in ControlTestHelper.hpp
+template <typename T>
+void CompareOutputData(std::unique_ptr<tflite::Interpreter>& tfLiteInterpreter,
+                       std::unique_ptr<tflite::Interpreter>& armnnDelegateInterpreter,
+                       std::vector<int32_t>& expectedOutputShape,
+                       std::vector<T>& expectedOutputValues)
+{
+    auto tfLiteOutputId = tfLiteInterpreter->outputs()[0];
+    auto tfLiteOutputTensor = tfLiteInterpreter->tensor(tfLiteOutputId);
+    auto tfLiteOutputData = tfLiteInterpreter->typed_tensor<T>(tfLiteOutputId);
+    auto armnnDelegateOutputId = armnnDelegateInterpreter->outputs()[0];
+    auto armnnDelegateOutputTensor = armnnDelegateInterpreter->tensor(armnnDelegateOutputId);
+    auto armnnDelegateOutputData = armnnDelegateInterpreter->typed_tensor<T>(armnnDelegateOutputId);
+
+    // The tensor and data pointers are dereferenced below, so stop here if any is null
+    // (typed_tensor<T>() returns nullptr when the tensor's type is not T).
+    const bool outputsAccessible = tfLiteOutputTensor != nullptr &&
+                                   tfLiteOutputData != nullptr &&
+                                   armnnDelegateOutputTensor != nullptr &&
+                                   armnnDelegateOutputData != nullptr;
+    CHECK(outputsAccessible);
+    if (!outputsAccessible)
+    {
+        return;
+    }
+
+    // The ranks must match before individual dimensions are read; otherwise dims->data[i]
+    // could be indexed past its end, or extra output dimensions would go unnoticed.
+    const int expectedRank = static_cast<int>(expectedOutputShape.size());
+    const int tfLiteRank = tfLiteOutputTensor->dims->size;
+    const int armnnDelegateRank = armnnDelegateOutputTensor->dims->size;
+    CHECK(tfLiteRank == expectedRank);
+    CHECK(armnnDelegateRank == expectedRank);
+    if (tfLiteRank != expectedRank || armnnDelegateRank != expectedRank)
+    {
+        return;
+    }
+
+    for (size_t i = 0; i < expectedOutputShape.size(); i++)
+    {
+        CHECK(expectedOutputShape[i] == armnnDelegateOutputTensor->dims->data[i]);
+        CHECK(tfLiteOutputTensor->dims->data[i] == expectedOutputShape[i]);
+        CHECK(tfLiteOutputTensor->dims->data[i] == armnnDelegateOutputTensor->dims->data[i]);
+    }
+
+    for (size_t i = 0; i < expectedOutputValues.size(); i++)
+    {
+        CHECK(expectedOutputValues[i] == armnnDelegateOutputData[i]);
+        CHECK(tfLiteOutputData[i] == expectedOutputValues[i]);
+        CHECK(tfLiteOutputData[i] == armnnDelegateOutputData[i]);
+    }
+}
+
 } // namespace armnnDelegate