IVGCVSW-5970 TfLiteDelegate: Add UNPACK operator Support


Signed-off-by: Kevin May <kevin.may@arm.com>
Change-Id: I23731718236043b46c143eaf416cb375edd93983
diff --git a/delegate/src/Unpack.hpp b/delegate/src/Unpack.hpp
new file mode 100644
index 0000000..87200ff
--- /dev/null
+++ b/delegate/src/Unpack.hpp
@@ -0,0 +1,184 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <armnn/utility/IgnoreUnused.hpp>
+
+#include "DelegateUtils.hpp"
+
+#include <tensorflow/lite/builtin_ops.h>
+#include <tensorflow/lite/c/builtin_op_data.h>
+#include <tensorflow/lite/c/common.h>
+#include <tensorflow/lite/minimal_logging.h>
+#include <numeric>
+
+namespace armnnDelegate
+{
+
+/// Translates a TfLite UNPACK node into an Arm NN Splitter layer followed by one Reshape layer per output.
+/// The Splitter cuts the input into views of size 1 along the unpack axis; each Reshape then gives its view
+/// the shape of the matching TfLite output tensor.
+/// @param delegateData  Supplies the backends for the support query; if m_Network is set, layers are added to it.
+/// @param tfLiteContext TfLite context that owns the tensors and receives the error logs.
+/// @param tfLiteNode    UNPACK node: one input, one output per slice, builtin_data read as TfLiteUnpackParams.
+/// @param nodeIndex     Index of the node, used in log messages.
+/// @param operatorCode  Builtin operator code, used for tensor validation and log messages.
+/// @return kTfLiteOk if the node is supported (no network set) or was added to the network, else an error status.
+TfLiteStatus VisitUnpackOperator(DelegateData& delegateData,
+                                 TfLiteContext* tfLiteContext,
+                                 TfLiteNode* tfLiteNode,
+                                 int nodeIndex,
+                                 int32_t operatorCode)
+{
+    TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 1, nodeIndex));
+
+    const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors;
+    const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]];
+
+    if (!IsValid(tfLiteContext, tfLiteInputTensor, operatorCode, nodeIndex))
+    {
+        return kTfLiteError;
+    }
+
+    // Get Unpack Axis
+    const auto params = reinterpret_cast<TfLiteUnpackParams*>(tfLiteNode->builtin_data);
+
+    const unsigned int unpackAxis = NonNegative(params->axis, nodeIndex);
+
+    const armnn::TensorInfo& inputTensorInfo  = GetTensorInfoForTfLiteTensor(tfLiteInputTensor);
+
+    if (unpackAxis >= inputTensorInfo.GetNumDimensions())
+    {
+        // unpackAxis and the dimension count are unsigned, hence %u rather than %d.
+        TF_LITE_MAYBE_KERNEL_LOG(
+            tfLiteContext,
+            "TfLiteArmnnDelegate: The unpack axis #%u cannot be greater than or equal to "
+            "the number of input dimensions #%u in operator #%d node #%d",
+            unpackAxis, inputTensorInfo.GetNumDimensions(), operatorCode, nodeIndex);
+        return kTfLiteError;
+    }
+
+    // Get Unpack Num
+    unsigned int unpackNum = NonNegative(params->num, nodeIndex);
+
+    // If num is not defined, automatically infer from the length of the dimension axis.
+    if (unpackNum == 0)
+    {
+        unpackNum = inputTensorInfo.GetShape()[unpackAxis];
+    }
+
+    // If unpack number cannot be inferred and is still zero, return kTfLiteError.
+    if (unpackNum == 0)
+    {
+        TF_LITE_MAYBE_KERNEL_LOG(
+            tfLiteContext,
+            "TfLiteArmnnDelegate: Number to unpack must be greater than zero in operator #%d node #%d: ",
+            operatorCode, nodeIndex);
+        return kTfLiteError;
+    }
+
+    // Check outputs
+    TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, unpackNum, nodeIndex));
+
+    auto inputDimSize = inputTensorInfo.GetNumDimensions();
+    std::vector<unsigned int> unpackDimSizes(inputDimSize);
+
+    // Add current input shape to unpackDimSizes
+    for (unsigned int i = 0; i < inputDimSize; ++i)
+    {
+        unpackDimSizes[i] = inputTensorInfo.GetShape()[i];
+    }
+
+    if (unpackDimSizes[unpackAxis] != unpackNum)
+    {
+        TF_LITE_MAYBE_KERNEL_LOG(
+            tfLiteContext,
+            "TfLiteArmnnDelegate: Number to unpack must be the same as length "
+            "of the dimension to unpack along in operator #%d node #%d: ",
+            operatorCode, nodeIndex);
+        return kTfLiteError;
+    }
+
+    // Each view keeps the input shape except on the unpack axis, where the check above leaves an extent of 1.
+    unpackDimSizes[unpackAxis] /= unpackNum;
+
+    armnn::SplitterDescriptor splitDesc(unpackNum, static_cast<unsigned int>(unpackDimSizes.size()));
+    for (unsigned int j = 0; j < unpackNum; ++j)
+    {
+        // Set the size of the views.
+        for (unsigned int dimIdx = 0; dimIdx < unpackDimSizes.size(); ++dimIdx)
+        {
+            splitDesc.SetViewSize(j, dimIdx, unpackDimSizes[dimIdx]);
+        }
+        splitDesc.SetViewOriginCoord(j, unpackAxis, unpackDimSizes[unpackAxis] * j);
+    }
+
+    std::vector<armnn::TensorInfo> outputs;
+    outputs.reserve(unpackNum);
+    for (unsigned int i = 0; i < unpackNum; ++i)
+    {
+        const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[i]];
+        if (!IsValid(tfLiteContext, tfLiteOutputTensor, operatorCode, nodeIndex))
+        {
+            return kTfLiteError;
+        }
+        outputs.push_back(GetTensorInfoForTfLiteTensor(tfLiteOutputTensor));
+    }
+    const std::vector<std::reference_wrapper<armnn::TensorInfo>> outputTensorInfos(outputs.begin(), outputs.end());
+
+    if (!delegateData.m_Network)
+    {
+        // Check if supported
+        bool isSupported = false;
+        FORWARD_LAYER_SUPPORT_FUNC(__func__,
+                                   tfLiteContext,
+                                   IsSplitterSupported,
+                                   delegateData.m_Backends,
+                                   isSupported,
+                                   inputTensorInfo,
+                                   outputTensorInfos,
+                                   splitDesc);
+        return isSupported ? kTfLiteOk : kTfLiteError;
+    }
+
+    armnn::IConnectableLayer* splitterLayer = delegateData.m_Network->AddSplitterLayer(splitDesc, "Unpack Splitter");
+    ARMNN_ASSERT(splitterLayer != nullptr);
+
+    // Connect the input slots
+    delegateData.m_OutputSlotForNode[tfLiteNode->inputs->data[0]]->Connect(splitterLayer->GetInputSlot(0));
+
+    armnn::TensorShape splitOutShape(static_cast<unsigned int>(unpackDimSizes.size()), unpackDimSizes.data());
+
+    // Create reshape to remove the unpacked dimension for unpack operator of each output from Splitter.
+    for (unsigned int outputIndex = 0; outputIndex < splitterLayer->GetNumOutputSlots(); ++outputIndex)
+    {
+        const armnn::TensorInfo& outputTensorInfo = outputs[outputIndex];
+
+        std::string reshapeLayerName("Unpack Reshape");
+        armnn::ReshapeDescriptor reshapeDescriptor;
+        reshapeDescriptor.m_TargetShape = outputTensorInfo.GetShape();
+        armnn::IConnectableLayer* reshapeLayer = delegateData.m_Network->AddReshapeLayer(reshapeDescriptor,
+                                                                                         reshapeLayerName.c_str());
+
+        ARMNN_ASSERT(reshapeLayer != nullptr);
+
+        // The Splitter output keeps the unpack axis (size 1); type and quantization come from the TfLite output.
+        splitterLayer->GetOutputSlot(outputIndex).SetTensorInfo(armnn::TensorInfo(splitOutShape,
+                                                                          outputTensorInfo.GetDataType(),
+                                                                          outputTensorInfo.GetQuantizationScale(),
+                                                                          outputTensorInfo.GetQuantizationOffset()));
+        splitterLayer->GetOutputSlot(outputIndex).Connect(reshapeLayer->GetInputSlot(0));
+
+        armnn::IOutputSlot& reshapeOutputSlot = reshapeLayer->GetOutputSlot(0);
+        reshapeOutputSlot.SetTensorInfo(outputTensorInfo);
+        delegateData.m_OutputSlotForNode[
+            static_cast<unsigned long>(tfLiteNode->outputs->data[outputIndex])] = &reshapeOutputSlot;
+    }
+
+    return kTfLiteOk;
+}
+
+} // namespace armnnDelegate
diff --git a/delegate/src/armnn_delegate.cpp b/delegate/src/armnn_delegate.cpp
index d777eff..7e4f5b5 100644
--- a/delegate/src/armnn_delegate.cpp
+++ b/delegate/src/armnn_delegate.cpp
@@ -34,6 +34,7 @@
 #include "SpaceDepth.hpp"
 #include "Split.hpp"
 #include "Transpose.hpp"
+#include "Unpack.hpp"
 
 #include <flatbuffers/flatbuffers.h>
 #include <tensorflow/lite/context_util.h>
@@ -881,6 +882,12 @@
                                            tfLiteNode,
                                            nodeIndex,
                                            kTfLiteBuiltinTanh);
+        case kTfLiteBuiltinUnpack:
+            return VisitUnpackOperator(delegateData,
+                                       tfLiteContext,
+                                       tfLiteNode,
+                                       nodeIndex,
+                                       kTfLiteBuiltinUnpack);
         default:
             return kTfLiteError;
     }
diff --git a/delegate/src/test/UnpackTest.cpp b/delegate/src/test/UnpackTest.cpp
new file mode 100644
index 0000000..c036f64
--- /dev/null
+++ b/delegate/src/test/UnpackTest.cpp
@@ -0,0 +1,179 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "UnpackTestHelper.hpp"
+
+#include <armnn_delegate.hpp>
+
+#include <flatbuffers/flatbuffers.h>
+#include <tensorflow/lite/schema/schema_generated.h>
+
+#include <doctest/doctest.h>
+
+namespace armnnDelegate
+{
+
+/// Unpacks a { 4, 1, 6 } tensor holding 1..24 along axis 0.
+/// Four outputs of shape { 1, 6 } are expected, each holding six consecutive input values.
+template <typename T>
+void UnpackAxis0Num4Test(tflite::TensorType tensorType, std::vector<armnn::BackendId>& backends)
+{
+    std::vector<int32_t> inShape { 4, 1, 6 };
+    std::vector<int32_t> outShape { 1, 6 };
+
+    // Input values in the order they are written to the input tensor.
+    std::vector<T> input { 1, 2, 3, 4, 5, 6,
+                           7, 8, 9, 10, 11, 12,
+                           13, 14, 15, 16, 17, 18,
+                           19, 20, 21, 22, 23, 24 };
+
+    // One inner vector per output tensor, in output order.
+    std::vector<std::vector<T>> expected { { 1, 2, 3, 4, 5, 6 },
+                                           { 7, 8, 9, 10, 11, 12 },
+                                           { 13, 14, 15, 16, 17, 18 },
+                                           { 19, 20, 21, 22, 23, 24 } };
+
+    const unsigned int axis = 0;
+    UnpackTest<T>(tflite::BuiltinOperator_UNPACK,
+                  tensorType,
+                  backends,
+                  inShape,
+                  outShape,
+                  input,
+                  expected,
+                  axis);
+}
+
+/// Unpacks a { 4, 1, 6 } tensor holding 1..24 along axis 2.
+/// Six outputs of shape { 4, 1 } are expected; output k holds every sixth input value starting at k + 1.
+template <typename T>
+void UnpackAxis2Num6Test(tflite::TensorType tensorType, std::vector<armnn::BackendId>& backends)
+{
+    std::vector<int32_t> inShape { 4, 1, 6 };
+    std::vector<int32_t> outShape { 4, 1 };
+
+    // Input values in the order they are written to the input tensor.
+    std::vector<T> input { 1, 2, 3, 4, 5, 6,
+                           7, 8, 9, 10, 11, 12,
+                           13, 14, 15, 16, 17, 18,
+                           19, 20, 21, 22, 23, 24 };
+
+    // One inner vector per output tensor, in output order: each one is a column of the
+    // six-wide layout shown above.
+    std::vector<std::vector<T>> expected { { 1, 7, 13, 19 },
+                                           { 2, 8, 14, 20 },
+                                           { 3, 9, 15, 21 },
+                                           { 4, 10, 16, 22 },
+                                           { 5, 11, 17, 23 },
+                                           { 6, 12, 18, 24 } };
+
+    // Unpack along the last input dimension.
+    const unsigned int axis = 2;
+    UnpackTest<T>(tflite::BuiltinOperator_UNPACK,
+                  tensorType,
+                  backends,
+                  inShape,
+                  outShape,
+                  input,
+                  expected,
+                  axis);
+}
+
+TEST_SUITE("Unpack_CpuRefTests")
+{
+
+// Float32 tensors, CpuRef backend
+TEST_CASE("Unpack_Fp32_Axis0_Num4_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends{ armnn::Compute::CpuRef };
+    UnpackAxis0Num4Test<float>(tflite::TensorType_FLOAT32, backends);
+}
+
+TEST_CASE("Unpack_Fp32_Axis2_Num6_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends{ armnn::Compute::CpuRef };
+    UnpackAxis2Num6Test<float>(tflite::TensorType_FLOAT32, backends);
+}
+
+// Uint8 tensors, CpuRef backend
+TEST_CASE("Unpack_Uint8_Axis0_Num4_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends{ armnn::Compute::CpuRef };
+    UnpackAxis0Num4Test<uint8_t>(tflite::TensorType_UINT8, backends);
+}
+
+TEST_CASE("Unpack_Uint8_Axis2_Num6_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends{ armnn::Compute::CpuRef };
+    UnpackAxis2Num6Test<uint8_t>(tflite::TensorType_UINT8, backends);
+}
+
+} // End of Unpack_CpuRefTests
+
+TEST_SUITE("Unpack_CpuAccTests")
+{
+
+// Float32 tensors, CpuAcc backend
+TEST_CASE("Unpack_Fp32_Axis0_Num4_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends{ armnn::Compute::CpuAcc };
+    UnpackAxis0Num4Test<float>(tflite::TensorType_FLOAT32, backends);
+}
+
+TEST_CASE("Unpack_Fp32_Axis2_Num6_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends{ armnn::Compute::CpuAcc };
+    UnpackAxis2Num6Test<float>(tflite::TensorType_FLOAT32, backends);
+}
+
+// Uint8 tensors, CpuAcc backend
+TEST_CASE("Unpack_Uint8_Axis0_Num4_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends{ armnn::Compute::CpuAcc };
+    UnpackAxis0Num4Test<uint8_t>(tflite::TensorType_UINT8, backends);
+}
+
+TEST_CASE("Unpack_Uint8_Axis2_Num6_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends{ armnn::Compute::CpuAcc };
+    UnpackAxis2Num6Test<uint8_t>(tflite::TensorType_UINT8, backends);
+}
+
+} // End of Unpack_CpuAccTests
+
+TEST_SUITE("Unpack_GpuAccTests")
+{
+
+// Float32 tensors, GpuAcc backend
+TEST_CASE("Unpack_Fp32_Axis0_Num4_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends{ armnn::Compute::GpuAcc };
+    UnpackAxis0Num4Test<float>(tflite::TensorType_FLOAT32, backends);
+}
+
+TEST_CASE("Unpack_Fp32_Axis2_Num6_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends{ armnn::Compute::GpuAcc };
+    UnpackAxis2Num6Test<float>(tflite::TensorType_FLOAT32, backends);
+}
+
+// Uint8 tensors, GpuAcc backend
+TEST_CASE("Unpack_Uint8_Axis0_Num4_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends{ armnn::Compute::GpuAcc };
+    UnpackAxis0Num4Test<uint8_t>(tflite::TensorType_UINT8, backends);
+}
+
+TEST_CASE("Unpack_Uint8_Axis2_Num6_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends{ armnn::Compute::GpuAcc };
+    UnpackAxis2Num6Test<uint8_t>(tflite::TensorType_UINT8, backends);
+}
+
+} // End of Unpack_GpuAccTests
+
+// End of Unpack Test Suite
+
+} // namespace armnnDelegate
\ No newline at end of file
diff --git a/delegate/src/test/UnpackTestHelper.hpp b/delegate/src/test/UnpackTestHelper.hpp
new file mode 100644
index 0000000..8487134
--- /dev/null
+++ b/delegate/src/test/UnpackTestHelper.hpp
@@ -0,0 +1,185 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "TestUtils.hpp"
+
+#include <armnn_delegate.hpp>
+
+#include <flatbuffers/flatbuffers.h>
+#include <tensorflow/lite/interpreter.h>
+#include <tensorflow/lite/kernels/register.h>
+#include <tensorflow/lite/model.h>
+#include <tensorflow/lite/schema/schema_generated.h>
+#include <tensorflow/lite/version.h>
+
+#include <doctest/doctest.h>
+
+#include <string>
+
+namespace
+{
+
+/// Builds a serialised TfLite model containing a single operator registered with unpackOperatorCode.
+/// Tensor 0 is the input and tensors 1..outputTensorNum are the outputs; all share tensorType, the same
+/// quantization parameters and buffer 0 (the only buffer, created empty).
+/// @param inputTensorShape   Shape of the input tensor; only read, hence a const reference.
+/// @param outputTensorShape  Shape given to every output tensor.
+/// @param outputTensorNum    Number of output tensors, also written as the UNPACK "num" option.
+/// @param axis               Written as the UNPACK "axis" option.
+/// @param quantScale,quantOffset Scale and zero point stored for every tensor.
+/// @return The finished flatbuffer copied into a byte vector.
+std::vector<char> CreateUnpackTfLiteModel(tflite::BuiltinOperator unpackOperatorCode,
+                                          tflite::TensorType tensorType,
+                                          const std::vector<int32_t>& inputTensorShape,
+                                          const std::vector<int32_t>& outputTensorShape,
+                                          const int32_t outputTensorNum,
+                                          unsigned int axis = 0,
+                                          float quantScale = 1.0f,
+                                          int quantOffset  = 0)
+{
+    using namespace tflite;
+    flatbuffers::FlatBufferBuilder flatBufferBuilder;
+
+    std::vector<flatbuffers::Offset<tflite::Buffer>> buffers;
+    buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({})));
+
+    auto quantizationParameters = CreateQuantizationParameters(
+        flatBufferBuilder, 0, 0,
+        flatBufferBuilder.CreateVector<float>({ quantScale }),
+        flatBufferBuilder.CreateVector<int64_t>({ quantOffset }));
+
+    const std::vector<int32_t> operatorInputs{ 0 };
+    std::vector<int32_t> operatorOutputs{};
+    const std::vector<int> subgraphInputs{ 0 };
+    std::vector<int> subgraphOutputs{};
+
+    // Slot 0 is the input tensor, slots 1..outputTensorNum are the outputs.
+    std::vector<flatbuffers::Offset<Tensor>> tensors(outputTensorNum + 1);
+    tensors[0] = CreateTensor(flatBufferBuilder,
+                              flatBufferBuilder.CreateVector<int32_t>(inputTensorShape.data(),
+                                                                      inputTensorShape.size()),
+                              tensorType,
+                              0,
+                              flatBufferBuilder.CreateString("input"),
+                              quantizationParameters);
+
+    for (int i = 0; i < outputTensorNum; ++i)
+    {
+        tensors[i + 1] = CreateTensor(flatBufferBuilder,
+                                      flatBufferBuilder.CreateVector<int32_t>(outputTensorShape.data(),
+                                                                              outputTensorShape.size()),
+                                      tensorType,
+                                      0,
+                                      flatBufferBuilder.CreateString("output" + std::to_string(i)),
+                                      quantizationParameters);
+        operatorOutputs.push_back(i + 1);
+        subgraphOutputs.push_back(i + 1);
+    }
+
+    // create operator
+    const auto unpackOptions = CreateUnpackOptions(flatBufferBuilder, outputTensorNum, axis).Union();
+    flatbuffers::Offset<Operator> unpackOperator =
+        CreateOperator(flatBufferBuilder,
+                       0,
+                       flatBufferBuilder.CreateVector<int32_t>(operatorInputs.data(), operatorInputs.size()),
+                       flatBufferBuilder.CreateVector<int32_t>(operatorOutputs.data(), operatorOutputs.size()),
+                       tflite::BuiltinOptions_UnpackOptions,
+                       unpackOptions);
+
+    flatbuffers::Offset<SubGraph> subgraph =
+        CreateSubGraph(flatBufferBuilder,
+                       flatBufferBuilder.CreateVector(tensors.data(), tensors.size()),
+                       flatBufferBuilder.CreateVector<int32_t>(subgraphInputs.data(), subgraphInputs.size()),
+                       flatBufferBuilder.CreateVector<int32_t>(subgraphOutputs.data(), subgraphOutputs.size()),
+                       flatBufferBuilder.CreateVector(&unpackOperator, 1));
+
+    auto modelDescription = flatBufferBuilder.CreateString("ArmnnDelegate: Unpack Operator Model");
+    flatbuffers::Offset<OperatorCode> operatorCode = CreateOperatorCode(flatBufferBuilder, unpackOperatorCode);
+
+    flatbuffers::Offset<Model> flatbufferModel =
+        CreateModel(flatBufferBuilder,
+                    TFLITE_SCHEMA_VERSION,
+                    flatBufferBuilder.CreateVector(&operatorCode, 1),
+                    flatBufferBuilder.CreateVector(&subgraph, 1),
+                    modelDescription,
+                    flatBufferBuilder.CreateVector(buffers.data(), buffers.size()));
+
+    flatBufferBuilder.Finish(flatbufferModel);
+    return std::vector<char>(flatBufferBuilder.GetBufferPointer(),
+                             flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize());
+}
+
+/// Runs one UNPACK model through a stock TfLite interpreter and through an interpreter modified with the
+/// Arm NN delegate, then checks every output with CompareOutputData against the expected shape and values.
+/// expectedOutputValues holds one vector per output tensor; its size is used as the number of outputs.
+template <typename T>
+void UnpackTest(tflite::BuiltinOperator unpackOperatorCode,
+              tflite::TensorType tensorType,
+              std::vector<armnn::BackendId>& backends,
+              std::vector<int32_t>& inputShape,
+              std::vector<int32_t>& expectedOutputShape,
+              std::vector<T>& inputValues,
+              std::vector<std::vector<T>>& expectedOutputValues,
+              unsigned int axis = 0,
+              float quantScale = 1.0f,
+              int quantOffset  = 0)
+{
+    using namespace tflite;
+    std::vector<char> serialisedModel = CreateUnpackTfLiteModel(unpackOperatorCode,
+                                                                tensorType,
+                                                                inputShape,
+                                                                expectedOutputShape,
+                                                                expectedOutputValues.size(),
+                                                                axis,
+                                                                quantScale,
+                                                                quantOffset);
+    const Model* tfLiteModel = GetModel(serialisedModel.data());
+
+    // Interpreter that is later modified to use the Arm NN delegate
+    std::unique_ptr<Interpreter> armnnDelegateInterpreter;
+    CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver())
+              (&armnnDelegateInterpreter) == kTfLiteOk);
+    CHECK(armnnDelegateInterpreter != nullptr);
+    CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk);
+
+    // Unmodified TfLite interpreter built from the same model
+    std::unique_ptr<Interpreter> tfLiteInterpreter;
+    CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver())
+              (&tfLiteInterpreter) == kTfLiteOk);
+    CHECK(tfLiteInterpreter != nullptr);
+    CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk);
+
+    // Create the Arm NN delegate for the requested backends
+    armnnDelegate::DelegateOptions delegateOptions(backends);
+    std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
+                                    theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
+                                                     armnnDelegate::TfLiteArmnnDelegateDelete);
+    CHECK(theArmnnDelegate != nullptr);
+
+    // Hand the graph of armnnDelegateInterpreter over to the delegate
+    CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk);
+
+    // Feed both interpreters the same input data
+    armnnDelegate::FillInput<T>(tfLiteInterpreter, 0, inputValues);
+    armnnDelegate::FillInput<T>(armnnDelegateInterpreter, 0, inputValues);
+
+    // Run inference on both interpreters
+    CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk);
+    CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk);
+
+    // Compare each output tensor of the two interpreters
+    for (unsigned int outputIdx = 0; outputIdx < expectedOutputValues.size(); ++outputIdx)
+    {
+        armnnDelegate::CompareOutputData<T>(tfLiteInterpreter, armnnDelegateInterpreter,
+                                            expectedOutputShape, expectedOutputValues[outputIdx], outputIdx);
+    }
+
+    // Release the delegated interpreter while theArmnnDelegate is still alive.
+    armnnDelegateInterpreter.reset();
+}
+
+} // anonymous namespace
\ No newline at end of file