IVGCVSW-5969 TfLiteDelegate: Add PACK operator support

 * Added support for the PACK operator, which is equivalent to the Arm NN Stack layer
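
 * Descriptor mapping sketch (illustrative only; it mirrors the fields set in
   delegate/src/Pack.hpp, with "network" and "outputTensorInfo" standing in for
   the delegate's armnn::INetwork* and the converted output TensorInfo):

       armnn::StackDescriptor desc;
       desc.m_NumInputs  = 2;                               // number of PACK inputs
       desc.m_Axis       = 0;                               // TfLitePackParams::axis
       desc.m_InputShape = armnn::TensorShape({ 3, 2, 3 }); // shape shared by every input
       armnn::IConnectableLayer* layer = network->AddStackLayer(desc);
       layer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);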

Signed-off-by: Matthew Sloyan <matthew.sloyan@arm.com>
Change-Id: I9ea134d0310eeea1caba30a8b9221712e9487c75
diff --git a/delegate/CMakeLists.txt b/delegate/CMakeLists.txt
index 707877f..dc65a19 100644
--- a/delegate/CMakeLists.txt
+++ b/delegate/CMakeLists.txt
@@ -33,6 +33,7 @@
         src/LogicalBinary.hpp
         src/Lstm.hpp
         src/Normalization.hpp
+        src/Pack.hpp
         src/Pad.hpp
         src/Pooling.hpp
         src/Quantization.hpp
@@ -145,6 +146,8 @@
         src/test/LstmTestHelper.hpp
         src/test/NormalizationTest.cpp
         src/test/NormalizationTestHelper.hpp
+        src/test/PackTest.cpp
+        src/test/PackTestHelper.hpp
         src/test/PadTest.cpp
         src/test/PadTestHelper.hpp
         src/test/Pooling2dTest.cpp
diff --git a/delegate/src/Pack.hpp b/delegate/src/Pack.hpp
new file mode 100644
index 0000000..59851cd
--- /dev/null
+++ b/delegate/src/Pack.hpp
@@ -0,0 +1,109 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <tensorflow/lite/builtin_ops.h>
+#include <tensorflow/lite/c/builtin_op_data.h>
+#include <tensorflow/lite/c/common.h>
+#include <tensorflow/lite/minimal_logging.h>
+
+namespace armnnDelegate
+{
+
+TfLiteStatus VisitPackOperator(DelegateData& delegateData,
+                               TfLiteContext* tfLiteContext,
+                               TfLiteNode* tfLiteNode,
+                               int nodeIndex,
+                               int32_t operatorCode)
+{
+    unsigned int numInputs = tfLiteNode->inputs->size;
+    if (numInputs < 1)
+    {
+        TF_LITE_MAYBE_KERNEL_LOG(
+                tfLiteContext, "TfLiteArmnnDelegate: Must have at least %d input(s) (%d provided) in node #%d",
+                1, numInputs, nodeIndex);
+        return kTfLiteError;
+    }
+
+    TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex));
+
+    const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors;
+
+    // Validate all inputs and get TensorInfo
+    std::vector<armnn::TensorInfo> inputTensorInfos;
+    for (unsigned int i = 0; i < numInputs; ++i)
+    {
+        const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[i]];
+        if (!IsValid(tfLiteContext, tfLiteInputTensor, operatorCode, nodeIndex))
+        {
+            return kTfLiteError;
+        }
+
+        armnn::TensorInfo inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor);
+        inputTensorInfos.emplace_back(inputTensorInfo);
+    }
+
+    // Convert input tensors to const armnn::TensorInfo* type for FORWARD_LAYER_SUPPORT_FUNC.
+    std::vector<const armnn::TensorInfo*> inputConstTensorInfos;
+    std::transform(inputTensorInfos.begin(),
+                   inputTensorInfos.end(),
+                   std::back_inserter(inputConstTensorInfos),
+                   [](armnn::TensorInfo& t)->const armnn::TensorInfo*{ return &t; });
+
+    // Validate output and get TensorInfo
+    const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]];
+    if (!IsValid(tfLiteContext, tfLiteOutputTensor, operatorCode, nodeIndex))
+    {
+        return kTfLiteError;
+    }
+
+    const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor);
+
+    armnn::StackDescriptor desc;
+    desc.m_NumInputs = static_cast<uint32_t>(numInputs);
+
+    // Get axis from TfLite parameters
+    auto* params = reinterpret_cast<TfLitePackParams*>(tfLiteNode->builtin_data);
+    desc.m_Axis = static_cast<uint32_t>(params->axis);
+
+    // Use the tensor shape of the first input as the descriptor's input shape; all Pack inputs must share this shape
+    desc.m_InputShape = inputTensorInfos[0].GetShape();
+
+    // Check if supported
+    bool isSupported = false;
+    auto validateFunc = [&](const armnn::TensorInfo& outputTensorInfo, bool& isSupported)
+    {
+        FORWARD_LAYER_SUPPORT_FUNC(__func__,
+                                   tfLiteContext,
+                                   IsStackSupported,
+                                   delegateData.m_Backends,
+                                   isSupported,
+                                   inputConstTensorInfos,
+                                   outputTensorInfo,
+                                   desc);
+    };
+
+    // If m_Network is a nullptr, we are in the validation phase: the delegate only needs to report whether the
+    // operator is supported by the selected backends, via the validateFunc callback above.
+    // If it is supported, VisitPackOperator will be called again later to add the layer to the network as seen below
+    if (!delegateData.m_Network)
+    {
+        validateFunc(outputTensorInfo, isSupported);
+        return isSupported ? kTfLiteOk : kTfLiteError;
+    }
+
+    // The TfLite Pack operator is equivalent to the ArmNN Stack operator
+    armnn::IConnectableLayer* layer = delegateData.m_Network->AddStackLayer(desc);
+    ARMNN_ASSERT(layer != nullptr);
+
+    armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(0);
+    outputSlot.SetTensorInfo(outputTensorInfo);
+
+    // Connect
+    return Connect(layer, tfLiteNode, delegateData);
+}
+
+} // namespace armnnDelegate
diff --git a/delegate/src/armnn_delegate.cpp b/delegate/src/armnn_delegate.cpp
index e637ca6..d777eff 100644
--- a/delegate/src/armnn_delegate.cpp
+++ b/delegate/src/armnn_delegate.cpp
@@ -21,6 +21,7 @@
 #include "LogicalBinary.hpp"
 #include "Lstm.hpp"
 #include "Normalization.hpp"
+#include "Pack.hpp"
 #include "Pad.hpp"
 #include "Pooling.hpp"
 #include "Quantization.hpp"
@@ -712,6 +713,12 @@
                                            tfLiteNode,
                                            nodeIndex,
                                            kTfLiteBuiltinNotEqual);
+        case kTfLiteBuiltinPack:
+            return VisitPackOperator(delegateData,
+                                     tfLiteContext,
+                                     tfLiteNode,
+                                     nodeIndex,
+                                     kTfLiteBuiltinPack);
         case kTfLiteBuiltinPad:
             return VisitPadOperator(delegateData,
                                     tfLiteContext,
diff --git a/delegate/src/test/PackTest.cpp b/delegate/src/test/PackTest.cpp
new file mode 100644
index 0000000..aea903b
--- /dev/null
+++ b/delegate/src/test/PackTest.cpp
@@ -0,0 +1,516 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "PackTestHelper.hpp"
+
+#include <armnn_delegate.hpp>
+
+#include <flatbuffers/flatbuffers.h>
+#include <tensorflow/lite/schema/schema_generated.h>
+
+#include <doctest/doctest.h>
+
+namespace armnnDelegate
+{
+
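+// Note: the PackFp32* helpers below are templated on the element type, so they are
+// also reused for the UINT8 and INT8 test cases further down.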
+template <typename T>
+void PackFp32Axis0Test(tflite::TensorType tensorType, std::vector<armnn::BackendId>& backends)
+{
+    std::vector<int32_t> inputShape { 3, 2, 3 };
+    std::vector<int32_t> expectedOutputShape { 2, 3, 2, 3 };
+
+    std::vector<std::vector<T>> inputValues;
+    inputValues.push_back(
+    {
+        1, 2, 3,
+        4, 5, 6,
+
+        7, 8, 9,
+        10, 11, 12,
+
+        13, 14, 15,
+        16, 17, 18
+    });
+
+    inputValues.push_back(
+    {
+        19, 20, 21,
+        22, 23, 24,
+
+        25, 26, 27,
+        28, 29, 30,
+
+        31, 32, 33,
+        34, 35, 36
+    });
+
+    std::vector<T> expectedOutputValues =
+    {
+        1, 2, 3,
+        4, 5, 6,
+
+        7, 8, 9,
+        10, 11, 12,
+
+        13, 14, 15,
+        16, 17, 18,
+
+
+        19, 20, 21,
+        22, 23, 24,
+
+        25, 26, 27,
+        28, 29, 30,
+
+        31, 32, 33,
+        34, 35, 36
+    };
+
+    PackTest<T>(tflite::BuiltinOperator_PACK,
+                tensorType,
+                backends,
+                inputShape,
+                expectedOutputShape,
+                inputValues,
+                expectedOutputValues,
+                0);
+}
+
+template <typename T>
+void PackFp32Axis1Test(tflite::TensorType tensorType, std::vector<armnn::BackendId>& backends)
+{
+    std::vector<int32_t> inputShape { 3, 2, 3 };
+    std::vector<int32_t> expectedOutputShape { 3, 2, 2, 3 };
+
+    std::vector<std::vector<T>> inputValues;
+    inputValues.push_back(
+    {
+        1, 2, 3,
+        4, 5, 6,
+
+        7, 8, 9,
+        10, 11, 12,
+
+        13, 14, 15,
+        16, 17, 18
+    });
+
+    inputValues.push_back(
+    {
+        19, 20, 21,
+        22, 23, 24,
+
+        25, 26, 27,
+        28, 29, 30,
+
+        31, 32, 33,
+        34, 35, 36
+    });
+
+    std::vector<T> expectedOutputValues =
+    {
+        1, 2, 3,
+        4, 5, 6,
+
+        19, 20, 21,
+        22, 23, 24,
+
+
+        7, 8, 9,
+        10, 11, 12,
+
+        25, 26, 27,
+        28, 29, 30,
+
+
+        13, 14, 15,
+        16, 17, 18,
+
+        31, 32, 33,
+        34, 35, 36
+    };
+
+    PackTest<T>(tflite::BuiltinOperator_PACK,
+                tensorType,
+                backends,
+                inputShape,
+                expectedOutputShape,
+                inputValues,
+                expectedOutputValues,
+                1);
+}
+
+template <typename T>
+void PackFp32Axis2Test(tflite::TensorType tensorType, std::vector<armnn::BackendId>& backends)
+{
+    std::vector<int32_t> inputShape { 3, 2, 3 };
+    std::vector<int32_t> expectedOutputShape { 3, 2, 2, 3 };
+
+    std::vector<std::vector<T>> inputValues;
+    inputValues.push_back(
+    {
+        1, 2, 3,
+        4, 5, 6,
+
+        7, 8, 9,
+        10, 11, 12,
+
+        13, 14, 15,
+        16, 17, 18
+    });
+
+    inputValues.push_back(
+    {
+        19, 20, 21,
+        22, 23, 24,
+
+        25, 26, 27,
+        28, 29, 30,
+
+        31, 32, 33,
+        34, 35, 36
+    });
+
+    std::vector<T> expectedOutputValues =
+    {
+        1, 2, 3,
+        19, 20, 21,
+
+        4, 5, 6,
+        22, 23, 24,
+
+        7, 8, 9,
+        25, 26, 27,
+
+        10, 11, 12,
+        28, 29, 30,
+
+        13, 14, 15,
+        31, 32, 33,
+
+        16, 17, 18,
+        34, 35, 36
+    };
+
+    PackTest<T>(tflite::BuiltinOperator_PACK,
+                tensorType,
+                backends,
+                inputShape,
+                expectedOutputShape,
+                inputValues,
+                expectedOutputValues,
+                2);
+}
+
+template <typename T>
+void PackFp32Axis3Test(tflite::TensorType tensorType, std::vector<armnn::BackendId>& backends)
+{
+    std::vector<int32_t> inputShape { 3, 2, 3 };
+    std::vector<int32_t> expectedOutputShape { 3, 2, 3, 2 };
+
+    std::vector<std::vector<T>> inputValues;
+    inputValues.push_back(
+    {
+        1, 2, 3,
+        4, 5, 6,
+
+        7, 8, 9,
+        10, 11, 12,
+
+        13, 14, 15,
+        16, 17, 18
+    });
+
+    inputValues.push_back(
+    {
+        19, 20, 21,
+        22, 23, 24,
+
+        25, 26, 27,
+        28, 29, 30,
+
+        31, 32, 33,
+        34, 35, 36
+    });
+
+    std::vector<T> expectedOutputValues =
+    {
+        1, 19,
+        2, 20,
+        3, 21,
+
+        4, 22,
+        5, 23,
+        6, 24,
+
+
+        7, 25,
+        8, 26,
+        9, 27,
+
+        10, 28,
+        11, 29,
+        12, 30,
+
+
+        13, 31,
+        14, 32,
+        15, 33,
+
+        16, 34,
+        17, 35,
+        18, 36
+    };
+
+    PackTest<T>(tflite::BuiltinOperator_PACK,
+                tensorType,
+                backends,
+                inputShape,
+                expectedOutputShape,
+                inputValues,
+                expectedOutputValues,
+                3);
+}
+
+template <typename T>
+void PackFp32Inputs3Test(tflite::TensorType tensorType, std::vector<armnn::BackendId>& backends)
+{
+    std::vector<int32_t> inputShape { 3, 3 };
+    std::vector<int32_t> expectedOutputShape { 3, 3, 3 };
+
+    std::vector<std::vector<T>> inputValues;
+    inputValues.push_back(
+    {
+        1, 2, 3,
+        4, 5, 6,
+        7, 8, 9
+    });
+
+    inputValues.push_back(
+    {
+        10, 11, 12,
+        13, 14, 15,
+        16, 17, 18
+    });
+
+    inputValues.push_back(
+    {
+        19, 20, 21,
+        22, 23, 24,
+        25, 26, 27
+    });
+
+    std::vector<T> expectedOutputValues =
+    {
+        1, 2, 3,
+        10, 11, 12,
+        19, 20, 21,
+
+        4, 5, 6,
+        13, 14, 15,
+        22, 23, 24,
+
+        7, 8, 9,
+        16, 17, 18,
+        25, 26, 27
+    };
+
+    PackTest<T>(tflite::BuiltinOperator_PACK,
+                tensorType,
+                backends,
+                inputShape,
+                expectedOutputShape,
+                inputValues,
+                expectedOutputValues,
+                1);
+}
+
+TEST_SUITE("Pack_CpuAccTests")
+{
+
+// Fp32
+TEST_CASE ("Pack_Fp32_Axis0_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuAcc};
+    PackFp32Axis0Test<float>(tflite::TensorType_FLOAT32, backends);
+}
+
+TEST_CASE ("Pack_Fp32_Axis1_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuAcc};
+    PackFp32Axis1Test<float>(tflite::TensorType_FLOAT32, backends);
+}
+
+TEST_CASE ("Pack_Fp32_Axis2_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuAcc};
+    PackFp32Axis2Test<float>(tflite::TensorType_FLOAT32, backends);
+}
+
+TEST_CASE ("Pack_Fp32_Axis3_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuAcc};
+    PackFp32Axis3Test<float>(tflite::TensorType_FLOAT32, backends);
+}
+
+TEST_CASE ("Pack_Fp32_Inputs3_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuAcc};
+    PackFp32Inputs3Test<float>(tflite::TensorType_FLOAT32, backends);
+}
+
+// Uint8
+TEST_CASE ("Pack_Uint8_Axis0_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuAcc};
+    PackFp32Axis0Test<uint8_t>(tflite::TensorType_UINT8, backends);
+}
+
+TEST_CASE ("Pack_Uint8_Inputs3_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuAcc};
+    PackFp32Inputs3Test<uint8_t>(tflite::TensorType_UINT8, backends);
+}
+
+// Int8
+TEST_CASE ("Pack_Int8_Axis0_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuAcc};
+    PackFp32Axis0Test<int8_t>(tflite::TensorType_INT8, backends);
+}
+
+TEST_CASE ("Pack_Int8_Inputs3_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuAcc};
+    PackFp32Inputs3Test<int8_t>(tflite::TensorType_INT8, backends);
+}
+
+}
+
+TEST_SUITE("Pack_GpuAccTests")
+{
+
+// Fp32
+TEST_CASE ("Pack_Fp32_Axis0_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
+    PackFp32Axis0Test<float>(tflite::TensorType_FLOAT32, backends);
+}
+
+TEST_CASE ("Pack_Fp32_Axis1_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
+    PackFp32Axis1Test<float>(tflite::TensorType_FLOAT32, backends);
+}
+
+TEST_CASE ("Pack_Fp32_Axis2_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
+    PackFp32Axis2Test<float>(tflite::TensorType_FLOAT32, backends);
+}
+
+TEST_CASE ("Pack_Fp32_Axis3_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
+    PackFp32Axis3Test<float>(tflite::TensorType_FLOAT32, backends);
+}
+
+TEST_CASE ("Pack_Fp32_Inputs3_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
+    PackFp32Inputs3Test<float>(tflite::TensorType_FLOAT32, backends);
+}
+
+// Uint8
+TEST_CASE ("Pack_Uint8_Axis0_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
+    PackFp32Axis0Test<uint8_t>(tflite::TensorType_UINT8, backends);
+}
+
+TEST_CASE ("Pack_Uint8_Inputs3_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
+    PackFp32Inputs3Test<uint8_t>(tflite::TensorType_UINT8, backends);
+}
+
+// Int8
+TEST_CASE ("Pack_Int8_Axis0_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
+    PackFp32Axis0Test<int8_t>(tflite::TensorType_INT8, backends);
+}
+
+TEST_CASE ("Pack_Int8_Inputs3_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
+    PackFp32Inputs3Test<int8_t>(tflite::TensorType_INT8, backends);
+}
+
+}
+
+TEST_SUITE("Pack_CpuRefTests")
+{
+
+// Fp32
+TEST_CASE ("Pack_Fp32_Axis0_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef};
+    PackFp32Axis0Test<float>(tflite::TensorType_FLOAT32, backends);
+}
+
+TEST_CASE ("Pack_Fp32_Axis1_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef};
+    PackFp32Axis1Test<float>(tflite::TensorType_FLOAT32, backends);
+}
+
+TEST_CASE ("Pack_Fp32_Axis2_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef};
+    PackFp32Axis2Test<float>(tflite::TensorType_FLOAT32, backends);
+}
+
+TEST_CASE ("Pack_Fp32_Axis3_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef};
+    PackFp32Axis3Test<float>(tflite::TensorType_FLOAT32, backends);
+}
+
+TEST_CASE ("Pack_Fp32_Inputs3_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef};
+    PackFp32Inputs3Test<float>(tflite::TensorType_FLOAT32, backends);
+}
+
+// Uint8
+TEST_CASE ("Pack_Uint8_Axis0_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef};
+    PackFp32Axis0Test<uint8_t>(tflite::TensorType_UINT8, backends);
+}
+
+TEST_CASE ("Pack_Uint8_Inputs3_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef};
+    PackFp32Inputs3Test<uint8_t>(tflite::TensorType_UINT8, backends);
+}
+
+// Int8
+TEST_CASE ("Pack_Int8_Axis0_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef};
+    PackFp32Axis0Test<int8_t>(tflite::TensorType_INT8, backends);
+}
+
+TEST_CASE ("Pack_Int8_Inputs3_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef};
+    PackFp32Inputs3Test<int8_t>(tflite::TensorType_INT8, backends);
+}
+
+}
+
+} // namespace armnnDelegate
\ No newline at end of file
diff --git a/delegate/src/test/PackTestHelper.hpp b/delegate/src/test/PackTestHelper.hpp
new file mode 100644
index 0000000..0869228
--- /dev/null
+++ b/delegate/src/test/PackTestHelper.hpp
@@ -0,0 +1,185 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "TestUtils.hpp"
+
+#include <armnn_delegate.hpp>
+
+#include <flatbuffers/flatbuffers.h>
+#include <tensorflow/lite/interpreter.h>
+#include <tensorflow/lite/kernels/register.h>
+#include <tensorflow/lite/model.h>
+#include <tensorflow/lite/schema/schema_generated.h>
+#include <tensorflow/lite/version.h>
+
+#include <doctest/doctest.h>
+
+#include <string>
+
+namespace
+{
+
+std::vector<char> CreatePackTfLiteModel(tflite::BuiltinOperator packOperatorCode,
+                                        tflite::TensorType tensorType,
+                                        std::vector<int32_t>& inputTensorShape,
+                                        const std::vector <int32_t>& outputTensorShape,
+                                        const int32_t inputTensorNum,
+                                        unsigned int axis = 0,
+                                        float quantScale = 1.0f,
+                                        int quantOffset  = 0)
+{
+    using namespace tflite;
+    flatbuffers::FlatBufferBuilder flatBufferBuilder;
+
+    std::vector<flatbuffers::Offset<tflite::Buffer>> buffers;
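+    // Single empty buffer at index 0; every tensor below references it, as PACK has no constant data.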
+    buffers.push_back(CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({})));
+
+    auto quantizationParameters =
+            CreateQuantizationParameters(flatBufferBuilder,
+                                         0,
+                                         0,
+                                         flatBufferBuilder.CreateVector<float>({ quantScale }),
+                                         flatBufferBuilder.CreateVector<int64_t>({ quantOffset }));
+
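+    // The output tensor is created after the inputTensorNum input tensors, so its index equals inputTensorNum.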
+    std::vector<int32_t> operatorInputs{};
+    const std::vector<int32_t> operatorOutputs{inputTensorNum};
+    std::vector<int> subgraphInputs{};
+    const std::vector<int> subgraphOutputs{inputTensorNum};
+
+    std::vector<flatbuffers::Offset<Tensor>> tensors(inputTensorNum + 1);
+    for (int i = 0; i < inputTensorNum; ++i)
+    {
+        tensors[i] = CreateTensor(flatBufferBuilder,
+                                  flatBufferBuilder.CreateVector<int32_t>(inputTensorShape.data(),
+                                                                          inputTensorShape.size()),
+                                  tensorType,
+                                  0,
+                                  flatBufferBuilder.CreateString("input" + std::to_string(i)),
+                                  quantizationParameters);
+
+        // Add input tensor index to the operator's and subgraph's inputs.
+        operatorInputs.push_back(i);
+        subgraphInputs.push_back(i);
+    }
+
+    // Create output tensor
+    tensors[inputTensorNum] = CreateTensor(flatBufferBuilder,
+                                           flatBufferBuilder.CreateVector<int32_t>(outputTensorShape.data(),
+                                                                                   outputTensorShape.size()),
+                                           tensorType,
+                                           0,
+                                           flatBufferBuilder.CreateString("output"),
+                                           quantizationParameters);
+
+    // Create operator
+    tflite::BuiltinOptions operatorBuiltinOptionsType = tflite::BuiltinOptions_PackOptions;
+    flatbuffers::Offset<void> operatorBuiltinOptions =
+            CreatePackOptions(flatBufferBuilder, inputTensorNum, axis).Union();
+
+    flatbuffers::Offset <Operator> packOperator =
+            CreateOperator(flatBufferBuilder,
+                           0,
+                           flatBufferBuilder.CreateVector<int32_t>(operatorInputs.data(), operatorInputs.size()),
+                           flatBufferBuilder.CreateVector<int32_t>(operatorOutputs.data(), operatorOutputs.size()),
+                           operatorBuiltinOptionsType,
+                           operatorBuiltinOptions);
+
+    flatbuffers::Offset <SubGraph> subgraph =
+            CreateSubGraph(flatBufferBuilder,
+                           flatBufferBuilder.CreateVector(tensors.data(), tensors.size()),
+                           flatBufferBuilder.CreateVector<int32_t>(subgraphInputs.data(), subgraphInputs.size()),
+                           flatBufferBuilder.CreateVector<int32_t>(subgraphOutputs.data(), subgraphOutputs.size()),
+                           flatBufferBuilder.CreateVector(&packOperator, 1));
+
+    flatbuffers::Offset <flatbuffers::String> modelDescription =
+            flatBufferBuilder.CreateString("ArmnnDelegate: Pack Operator Model");
+    flatbuffers::Offset <OperatorCode> operatorCode = CreateOperatorCode(flatBufferBuilder, packOperatorCode);
+
+    flatbuffers::Offset <Model> flatbufferModel =
+            CreateModel(flatBufferBuilder,
+                        TFLITE_SCHEMA_VERSION,
+                        flatBufferBuilder.CreateVector(&operatorCode, 1),
+                        flatBufferBuilder.CreateVector(&subgraph, 1),
+                        modelDescription,
+                        flatBufferBuilder.CreateVector(buffers.data(), buffers.size()));
+
+    flatBufferBuilder.Finish(flatbufferModel);
+
+    return std::vector<char>(flatBufferBuilder.GetBufferPointer(),
+                             flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize());
+}
+
+template <typename T>
+void PackTest(tflite::BuiltinOperator packOperatorCode,
+              tflite::TensorType tensorType,
+              std::vector<armnn::BackendId>& backends,
+              std::vector<int32_t>& inputShape,
+              std::vector<int32_t>& expectedOutputShape,
+              std::vector<std::vector<T>>& inputValues,
+              std::vector<T>& expectedOutputValues,
+              unsigned int axis = 0,
+              float quantScale = 1.0f,
+              int quantOffset  = 0)
+{
+    using namespace tflite;
+    std::vector<char> modelBuffer = CreatePackTfLiteModel(packOperatorCode,
+                                                          tensorType,
+                                                          inputShape,
+                                                          expectedOutputShape,
+                                                          inputValues.size(),
+                                                          axis,
+                                                          quantScale,
+                                                          quantOffset);
+
+    const Model* tfLiteModel = GetModel(modelBuffer.data());
+
+    // Create TfLite Interpreters
+    std::unique_ptr<Interpreter> armnnDelegateInterpreter;
+    CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver())
+                  (&armnnDelegateInterpreter) == kTfLiteOk);
+    CHECK(armnnDelegateInterpreter != nullptr);
+    CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk);
+
+    std::unique_ptr<Interpreter> tfLiteInterpreter;
+    CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver())
+                  (&tfLiteInterpreter) == kTfLiteOk);
+    CHECK(tfLiteInterpreter != nullptr);
+    CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk);
+
+    // Create the ArmNN Delegate
+    armnnDelegate::DelegateOptions delegateOptions(backends);
+    std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
+            theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
+                             armnnDelegate::TfLiteArmnnDelegateDelete);
+    CHECK(theArmnnDelegate != nullptr);
+
+    // Modify armnnDelegateInterpreter to use armnnDelegate
+    CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk);
+
+    // Set input data for all input tensors.
+    for (unsigned int i = 0; i < inputValues.size(); ++i)
+    {
+        // Get single input tensor and assign to interpreters.
+        auto inputTensorValues = inputValues[i];
+        armnnDelegate::FillInput<T>(tfLiteInterpreter, i, inputTensorValues);
+        armnnDelegate::FillInput<T>(armnnDelegateInterpreter, i, inputTensorValues);
+    }
+
+    // Run inference on both interpreters
+    CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk);
+    CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk);
+
+    // Compare output data
+    armnnDelegate::CompareOutputData<T>(tfLiteInterpreter,
+                                        armnnDelegateInterpreter,
+                                        expectedOutputShape,
+                                        expectedOutputValues);
+
+    armnnDelegateInterpreter.reset(nullptr);
+}
+
+} // anonymous namespace
\ No newline at end of file