IVGCVSW-5393 'TfLiteDelegate: Implement the split operators'

* Added SPLIT and SPLIT_V support to armnn_delegate

Signed-off-by: Sadik Armagan <sadik.armagan@arm.com>
Change-Id: I2def9b8be783b25ef17a997e521c6027553035d3
diff --git a/delegate/src/test/SplitTest.cpp b/delegate/src/test/SplitTest.cpp
new file mode 100644
index 0000000..5940516
--- /dev/null
+++ b/delegate/src/test/SplitTest.cpp
@@ -0,0 +1,262 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "SplitTestHelper.hpp"
+
+#include <armnn_delegate.hpp>
+
+#include <flatbuffers/flatbuffers.h>
+#include <tensorflow/lite/schema/schema_generated.h>
+
+#include <doctest/doctest.h>
+
+namespace armnnDelegate
+{
+
+// SPLIT Operator, uint8 case: splits a 2x2x2x2 input along axis 3 into two 2x2x2x1 outputs.
+void SplitUint8Test(std::vector<armnn::BackendId>& backends)
+{
+    std::vector<int32_t> axisShape { 1 };  // The axis tensor holds a single value
+    std::vector<int32_t> inputShape { 2, 2, 2, 2} ;
+    std::vector<int32_t> outputShape0 { 2, 2, 2, 1 };
+    std::vector<int32_t> outputShape1 { 2, 2, 2, 1 };
+    std::vector<std::vector<int32_t>> outputShapes{ outputShape0, outputShape1 };
+
+    std::vector<int32_t> axisData { 3 };  // Axis: split along the last dimension
+    std::vector<uint8_t> inputValues { 1, 2, 3, 4, 5, 6, 7, 8,
+                                       9, 10, 11, 12, 13, 14, 15, 16 }; // Input
+
+    // The split dimension has size 2, so output 0 receives the odd values and output 1 the even values.
+    std::vector<uint8_t> expectedOutputValues0 { 1, 3, 5, 7, 9, 11, 13, 15 };
+    std::vector<uint8_t> expectedOutputValues1 { 2, 4, 6, 8, 10, 12, 14, 16 };
+    std::vector<std::vector<uint8_t>> expectedOutputValues{ expectedOutputValues0, expectedOutputValues1 };
+
+    int32_t numSplits = 2;  // Forwarded to the model's SplitOptions
+
+    SplitTest<uint8_t>(::tflite::TensorType_UINT8,
+                       backends,
+                       axisShape,
+                       inputShape,
+                       outputShapes,
+                       axisData,
+                       inputValues,
+                       expectedOutputValues,
+                       numSplits);
+}
+// SPLIT Operator, fp32 case: splits a 2x2x2x2 input along axis 1 into two 2x1x2x2 outputs.
+void SplitFp32Test(std::vector<armnn::BackendId>& backends)
+{
+    std::vector<int32_t> axisShape { 1 };  // The axis tensor holds a single value
+    std::vector<int32_t> inputShape { 2, 2, 2, 2 };
+    std::vector<int32_t> outputShape0 { 2, 1, 2, 2 };
+    std::vector<int32_t> outputShape1 { 2, 1, 2, 2 };
+    std::vector<std::vector<int32_t>> outputShapes{ outputShape0, outputShape1 };
+
+    std::vector<int32_t> axisData { 1 };  // Axis: split along dimension 1
+    std::vector<float> inputValues { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f,
+                                     9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f  }; // Input
+
+    // Each output receives one contiguous run of four values from each of the two outermost slices.
+    std::vector<float> expectedOutputValues0 { 1.0f, 2.0f, 3.0f, 4.0f, 9.0f, 10.0f, 11.0f, 12.0f };
+    std::vector<float> expectedOutputValues1 { 5.0f, 6.0f, 7.0f, 8.0f, 13.0f, 14.0f, 15.0f, 16.0f };
+    std::vector<std::vector<float>> expectedOutputValues{ expectedOutputValues0, expectedOutputValues1 };
+
+    int32_t numSplits = 2;  // Forwarded to the model's SplitOptions
+
+    SplitTest<float>(::tflite::TensorType_FLOAT32,
+                     backends,
+                     axisShape,
+                     inputShape,
+                     outputShapes,
+                     axisData,
+                     inputValues,
+                     expectedOutputValues,
+                     numSplits);
+}
+
+// SPLIT Test Suite: one suite per backend (CpuRef, CpuAcc, GpuAcc), each running the uint8 and fp32 cases.
+TEST_SUITE("SPLIT_CpuRefTests")
+{
+
+TEST_CASE ("SPLIT_Uint8_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef};
+    SplitUint8Test(backends);
+}
+
+TEST_CASE ("SPLIT_Fp32_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef};
+    SplitFp32Test(backends);
+}
+
+}
+
+TEST_SUITE("SPLIT_CpuAccTests")
+{
+
+TEST_CASE ("SPLIT_Uint8_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuAcc};
+    SplitUint8Test(backends);
+}
+
+TEST_CASE ("SPLIT_Fp32_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuAcc};
+    SplitFp32Test(backends);
+}
+
+}
+
+TEST_SUITE("SPLIT_GpuAccTests")
+{
+
+TEST_CASE ("SPLIT_Uint8_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
+    SplitUint8Test(backends);
+}
+
+TEST_CASE ("SPLIT_Fp32_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
+    SplitFp32Test(backends);
+}
+
+}
+// End of SPLIT Test Suite
+
+// SPLIT_V Operator, uint8 case: splits a 2x4x2x2 input along axis 1 into a 2x3x2x2 and a 2x1x2x2 output.
+void SplitVUint8Test(std::vector<armnn::BackendId>& backends)
+{
+    std::vector<int32_t> axisShape { 1 };  // The axis tensor holds a single value
+    std::vector<int32_t> inputShape { 2, 4, 2, 2 };
+    std::vector<int32_t> splitsShape { 2 };  // One split size per output
+    std::vector<int32_t> outputShape0 { 2, 3, 2, 2 };
+    std::vector<int32_t> outputShape1 { 2, 1, 2, 2 };
+    std::vector<std::vector<int32_t>> outputShapes{ outputShape0, outputShape1 };
+
+    std::vector<int32_t> axisData { 1 };    // Axis: split along dimension 1
+    std::vector<int32_t> splitsData { 3, 1 };  // Splits: output sizes along the axis (3 + 1 = 4)
+    std::vector<uint8_t> inputValues { 1, 2, 3, 4, 5, 6, 7, 8,
+                                     9, 10, 11, 12, 13, 14, 15, 16,
+                                     17, 18, 19, 20, 21, 22, 23, 24,
+                                     25, 26, 27, 28, 29, 30, 31, 32   }; // Input
+
+    // Per outermost slice, output 0 takes the first three runs of four values and output 1 takes the fourth.
+    std::vector<uint8_t> expectedOutputValues0 { 1, 2, 3, 4, 5, 6, 7, 8,
+                                               9, 10, 11, 12, 17, 18, 19, 20,
+                                               21, 22, 23, 24, 25, 26, 27, 28 };
+    std::vector<uint8_t> expectedOutputValues1 { 13, 14, 15, 16, 29, 30, 31, 32 };
+    std::vector<std::vector<uint8_t>> expectedOutputValues{ expectedOutputValues0, expectedOutputValues1 };
+
+    int32_t numSplits = 2;  // Forwarded to the model's SplitVOptions
+
+    SplitVTest<uint8_t>(::tflite::TensorType_UINT8,
+                        backends,
+                        inputShape,
+                        splitsShape,
+                        axisShape,
+                        outputShapes,
+                        inputValues,
+                        splitsData,
+                        axisData,
+                        expectedOutputValues,
+                        numSplits);
+}
+// SPLIT_V Operator, fp32 case: splits a 2x4x2x2 input along axis 1 into a 2x3x2x2 and a 2x1x2x2 output.
+void SplitVFp32Test(std::vector<armnn::BackendId>& backends)
+{
+    std::vector<int32_t> axisShape { 1 };  // The axis tensor holds a single value
+    std::vector<int32_t> inputShape { 2, 4, 2, 2 };
+    std::vector<int32_t> splitsShape { 2 };  // One split size per output
+    std::vector<int32_t> outputShape0 { 2, 3, 2, 2 };
+    std::vector<int32_t> outputShape1 { 2, 1, 2, 2 };
+    std::vector<std::vector<int32_t>> outputShapes{ outputShape0, outputShape1 };
+
+    std::vector<int32_t> axisData { 1 };    // Axis: split along dimension 1
+    std::vector<int32_t> splitsData { 3, 1 };  // Splits: output sizes along the axis (3 + 1 = 4)
+    std::vector<float> inputValues { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f,
+                                     9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f,
+                                     17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f,
+                                     25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f, 32.0f   }; // Input
+
+    // Per outermost slice, output 0 takes the first three runs of four values and output 1 takes the fourth.
+    std::vector<float> expectedOutputValues0 { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f,
+                                               9.0f, 10.0f, 11.0f, 12.0f, 17.0f, 18.0f, 19.0f, 20.0f,
+                                               21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f };
+    std::vector<float> expectedOutputValues1 { 13.0f, 14.0f, 15.0f, 16.0f, 29.0f, 30.0f, 31.0f, 32.0f };
+    std::vector<std::vector<float>> expectedOutputValues{ expectedOutputValues0, expectedOutputValues1 };
+
+    int32_t numSplits = 2;  // Forwarded to the model's SplitVOptions
+
+    SplitVTest<float>(::tflite::TensorType_FLOAT32,
+                      backends,
+                      inputShape,
+                      splitsShape,
+                      axisShape,
+                      outputShapes,
+                      inputValues,
+                      splitsData,
+                      axisData,
+                      expectedOutputValues,
+                      numSplits);
+}
+
+// SPLIT_V Test Suite: one suite per backend (CpuRef, CpuAcc, GpuAcc), each running the uint8 and fp32 cases.
+TEST_SUITE("SPLIT_V_CpuRefTests")
+{
+
+TEST_CASE ("SPLIT_V_Uint8_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef};
+    SplitVUint8Test(backends);
+}
+
+TEST_CASE ("SPLIT_V_Fp32_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef};
+    SplitVFp32Test(backends);
+}
+
+}
+
+TEST_SUITE("SPLIT_V_CpuAccTests")
+{
+
+TEST_CASE ("SPLIT_V_Uint8_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuAcc};
+    SplitVUint8Test(backends);
+}
+
+TEST_CASE ("SPLIT_V_Fp32_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuAcc};
+    SplitVFp32Test(backends);
+}
+
+}
+
+TEST_SUITE("SPLIT_V_GpuAccTests")
+{
+
+TEST_CASE ("SPLIT_V_Uint8_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
+    SplitVUint8Test(backends);
+}
+
+TEST_CASE ("SPLIT_V_Fp32_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
+    SplitVFp32Test(backends);
+}
+
+}
+// End of SPLIT_V Test Suite
+
+} // namespace armnnDelegate
\ No newline at end of file
diff --git a/delegate/src/test/SplitTestHelper.hpp b/delegate/src/test/SplitTestHelper.hpp
new file mode 100644
index 0000000..31fc7d5
--- /dev/null
+++ b/delegate/src/test/SplitTestHelper.hpp
@@ -0,0 +1,368 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "TestUtils.hpp"
+
+#include <armnn_delegate.hpp>
+
+#include <flatbuffers/flatbuffers.h>
+#include <tensorflow/lite/interpreter.h>
+#include <tensorflow/lite/kernels/register.h>
+#include <tensorflow/lite/model.h>
+#include <tensorflow/lite/schema/schema_generated.h>
+#include <tensorflow/lite/version.h>
+
+#include <doctest/doctest.h>
+
+#include <string>
+
+namespace
+{
+// Builds a flatbuffer TfLite model holding one SPLIT operator (axis + input -> two outputs) and returns its bytes.
+std::vector<char> CreateSplitTfLiteModel(tflite::TensorType tensorType,                  // Type of input and outputs
+                                         std::vector<int32_t>& axisTensorShape,          // Shape of the INT32 axis tensor
+                                         std::vector<int32_t>& inputTensorShape,         // Shape of the tensor being split
+                                         const std::vector<std::vector<int32_t>>& outputTensorShapes, // One per output
+                                         std::vector<int32_t>& axisData,                 // Stored as a constant buffer
+                                         const int32_t numSplits,                        // Written into SplitOptions
+                                         float quantScale = 1.0f,                        // Shared by every tensor
+                                         int quantOffset  = 0)                           // Shared by every tensor
+{
+    using namespace tflite;
+    flatbuffers::FlatBufferBuilder flatBufferBuilder;
+
+    std::array<flatbuffers::Offset<tflite::Buffer>, 2> buffers;
+    buffers[0] = CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({})); // Empty buffer
+    buffers[1] = CreateBuffer(flatBufferBuilder, // Raw bytes of axisData
+                              flatBufferBuilder.CreateVector(reinterpret_cast<const uint8_t*>(axisData.data()),
+                                                             sizeof(int32_t) * axisData.size()));
+
+    auto quantizationParameters =
+            CreateQuantizationParameters(flatBufferBuilder,
+                                         0,
+                                         0,
+                                         flatBufferBuilder.CreateVector<float>({ quantScale }),
+                                         flatBufferBuilder.CreateVector<int64_t>({ quantOffset }));
+
+    std::array<flatbuffers::Offset<Tensor>, 4> tensors; // axis, input and two outputs
+    tensors[0] = CreateTensor(flatBufferBuilder,
+                              flatBufferBuilder.CreateVector<int32_t>(axisTensorShape.data(),
+                                                                      axisTensorShape.size()),
+                              ::tflite::TensorType_INT32,
+                              1, // Buffer index: the axis data
+                              flatBufferBuilder.CreateString("axis"),
+                              quantizationParameters);
+    tensors[1] = CreateTensor(flatBufferBuilder,
+                              flatBufferBuilder.CreateVector<int32_t>(inputTensorShape.data(),
+                                                                      inputTensorShape.size()),
+                              tensorType,
+                              0, // Buffer index: the empty buffer
+                              flatBufferBuilder.CreateString("input"),
+                              quantizationParameters);
+
+    // Create the output tensors; the fixed-size tensors array has room for exactly two of them
+    for (unsigned int i = 0; i < outputTensorShapes.size(); ++i)
+    {
+        tensors[i + 2] = CreateTensor(flatBufferBuilder,
+                                  flatBufferBuilder.CreateVector<int32_t>(outputTensorShapes[i].data(),
+                                                                          outputTensorShapes[i].size()),
+                                  tensorType,
+                                  0,
+                                  flatBufferBuilder.CreateString("output"),
+                                  quantizationParameters);
+    }
+
+    // Create the operator. SPLIT uses SplitOptions.
+    tflite::BuiltinOptions operatorBuiltinOptionsType = tflite::BuiltinOptions_SplitOptions;
+    flatbuffers::Offset<void> operatorBuiltinOptions = CreateSplitOptions(flatBufferBuilder, numSplits).Union();
+
+    const std::vector<int> operatorInputs{ {0, 1} };   // Tensor indices: axis, input
+    const std::vector<int> operatorOutputs{ {2, 3} };  // Tensor indices: the two outputs
+    flatbuffers::Offset <Operator> controlOperator =
+            CreateOperator(flatBufferBuilder,
+                           0,
+                           flatBufferBuilder.CreateVector<int32_t>(operatorInputs.data(), operatorInputs.size()),
+                           flatBufferBuilder.CreateVector<int32_t>(operatorOutputs.data(), operatorOutputs.size()),
+                           operatorBuiltinOptionsType,
+                           operatorBuiltinOptions);
+
+    const std::vector<int> subgraphInputs{ {0, 1} };
+    const std::vector<int> subgraphOutputs{ {2, 3} };
+    flatbuffers::Offset <SubGraph> subgraph =
+            CreateSubGraph(flatBufferBuilder,
+                           flatBufferBuilder.CreateVector(tensors.data(), tensors.size()),
+                           flatBufferBuilder.CreateVector<int32_t>(subgraphInputs.data(), subgraphInputs.size()),
+                           flatBufferBuilder.CreateVector<int32_t>(subgraphOutputs.data(), subgraphOutputs.size()),
+                           flatBufferBuilder.CreateVector(&controlOperator, 1));
+
+    flatbuffers::Offset <flatbuffers::String> modelDescription =
+            flatBufferBuilder.CreateString("ArmnnDelegate: SPLIT Operator Model");
+    flatbuffers::Offset <OperatorCode> operatorCode = CreateOperatorCode(flatBufferBuilder, BuiltinOperator_SPLIT);
+
+    flatbuffers::Offset <Model> flatbufferModel =
+            CreateModel(flatBufferBuilder,
+                        TFLITE_SCHEMA_VERSION,
+                        flatBufferBuilder.CreateVector(&operatorCode, 1),
+                        flatBufferBuilder.CreateVector(&subgraph, 1),
+                        modelDescription,
+                        flatBufferBuilder.CreateVector(buffers.data(), buffers.size()));
+
+    flatBufferBuilder.Finish(flatbufferModel);
+
+    return std::vector<char>(flatBufferBuilder.GetBufferPointer(), // Copy the finished buffer out of the builder
+                             flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize());
+}
+// Runs the SPLIT model on a plain TfLite interpreter and on an ArmNN-delegated one, then checks each output.
+template <typename T>
+void SplitTest(tflite::TensorType tensorType,
+               std::vector<armnn::BackendId>& backends,                  // Handed to the ArmNN delegate options
+               std::vector<int32_t>& axisTensorShape,
+               std::vector<int32_t>& inputTensorShape,
+               std::vector<std::vector<int32_t>>& outputTensorShapes,    // One shape per output
+               std::vector<int32_t>& axisData,
+               std::vector<T>& inputValues,
+               std::vector<std::vector<T>>& expectedOutputValues,        // One vector of values per output
+               const int32_t numSplits,
+               float quantScale = 1.0f,
+               int quantOffset  = 0)
+{
+    using namespace tflite;
+    std::vector<char> modelBuffer = CreateSplitTfLiteModel(tensorType,
+                                                           axisTensorShape,
+                                                           inputTensorShape,
+                                                           outputTensorShapes,
+                                                           axisData,
+                                                           numSplits,
+                                                           quantScale,
+                                                           quantOffset);
+    const Model* tfLiteModel = GetModel(modelBuffer.data());
+
+    // Create two TfLite interpreters from the same model
+    std::unique_ptr<Interpreter> armnnDelegate;
+    CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver())
+              (&armnnDelegate) == kTfLiteOk);
+    CHECK(armnnDelegate != nullptr);
+    CHECK(armnnDelegate->AllocateTensors() == kTfLiteOk);
+
+    std::unique_ptr<Interpreter> tfLiteDelegate;
+    CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver())
+              (&tfLiteDelegate) == kTfLiteOk);
+    CHECK(tfLiteDelegate != nullptr);
+    CHECK(tfLiteDelegate->AllocateTensors() == kTfLiteOk);
+
+    // Create the ArmNN Delegate
+    armnnDelegate::DelegateOptions delegateOptions(backends);
+    std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
+    theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
+                     armnnDelegate::TfLiteArmnnDelegateDelete);
+    CHECK(theArmnnDelegate != nullptr);
+
+    // Modify the armnnDelegate interpreter to use theArmnnDelegate
+    CHECK(armnnDelegate->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk);
+
+    // Set input data; index 1 presumably selects the "input" tensor (index 0 is the axis) - confirm in FillInput
+    armnnDelegate::FillInput<T>(tfLiteDelegate, 1, inputValues);
+    armnnDelegate::FillInput<T>(armnnDelegate, 1, inputValues);
+
+    // Run both interpreters
+    CHECK(tfLiteDelegate->Invoke() == kTfLiteOk);
+    CHECK(armnnDelegate->Invoke() == kTfLiteOk);
+
+    // Compare output data, one output tensor at a time
+    for (unsigned int i = 0; i < expectedOutputValues.size(); ++i)
+    {
+        armnnDelegate::CompareOutputData<T>(tfLiteDelegate,
+                                            armnnDelegate,
+                                            outputTensorShapes[i],
+                                            expectedOutputValues[i],
+                                            i);
+    }
+
+    tfLiteDelegate.reset(nullptr);
+    armnnDelegate.reset(nullptr);
+} // End of SPLIT Test
+// Builds a flatbuffer TfLite model holding one SPLIT_V operator (input + splits + axis -> two outputs).
+std::vector<char> CreateSplitVTfLiteModel(tflite::TensorType tensorType,                 // Type of input and outputs
+                                          std::vector<int32_t>& inputTensorShape,        // Shape of the tensor being split
+                                          std::vector<int32_t>& splitsTensorShape,       // Shape of the INT32 splits tensor
+                                          std::vector<int32_t>& axisTensorShape,         // Shape of the INT32 axis tensor
+                                          const std::vector<std::vector<int32_t>>& outputTensorShapes, // One per output
+                                          std::vector<int32_t>& splitsData,              // Stored as a constant buffer
+                                          std::vector<int32_t>& axisData,                // Stored as a constant buffer
+                                          const int32_t numSplits,                       // Written into SplitVOptions
+                                          float quantScale = 1.0f,                       // Shared by every tensor
+                                          int quantOffset  = 0)                          // Shared by every tensor
+{
+    using namespace tflite;
+    flatbuffers::FlatBufferBuilder flatBufferBuilder;
+
+    std::array<flatbuffers::Offset<tflite::Buffer>, 3> buffers;
+    buffers[0] = CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({})); // Empty buffer
+    buffers[1] = CreateBuffer(flatBufferBuilder, // Raw bytes of splitsData
+                              flatBufferBuilder.CreateVector(reinterpret_cast<const uint8_t*>(splitsData.data()),
+                                                             sizeof(int32_t) * splitsData.size()));
+    buffers[2] = CreateBuffer(flatBufferBuilder, // Raw bytes of axisData
+                              flatBufferBuilder.CreateVector(reinterpret_cast<const uint8_t*>(axisData.data()),
+                                                             sizeof(int32_t) * axisData.size()));
+
+    auto quantizationParameters =
+        CreateQuantizationParameters(flatBufferBuilder,
+                                     0,
+                                     0,
+                                     flatBufferBuilder.CreateVector<float>({ quantScale }),
+                                     flatBufferBuilder.CreateVector<int64_t>({ quantOffset }));
+
+    std::array<flatbuffers::Offset<Tensor>, 5> tensors; // input, splits, axis and two outputs
+    tensors[0] = CreateTensor(flatBufferBuilder,
+                              flatBufferBuilder.CreateVector<int32_t>(inputTensorShape.data(),
+                                                                      inputTensorShape.size()),
+                              tensorType,
+                              0, // Buffer index: the empty buffer
+                              flatBufferBuilder.CreateString("input"),
+                              quantizationParameters);
+    tensors[1] = CreateTensor(flatBufferBuilder,
+                              flatBufferBuilder.CreateVector<int32_t>(splitsTensorShape.data(),
+                                                                      splitsTensorShape.size()),
+                              ::tflite::TensorType_INT32,
+                              1, // Buffer index: the splits data
+                              flatBufferBuilder.CreateString("splits"),
+                              quantizationParameters);
+    tensors[2] = CreateTensor(flatBufferBuilder,
+                              flatBufferBuilder.CreateVector<int32_t>(axisTensorShape.data(),
+                                                                      axisTensorShape.size()),
+                              ::tflite::TensorType_INT32,
+                              2, // Buffer index: the axis data
+                              flatBufferBuilder.CreateString("axis"),
+                              quantizationParameters);
+
+    // Create the output tensors; the fixed-size tensors array has room for exactly two of them
+    for (unsigned int i = 0; i < outputTensorShapes.size(); ++i)
+    {
+        tensors[i + 3] = CreateTensor(flatBufferBuilder,
+                                      flatBufferBuilder.CreateVector<int32_t>(outputTensorShapes[i].data(),
+                                                                              outputTensorShapes[i].size()),
+                                      tensorType,
+                                      0,
+                                      flatBufferBuilder.CreateString("output"),
+                                      quantizationParameters);
+    }
+
+    // Create the operator. SPLIT_V uses SplitVOptions.
+    tflite::BuiltinOptions operatorBuiltinOptionsType = tflite::BuiltinOptions_SplitVOptions;
+    flatbuffers::Offset<void> operatorBuiltinOptions = CreateSplitVOptions(flatBufferBuilder, numSplits).Union();
+
+    const std::vector<int> operatorInputs{ {0, 1, 2} };  // Tensor indices: input, splits, axis
+    const std::vector<int> operatorOutputs{ {3, 4} };    // Tensor indices: the two outputs
+    flatbuffers::Offset <Operator> controlOperator =
+        CreateOperator(flatBufferBuilder,
+                       0,
+                       flatBufferBuilder.CreateVector<int32_t>(operatorInputs.data(), operatorInputs.size()),
+                       flatBufferBuilder.CreateVector<int32_t>(operatorOutputs.data(), operatorOutputs.size()),
+                       operatorBuiltinOptionsType,
+                       operatorBuiltinOptions);
+
+    const std::vector<int> subgraphInputs{ {0, 1, 2} };
+    const std::vector<int> subgraphOutputs{ {3, 4} };
+    flatbuffers::Offset <SubGraph> subgraph =
+        CreateSubGraph(flatBufferBuilder,
+                       flatBufferBuilder.CreateVector(tensors.data(), tensors.size()),
+                       flatBufferBuilder.CreateVector<int32_t>(subgraphInputs.data(), subgraphInputs.size()),
+                       flatBufferBuilder.CreateVector<int32_t>(subgraphOutputs.data(), subgraphOutputs.size()),
+                       flatBufferBuilder.CreateVector(&controlOperator, 1));
+
+    flatbuffers::Offset <flatbuffers::String> modelDescription =
+        flatBufferBuilder.CreateString("ArmnnDelegate: SPLIT_V Operator Model");
+    flatbuffers::Offset <OperatorCode> operatorCode = CreateOperatorCode(flatBufferBuilder, BuiltinOperator_SPLIT_V);
+
+    flatbuffers::Offset <Model> flatbufferModel =
+        CreateModel(flatBufferBuilder,
+                    TFLITE_SCHEMA_VERSION,
+                    flatBufferBuilder.CreateVector(&operatorCode, 1),
+                    flatBufferBuilder.CreateVector(&subgraph, 1),
+                    modelDescription,
+                    flatBufferBuilder.CreateVector(buffers.data(), buffers.size()));
+
+    flatBufferBuilder.Finish(flatbufferModel);
+
+    return std::vector<char>(flatBufferBuilder.GetBufferPointer(), // Copy the finished buffer out of the builder
+                             flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize());
+}
+// Runs the SPLIT_V model on a plain TfLite interpreter and on an ArmNN-delegated one, then checks each output.
+template <typename T>
+void SplitVTest(tflite::TensorType tensorType,
+                std::vector<armnn::BackendId>& backends,                 // Handed to the ArmNN delegate options
+                std::vector<int32_t>& inputTensorShape,
+                std::vector<int32_t>& splitsTensorShape,
+                std::vector<int32_t>& axisTensorShape,
+                std::vector<std::vector<int32_t>>& outputTensorShapes,   // One shape per output
+                std::vector<T>& inputValues,
+                std::vector<int32_t>& splitsData,
+                std::vector<int32_t>& axisData,
+                std::vector<std::vector<T>>& expectedOutputValues,       // One vector of values per output
+                const int32_t numSplits,
+                float quantScale = 1.0f,
+                int quantOffset  = 0)
+{
+    using namespace tflite;
+    std::vector<char> modelBuffer = CreateSplitVTfLiteModel(tensorType,
+                                                            inputTensorShape,
+                                                            splitsTensorShape,
+                                                            axisTensorShape,
+                                                            outputTensorShapes,
+                                                            splitsData,
+                                                            axisData,
+                                                            numSplits,
+                                                            quantScale,
+                                                            quantOffset);
+    const Model* tfLiteModel = GetModel(modelBuffer.data());
+
+    // Create two TfLite interpreters from the same model
+    std::unique_ptr<Interpreter> armnnDelegate;
+    CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver())
+                  (&armnnDelegate) == kTfLiteOk);
+    CHECK(armnnDelegate != nullptr);
+    CHECK(armnnDelegate->AllocateTensors() == kTfLiteOk);
+
+    std::unique_ptr<Interpreter> tfLiteDelegate;
+    CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver())
+                  (&tfLiteDelegate) == kTfLiteOk);
+    CHECK(tfLiteDelegate != nullptr);
+    CHECK(tfLiteDelegate->AllocateTensors() == kTfLiteOk);
+
+    // Create the ArmNN Delegate
+    armnnDelegate::DelegateOptions delegateOptions(backends);
+    std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
+            theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
+                             armnnDelegate::TfLiteArmnnDelegateDelete);
+    CHECK(theArmnnDelegate != nullptr);
+
+    // Modify the armnnDelegate interpreter to use theArmnnDelegate
+    CHECK(armnnDelegate->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk);
+
+    // Set input data; index 0 presumably selects the "input" tensor - confirm in FillInput
+    armnnDelegate::FillInput<T>(tfLiteDelegate, 0, inputValues);
+    armnnDelegate::FillInput<T>(armnnDelegate, 0, inputValues);
+
+    // Run both interpreters
+    CHECK(tfLiteDelegate->Invoke() == kTfLiteOk);
+    CHECK(armnnDelegate->Invoke() == kTfLiteOk);
+
+    // Compare output data, one output tensor at a time
+    for (unsigned int i = 0; i < expectedOutputValues.size(); ++i)
+    {
+        armnnDelegate::CompareOutputData<T>(tfLiteDelegate,
+                                            armnnDelegate,
+                                            outputTensorShapes[i],
+                                            expectedOutputValues[i],
+                                            i);
+    }
+
+    tfLiteDelegate.reset(nullptr);
+    armnnDelegate.reset(nullptr);
+} // End of SPLIT_V Test
+
+} // anonymous namespace
\ No newline at end of file
diff --git a/delegate/src/test/TestUtils.hpp b/delegate/src/test/TestUtils.hpp
index 57ae3ce..284eaa7 100644
--- a/delegate/src/test/TestUtils.hpp
+++ b/delegate/src/test/TestUtils.hpp
@@ -51,12 +51,13 @@
 void CompareOutputData(std::unique_ptr<tflite::Interpreter>& tfLiteInterpreter,
                        std::unique_ptr<tflite::Interpreter>& armnnDelegateInterpreter,
                        std::vector<int32_t>& expectedOutputShape,
-                       std::vector<T>& expectedOutputValues)
+                       std::vector<T>& expectedOutputValues,
+                       unsigned int outputIndex = 0)
 {
-    auto tfLiteDelegateOutputId = tfLiteInterpreter->outputs()[0];
+    auto tfLiteDelegateOutputId = tfLiteInterpreter->outputs()[outputIndex];
     auto tfLiteDelegateOutputTensor = tfLiteInterpreter->tensor(tfLiteDelegateOutputId);
     auto tfLiteDelegateOutputData = tfLiteInterpreter->typed_tensor<T>(tfLiteDelegateOutputId);
-    auto armnnDelegateOutputId = armnnDelegateInterpreter->outputs()[0];
+    auto armnnDelegateOutputId = armnnDelegateInterpreter->outputs()[outputIndex];
     auto armnnDelegateOutputTensor = armnnDelegateInterpreter->tensor(armnnDelegateOutputId);
     auto armnnDelegateOutputData = armnnDelegateInterpreter->typed_tensor<T>(armnnDelegateOutputId);