IVGCVSW-5392 TfLiteDelegate: Implement the Batch/Space operators

 * Added support for BATCH_TO_SPACE_ND and SPACE_TO_BATCH_ND

Signed-off-by: Matthew Sloyan <matthew.sloyan@arm.com>
Change-Id: Ib29333f3e2bc1a18bcc63533da4369b63c45ab90
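
Reviewer note (not part of the patch): BATCH_TO_SPACE_ND and SPACE_TO_BATCH_ND are inverse rearrangements between the batch and spatial dimensions, so the expected output shapes in the new tests follow directly from the block shape and the crops/padding. A minimal standalone sketch of the BATCH_TO_SPACE_ND shape rule for the 4D NHWC case handled here (SPACE_TO_BATCH_ND is the inverse, with padding added instead of crops removed); BatchToSpaceShape is an illustrative stand-in, not Arm NN API:

#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

// Batch is divided by the product of the block factors; each spatial dimension
// is multiplied by its block factor and then reduced by its crops.
std::vector<int32_t> BatchToSpaceShape(const std::vector<int32_t>& in,    // {N, H, W, C}
                                       const std::vector<int32_t>& block, // {blockH, blockW}
                                       const std::vector<std::pair<int32_t, int32_t>>& crops)
{
    return { in[0] / (block[0] * block[1]),
             in[1] * block[0] - crops[0].first - crops[0].second,
             in[2] * block[1] - crops[1].first - crops[1].second,
             in[3] };
}

int main()
{
    // Mirrors BatchToSpaceNDFp32Test below: input {4,1,1,1}, block {2,2}, no crops.
    assert((BatchToSpaceShape({4, 1, 1, 1}, {2, 2}, {{0, 0}, {0, 0}})
            == std::vector<int32_t>{1, 2, 2, 1}));
    return 0;
}
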
diff --git a/delegate/CMakeLists.txt b/delegate/CMakeLists.txt
index 11bff48..f792821 100644
--- a/delegate/CMakeLists.txt
+++ b/delegate/CMakeLists.txt
@@ -116,6 +116,8 @@
         src/test/ArgMinMaxTest.cpp
         src/test/ArgMinMaxTestHelper.hpp
         src/test/ArmnnDelegateTest.cpp
+        src/test/BatchSpaceTest.cpp
+        src/test/BatchSpaceTestHelper.hpp
         src/test/ComparisonTest.cpp
         src/test/ComparisonTestHelper.hpp
         src/test/ControlTest.cpp
diff --git a/delegate/src/BatchSpace.hpp b/delegate/src/BatchSpace.hpp
index 5a8a5dc..318806f 100644
--- a/delegate/src/BatchSpace.hpp
+++ b/delegate/src/BatchSpace.hpp
@@ -5,8 +5,6 @@
 
 #pragma once
 
-#include <armnn/utility/IgnoreUnused.hpp>
-
 #include <tensorflow/lite/builtin_ops.h>
 #include <tensorflow/lite/c/builtin_op_data.h>
 #include <tensorflow/lite/c/common.h>
@@ -21,12 +19,89 @@
                                          int nodeIndex,
                                          int32_t operatorCode)
 {
-    armnn::IgnoreUnused(delegateData,
-                        tfLiteContext,
-                        tfLiteNode,
-                        nodeIndex,
-                        operatorCode);
-    return kTfLiteError;
+    TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 3, nodeIndex));
+    TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex));
+
+    const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors;
+    const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]];
+    if (!IsValid(tfLiteContext, tfLiteInputTensor, operatorCode, nodeIndex))
+    {
+        return kTfLiteError;
+    }
+
+    const TfLiteTensor& tfLiteBlockShapeTensor = tfLiteTensors[tfLiteNode->inputs->data[1]];
+    if (!IsValid(tfLiteContext, tfLiteBlockShapeTensor, operatorCode, nodeIndex))
+    {
+        return kTfLiteError;
+    }
+
+    const TfLiteTensor& tfLiteCropsTensor = tfLiteTensors[tfLiteNode->inputs->data[2]];
+    if (!IsValid(tfLiteContext, tfLiteCropsTensor, operatorCode, nodeIndex))
+    {
+        return kTfLiteError;
+    }
+
+    const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]];
+    if (!IsValid(tfLiteContext, tfLiteOutputTensor, operatorCode, nodeIndex))
+    {
+        return kTfLiteError;
+    }
+
+    const armnn::TensorInfo& inputTensorInfo      = GetTensorInfoForTfLiteTensor(tfLiteInputTensor);
+    const armnn::TensorInfo& blockShapeTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteBlockShapeTensor);
+    const armnn::TensorInfo& cropsTensorInfo      = GetTensorInfoForTfLiteTensor(tfLiteCropsTensor);
+    const armnn::TensorInfo& outputTensorInfo     = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor);
+
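+    // Copy the constant block-shape and crops tensors out of the TfLite model. Both hold int32
+    // data; copying them into unsigned int vectors assumes a 32-bit unsigned int.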
+    std::vector<unsigned int> blockShape(blockShapeTensorInfo.GetNumElements());
+    std::memcpy(blockShape.data(), tfLiteBlockShapeTensor.data.data, blockShapeTensorInfo.GetNumBytes());
+
+    std::vector<unsigned int> cropsVector(cropsTensorInfo.GetNumElements());
+    std::memcpy(cropsVector.data(), tfLiteCropsTensor.data.data, cropsTensorInfo.GetNumBytes());
+
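+    // Unflatten the crops data into one (begin, end) pair per spatial dimension.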
+    size_t step = 2;
+    std::vector<std::pair<unsigned int, unsigned int>> crops;
+    for (unsigned int i = 0; i < cropsTensorInfo.GetNumElements() / step; ++i)
+    {
+        crops.emplace_back(cropsVector[i * step], cropsVector[i * step + 1]);
+    }
+
+    armnn::BatchToSpaceNdDescriptor descriptor;
+    descriptor.m_BlockShape = blockShape;
+    descriptor.m_Crops = crops;
+    descriptor.m_DataLayout = armnn::DataLayout::NHWC;
+
+    // Check if supported
+    bool isSupported = false;
+    auto validateFunc = [&](const armnn::TensorInfo& outputTensorInfo, bool& isSupported)
+    {
+        FORWARD_LAYER_SUPPORT_FUNC(__func__,
+                                   tfLiteContext,
+                                   IsBatchToSpaceNdSupported,
+                                   delegateData.m_Backends,
+                                   isSupported,
+                                   inputTensorInfo,
+                                   outputTensorInfo,
+                                   descriptor);
+    };
+
+    // If m_Network is a nullptr, this signals that a prerequisite TfLite callback is required to
+    // clarify support for the operator.
+    // If supported, VisitBatchToSpaceNdOperator will be called again to add the layer to the network as seen below.
+    if (!delegateData.m_Network)
+    {
+        validateFunc(outputTensorInfo, isSupported);
+        return isSupported ? kTfLiteOk : kTfLiteError;
+    }
+
+    // Add a BatchToSpace layer
+    armnn::IConnectableLayer* layer = delegateData.m_Network->AddBatchToSpaceNdLayer(descriptor);
+    ARMNN_ASSERT(layer != nullptr);
+
+    armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(0);
+    outputSlot.SetTensorInfo(outputTensorInfo);
+
+    // Connect
+    return Connect(layer, tfLiteNode, delegateData);
 }
 
 TfLiteStatus VisitSpaceToBatchNdOperator(DelegateData& delegateData,
@@ -35,12 +110,89 @@
                                          int nodeIndex,
                                          int32_t operatorCode)
 {
-    armnn::IgnoreUnused(delegateData,
-                        tfLiteContext,
-                        tfLiteNode,
-                        nodeIndex,
-                        operatorCode);
-    return kTfLiteError;
+    TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 3, nodeIndex));
+    TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex));
+
+    const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors;
+    const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]];
+    if (!IsValid(tfLiteContext, tfLiteInputTensor, operatorCode, nodeIndex))
+    {
+        return kTfLiteError;
+    }
+
+    const TfLiteTensor& tfLiteBlockShapeTensor = tfLiteTensors[tfLiteNode->inputs->data[1]];
+    if (!IsValid(tfLiteContext, tfLiteBlockShapeTensor, operatorCode, nodeIndex))
+    {
+        return kTfLiteError;
+    }
+
+    const TfLiteTensor& tfLitePadListTensor = tfLiteTensors[tfLiteNode->inputs->data[2]];
+    if (!IsValid(tfLiteContext, tfLitePadListTensor, operatorCode, nodeIndex))
+    {
+        return kTfLiteError;
+    }
+
+    const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]];
+    if (!IsValid(tfLiteContext, tfLiteOutputTensor, operatorCode, nodeIndex))
+    {
+        return kTfLiteError;
+    }
+
+    const armnn::TensorInfo& inputTensorInfo      = GetTensorInfoForTfLiteTensor(tfLiteInputTensor);
+    const armnn::TensorInfo& blockShapeTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteBlockShapeTensor);
+    const armnn::TensorInfo& padListTensorInfo    = GetTensorInfoForTfLiteTensor(tfLitePadListTensor);
+    const armnn::TensorInfo& outputTensorInfo     = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor);
+
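+    // Copy the constant block-shape and padding tensors out of the TfLite model. Both hold int32
+    // data; copying them into unsigned int vectors assumes a 32-bit unsigned int.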
+    std::vector<unsigned int> blockShape(blockShapeTensorInfo.GetNumElements());
+    std::memcpy(blockShape.data(), tfLiteBlockShapeTensor.data.data, blockShapeTensorInfo.GetNumBytes());
+
+    std::vector<unsigned int> padListVector(padListTensorInfo.GetNumElements());
+    std::memcpy(padListVector.data(), tfLitePadListTensor.data.data, padListTensorInfo.GetNumBytes());
+
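+    // Unflatten the padding data into one (before, after) pair per spatial dimension.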
+    size_t step = 2;
+    std::vector<std::pair<unsigned int, unsigned int>> padList;
+    for (unsigned int i = 0; i < padListTensorInfo.GetNumElements() / step; ++i)
+    {
+        padList.emplace_back(padListVector[i * step], padListVector[i * step + 1]);
+    }
+
+    armnn::SpaceToBatchNdDescriptor descriptor;
+    descriptor.m_BlockShape = blockShape;
+    descriptor.m_PadList = padList;
+    descriptor.m_DataLayout = armnn::DataLayout::NHWC;
+
+    // Check if supported
+    bool isSupported = false;
+    auto validateFunc = [&](const armnn::TensorInfo& outputTensorInfo, bool& isSupported)
+    {
+        FORWARD_LAYER_SUPPORT_FUNC(__func__,
+                                   tfLiteContext,
+                                   IsSpaceToBatchNdSupported,
+                                   delegateData.m_Backends,
+                                   isSupported,
+                                   inputTensorInfo,
+                                   outputTensorInfo,
+                                   descriptor);
+    };
+
+    // If m_Network is a nullptr, this signals that a prerequisite TfLite callback is required to
+    // clarify support for the operator.
+    // If supported, VisitSpaceToBatchNdOperator will be called again to add the layer to the network as seen below.
+    if (!delegateData.m_Network)
+    {
+        validateFunc(outputTensorInfo, isSupported);
+        return isSupported ? kTfLiteOk : kTfLiteError;
+    }
+
+    // Add a SpaceToBatch layer
+    armnn::IConnectableLayer* layer = delegateData.m_Network->AddSpaceToBatchNdLayer(descriptor);
+    ARMNN_ASSERT(layer != nullptr);
+
+    armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(0);
+    outputSlot.SetTensorInfo(outputTensorInfo);
+
+    // Connect
+    return Connect(layer, tfLiteNode, delegateData);
 }
 
 } // namespace armnnDelegate
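
Reviewer note (not part of the patch): both visit functions above follow the delegate's two-phase pattern: when DelegateData::m_Network is null only the backend support query runs, and on the second pass the layer is added and connected. A condensed sketch of that control flow, using hypothetical stand-ins (Network, HypotheticalVisit) rather than the Arm NN API:

#include <cstdio>

// Hypothetical condensed form of the pattern used above.
struct Network { void AddLayer() { std::printf("layer added\n"); } };

bool HypotheticalVisit(Network* network, bool backendSupportsOp)
{
    if (!network)                 // first pass: support query only
    {
        return backendSupportsOp;
    }
    network->AddLayer();          // second pass: build the graph
    return true;
}

int main()
{
    Network net;
    if (HypotheticalVisit(nullptr, true))  // TfLite asks "can you run this node?"
    {
        HypotheticalVisit(&net, true);     // delegate builds the Arm NN graph
    }
    return 0;
}
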
diff --git a/delegate/src/test/BatchSpaceTest.cpp b/delegate/src/test/BatchSpaceTest.cpp
new file mode 100644
index 0000000..47eba45
--- /dev/null
+++ b/delegate/src/test/BatchSpaceTest.cpp
@@ -0,0 +1,299 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "BatchSpaceTestHelper.hpp"
+
+#include <armnn_delegate.hpp>
+
+#include <flatbuffers/flatbuffers.h>
+#include <tensorflow/lite/schema/schema_generated.h>
+
+#include <doctest/doctest.h>
+
+namespace armnnDelegate
+{
+
+// BatchToSpaceND Operator
+void BatchToSpaceNDFp32Test(std::vector<armnn::BackendId>& backends)
+{
+    std::vector<int32_t> inputShape { 4, 1, 1, 1 };
+    std::vector<int32_t> expectedOutputShape { 1, 2, 2, 1 };
+
+    std::vector<float> inputValues { 1.0f, 2.0f, 3.0f, 4.0f };
+    std::vector<float> expectedOutputValues { 1.0f, 2.0f, 3.0f, 4.0f };
+
+    std::vector<unsigned int> blockShape({2, 2});
+    std::vector<std::pair<unsigned int, unsigned int>> crops = {{0, 0}, {0, 0}};
+
+    BatchSpaceTest<float>(tflite::BuiltinOperator_BATCH_TO_SPACE_ND,
+                          ::tflite::TensorType_FLOAT32,
+                          backends,
+                          inputShape,
+                          expectedOutputShape,
+                          inputValues,
+                          blockShape,
+                          crops,
+                          expectedOutputValues);
+}
+
+void BatchToSpaceNDFp32BatchOneTest(std::vector<armnn::BackendId>& backends)
+{
+    std::vector<int32_t> inputShape { 1, 2, 2, 1 };
+    std::vector<int32_t> expectedOutputShape { 1, 2, 2, 1 };
+
+    std::vector<float> inputValues { 1.0f, 2.0f, 3.0f, 4.0f };
+    std::vector<float> expectedOutputValues { 1.0f, 2.0f, 3.0f, 4.0f };
+
+    std::vector<unsigned int> blockShape({1, 1});
+    std::vector<std::pair<unsigned int, unsigned int>> crops = {{0, 0}, {0, 0}};
+
+    BatchSpaceTest<float>(tflite::BuiltinOperator_BATCH_TO_SPACE_ND,
+                          ::tflite::TensorType_FLOAT32,
+                          backends,
+                          inputShape,
+                          expectedOutputShape,
+                          inputValues,
+                          blockShape,
+                          crops,
+                          expectedOutputValues);
+}
+
+void BatchToSpaceNDUint8Test(std::vector<armnn::BackendId>& backends)
+{
+    std::vector<int32_t> inputShape { 4, 1, 1, 3 };
+    std::vector<int32_t> expectedOutputShape { 1, 2, 2, 3 };
+
+    std::vector<uint8_t> inputValues { 1, 2, 3, 4, 5, 6, 7 };
+    std::vector<uint8_t> expectedOutputValues { 1, 2, 3, 4, 5, 6, 7 };
+
+    std::vector<unsigned int> blockShape({2, 2});
+    std::vector<std::pair<unsigned int, unsigned int>> crops = {{0, 0}, {0, 0}};
+
+    BatchSpaceTest<uint8_t>(tflite::BuiltinOperator_BATCH_TO_SPACE_ND,
+                            ::tflite::TensorType_UINT8,
+                            backends,
+                            inputShape,
+                            expectedOutputShape,
+                            inputValues,
+                            blockShape,
+                            crops,
+                            expectedOutputValues);
+}
+
+// SpaceToBatchND Operator
+void SpaceToBatchNDFp32Test(std::vector<armnn::BackendId>& backends)
+{
+    std::vector<int32_t> inputShape { 1, 2, 2, 1 };
+    std::vector<int32_t> expectedOutputShape { 4, 1, 1, 1 };
+
+    std::vector<float> inputValues { 1.0f, 2.0f, 3.0f, 4.0f };
+    std::vector<float> expectedOutputValues { 1.0f, 2.0f, 3.0f, 4.0f };
+
+    std::vector<unsigned int> blockShape({2, 2});
+    std::vector<std::pair<unsigned int, unsigned int>> padding = {{0, 0}, {0, 0}};
+
+    BatchSpaceTest<float>(tflite::BuiltinOperator_SPACE_TO_BATCH_ND,
+                          ::tflite::TensorType_FLOAT32,
+                          backends,
+                          inputShape,
+                          expectedOutputShape,
+                          inputValues,
+                          blockShape,
+                          padding,
+                          expectedOutputValues);
+}
+
+void SpaceToBatchNDFp32PaddingTest(std::vector<armnn::BackendId>& backends)
+{
+    std::vector<int32_t> inputShape { 2, 2, 4, 1 };
+    std::vector<int32_t> expectedOutputShape { 8, 1, 3, 1 };
+
+    std::vector<float> inputValues { 1.0f,  2.0f,  3.0f,  4.0f,
+                                     5.0f,  6.0f,  7.0f,  8.0f,
+                                     9.0f,  10.0f, 11.0f, 12.0f,
+                                     13.0f, 14.0f, 15.0f, 16.0f };
+
+    std::vector<float> expectedOutputValues { 0.0f, 1.0f, 3.0f,  0.0f, 9.0f, 11.0f,
+                                              0.0f, 2.0f, 4.0f,  0.0f, 10.0f, 12.0f,
+                                              0.0f, 5.0f, 7.0f,  0.0f, 13.0f, 15.0f,
+                                              0.0f, 6.0f, 8.0f,  0.0f, 14.0f, 16.0f };
+
+    std::vector<unsigned int> blockShape({2, 2});
+    std::vector<std::pair<unsigned int, unsigned int>> padding = {{0, 0}, {2, 0}};
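+    // The {2, 0} entry pads the width axis: {2, 2, 4, 1} becomes {2, 2, 6, 1} before the
+    // 2x2 block split, giving the {8, 1, 3, 1} output shape above.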
+
+    BatchSpaceTest<float>(tflite::BuiltinOperator_SPACE_TO_BATCH_ND,
+                          ::tflite::TensorType_FLOAT32,
+                          backends,
+                          inputShape,
+                          expectedOutputShape,
+                          inputValues,
+                          blockShape,
+                          padding,
+                          expectedOutputValues);
+}
+
+void SpaceToBatchNDUint8Test(std::vector<armnn::BackendId>& backends)
+{
+    std::vector<int32_t> inputShape { 1, 2, 2, 3 };
+    std::vector<int32_t> expectedOutputShape { 4, 1, 1, 3 };
+
+    std::vector<uint8_t> inputValues { 1, 2, 3, 4, 5, 6, 7 };
+    std::vector<uint8_t> expectedOutputValues { 1, 2, 3, 4, 5, 6, 7 };
+
+    std::vector<unsigned int> blockShape({2, 2});
+    std::vector<std::pair<unsigned int, unsigned int>> padding = {{0, 0}, {0, 0}};
+
+    BatchSpaceTest<uint8_t>(tflite::BuiltinOperator_SPACE_TO_BATCH_ND,
+                            ::tflite::TensorType_UINT8,
+                            backends,
+                            inputShape,
+                            expectedOutputShape,
+                            inputValues,
+                            blockShape,
+                            padding,
+                            expectedOutputValues);
+}
+
+// BatchToSpaceND Tests
+TEST_SUITE("BatchToSpaceND_CpuAccTests")
+{
+
+TEST_CASE ("BatchToSpaceND_Fp32_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuAcc};
+    BatchToSpaceNDFp32Test(backends);
+}
+
+TEST_CASE ("BatchToSpaceND_Fp32_BatchOne_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuAcc};
+    BatchToSpaceNDFp32BatchOneTest(backends);
+}
+
+TEST_CASE ("BatchToSpaceND_Uint8_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuAcc};
+    BatchToSpaceNDUint8Test(backends);
+}
+
+}
+
+TEST_SUITE("BatchToSpaceND_GpuAccTests")
+{
+
+TEST_CASE ("BatchToSpaceND_Fp32_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
+    BatchToSpaceNDFp32Test(backends);
+}
+
+TEST_CASE ("BatchToSpaceND_Fp32_BatchOne_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
+    BatchToSpaceNDFp32BatchOneTest(backends);
+}
+
+TEST_CASE ("BatchToSpaceND_Uint8_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
+    BatchToSpaceNDUint8Test(backends);
+}
+
+}
+
+TEST_SUITE("BatchToSpaceND_CpuRefTests")
+{
+
+TEST_CASE ("BatchToSpaceND_Fp32_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef};
+    BatchToSpaceNDFp32Test(backends);
+}
+
+TEST_CASE ("BatchToSpaceND_Fp32_BatchOne_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef};
+    BatchToSpaceNDFp32BatchOneTest(backends);
+}
+
+TEST_CASE ("BatchToSpaceND_Uint8_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef};
+    BatchToSpaceNDUint8Test(backends);
+}
+
+}
+
+// SpaceToBatchND Tests
+TEST_SUITE("SpaceToBatchND_CpuAccTests")
+{
+
+TEST_CASE ("SpaceToBatchND_Fp32_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuAcc};
+    SpaceToBatchNDFp32Test(backends);
+}
+
+TEST_CASE ("SpaceToBatchND_Fp32_Padding_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuAcc};
+    SpaceToBatchNDFp32PaddingTest(backends);
+}
+
+TEST_CASE ("SpaceToBatchND_Uint8_CpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuAcc};
+    SpaceToBatchNDUint8Test(backends);
+}
+
+}
+
+TEST_SUITE("SpaceToBatchND_GpuAccTests")
+{
+
+TEST_CASE ("SpaceToBatchND_Fp32_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
+    SpaceToBatchNDFp32Test(backends);
+}
+
+TEST_CASE ("SpaceToBatchND_Fp32_Padding_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
+    SpaceToBatchNDFp32PaddingTest(backends);
+}
+
+TEST_CASE ("SpaceToBatchND_Uint8_GpuAcc_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::GpuAcc};
+    SpaceToBatchNDUint8Test(backends);
+}
+
+}
+
+TEST_SUITE("SpaceToBatchND_CpuRefTests")
+{
+
+TEST_CASE ("SpaceToBatchND_Fp32_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef};
+    SpaceToBatchNDFp32Test(backends);
+}
+
+TEST_CASE ("SpaceToBatchND_Fp32_Padding_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef};
+    SpaceToBatchNDFp32PaddingTest(backends);
+}
+
+TEST_CASE ("SpaceToBatchND_Uint8_CpuRef_Test")
+{
+    std::vector<armnn::BackendId> backends = {armnn::Compute::CpuRef};
+    SpaceToBatchNDUint8Test(backends);
+}
+
+}
+
+} // namespace armnnDelegate
\ No newline at end of file
diff --git a/delegate/src/test/BatchSpaceTestHelper.hpp b/delegate/src/test/BatchSpaceTestHelper.hpp
new file mode 100644
index 0000000..464a5d9
--- /dev/null
+++ b/delegate/src/test/BatchSpaceTestHelper.hpp
@@ -0,0 +1,216 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "TestUtils.hpp"
+
+#include <armnn_delegate.hpp>
+
+#include <flatbuffers/flatbuffers.h>
+#include <tensorflow/lite/interpreter.h>
+#include <tensorflow/lite/kernels/register.h>
+#include <tensorflow/lite/model.h>
+#include <tensorflow/lite/schema/schema_generated.h>
+#include <tensorflow/lite/version.h>
+
+#include <doctest/doctest.h>
+
+namespace
+{
+
+std::vector<char> CreateBatchSpaceTfLiteModel(tflite::BuiltinOperator batchSpaceOperatorCode,
+                                              tflite::TensorType tensorType,
+                                              std::vector<int32_t>& inputTensorShape,
+                                              std::vector<int32_t>& outputTensorShape,
+                                              std::vector<unsigned int>& blockData,
+                                              std::vector<std::pair<unsigned int, unsigned int>>& cropsPadData,
+                                              float quantScale = 1.0f,
+                                              int quantOffset  = 0)
+{
+    using namespace tflite;
+    flatbuffers::FlatBufferBuilder flatBufferBuilder;
+
+    std::array<flatbuffers::Offset<tflite::Buffer>, 3> buffers;
+    buffers[0] = CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({}));
+    buffers[1] = CreateBuffer(flatBufferBuilder,
+                              flatBufferBuilder.CreateVector(reinterpret_cast<const uint8_t*>(blockData.data()),
+                                                                  sizeof(int32_t) * blockData.size()));
+    buffers[2] = CreateBuffer(flatBufferBuilder,
+                              flatBufferBuilder.CreateVector(reinterpret_cast<const uint8_t*>(cropsPadData.data()),
+                                                                  sizeof(int64_t) * cropsPadData.size()));
+
+    auto quantizationParameters =
+            CreateQuantizationParameters(flatBufferBuilder,
+                                         0,
+                                         0,
+                                         flatBufferBuilder.CreateVector<float>({ quantScale }),
+                                         flatBufferBuilder.CreateVector<int64_t>({ quantOffset }));
+
+    std::string cropsOrPadding =
+            batchSpaceOperatorCode == tflite::BuiltinOperator_BATCH_TO_SPACE_ND ? "crops" : "padding";
+
+    std::vector<int32_t> blockShape { 2 };
+    std::vector<int32_t> cropsOrPaddingShape { 2, 2 };
+
+    std::array<flatbuffers::Offset<Tensor>, 4> tensors;
+    tensors[0] = CreateTensor(flatBufferBuilder,
+                              flatBufferBuilder.CreateVector<int32_t>(inputTensorShape.data(),
+                                                                      inputTensorShape.size()),
+                              tensorType,
+                              0,
+                              flatBufferBuilder.CreateString("input"),
+                              quantizationParameters);
+
+    tensors[1] = CreateTensor(flatBufferBuilder,
+                              flatBufferBuilder.CreateVector<int32_t>(blockShape.data(),
+                                                                      blockShape.size()),
+                              ::tflite::TensorType_INT32,
+                              1,
+                              flatBufferBuilder.CreateString("block"),
+                              quantizationParameters);
+
+    tensors[2] = CreateTensor(flatBufferBuilder,
+                              flatBufferBuilder.CreateVector<int32_t>(cropsOrPaddingShape.data(),
+                                                                      cropsOrPaddingShape.size()),
+                              ::tflite::TensorType_INT32,
+                              2,
+                              flatBufferBuilder.CreateString(cropsOrPadding),
+                              quantizationParameters);
+
+    // Create output tensor
+    tensors[3] = CreateTensor(flatBufferBuilder,
+                              flatBufferBuilder.CreateVector<int32_t>(outputTensorShape.data(),
+                                                                      outputTensorShape.size()),
+                              tensorType,
+                              0,
+                              flatBufferBuilder.CreateString("output"),
+                              quantizationParameters);
+
+    // Create operator
+    tflite::BuiltinOptions operatorBuiltinOptionsType = tflite::BuiltinOptions_NONE;
+    flatbuffers::Offset<void> operatorBuiltinOptions = 0;
+    switch (batchSpaceOperatorCode)
+    {
+        case tflite::BuiltinOperator_BATCH_TO_SPACE_ND:
+        {
+            operatorBuiltinOptionsType = tflite::BuiltinOptions_BatchToSpaceNDOptions;
+            operatorBuiltinOptions = CreateBatchToSpaceNDOptions(flatBufferBuilder).Union();
+            break;
+        }
+        case tflite::BuiltinOperator_SPACE_TO_BATCH_ND:
+        {
+            operatorBuiltinOptionsType = tflite::BuiltinOptions_SpaceToBatchNDOptions;
+            operatorBuiltinOptions = CreateSpaceToBatchNDOptions(flatBufferBuilder).Union();
+            break;
+        }
+        default:
+            break;
+    }
+
+    const std::vector<int> operatorInputs{ {0, 1, 2} };
+    const std::vector<int> operatorOutputs{ 3 };
+    flatbuffers::Offset <Operator> batchSpaceOperator =
+            CreateOperator(flatBufferBuilder,
+                           0,
+                           flatBufferBuilder.CreateVector<int32_t>(operatorInputs.data(), operatorInputs.size()),
+                           flatBufferBuilder.CreateVector<int32_t>(operatorOutputs.data(), operatorOutputs.size()),
+                           operatorBuiltinOptionsType,
+                           operatorBuiltinOptions);
+
+    const std::vector<int> subgraphInputs{ {0, 1, 2} };
+    const std::vector<int> subgraphOutputs{ 3 };
+    flatbuffers::Offset <SubGraph> subgraph =
+            CreateSubGraph(flatBufferBuilder,
+                           flatBufferBuilder.CreateVector(tensors.data(), tensors.size()),
+                           flatBufferBuilder.CreateVector<int32_t>(subgraphInputs.data(), subgraphInputs.size()),
+                           flatBufferBuilder.CreateVector<int32_t>(subgraphOutputs.data(), subgraphOutputs.size()),
+                           flatBufferBuilder.CreateVector(&batchSpaceOperator, 1));
+
+    flatbuffers::Offset <flatbuffers::String> modelDescription =
+            flatBufferBuilder.CreateString("ArmnnDelegate: BatchSpace Operator Model");
+    flatbuffers::Offset <OperatorCode> operatorCode = CreateOperatorCode(flatBufferBuilder, batchSpaceOperatorCode);
+
+    flatbuffers::Offset <Model> flatbufferModel =
+            CreateModel(flatBufferBuilder,
+                        TFLITE_SCHEMA_VERSION,
+                        flatBufferBuilder.CreateVector(&operatorCode, 1),
+                        flatBufferBuilder.CreateVector(&subgraph, 1),
+                        modelDescription,
+                        flatBufferBuilder.CreateVector(buffers.data(), buffers.size()));
+
+    flatBufferBuilder.Finish(flatbufferModel);
+
+    return std::vector<char>(flatBufferBuilder.GetBufferPointer(),
+                             flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize());
+}
+
+template <typename T>
+void BatchSpaceTest(tflite::BuiltinOperator batchSpaceOperatorCode,
+                    tflite::TensorType tensorType,
+                    std::vector<armnn::BackendId>& backends,
+                    std::vector<int32_t>& inputShape,
+                    std::vector<int32_t>& expectedOutputShape,
+                    std::vector<T>& inputValues,
+                    std::vector<unsigned int>& blockShapeValues,
+                    std::vector<std::pair<unsigned int, unsigned int>>& cropsPaddingValues,
+                    std::vector<T>& expectedOutputValues,
+                    float quantScale = 1.0f,
+                    int quantOffset  = 0)
+{
+    using namespace tflite;
+    std::vector<char> modelBuffer = CreateBatchSpaceTfLiteModel(batchSpaceOperatorCode,
+                                                                tensorType,
+                                                                inputShape,
+                                                                expectedOutputShape,
+                                                                blockShapeValues,
+                                                                cropsPaddingValues,
+                                                                quantScale,
+                                                                quantOffset);
+
+    const Model* tfLiteModel = GetModel(modelBuffer.data());
+
+    // Create TfLite Interpreters
+    std::unique_ptr<Interpreter> armnnDelegateInterpreter;
+    CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver())
+                  (&armnnDelegateInterpreter) == kTfLiteOk);
+    CHECK(armnnDelegateInterpreter != nullptr);
+    CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk);
+
+    std::unique_ptr<Interpreter> tfLiteInterpreter;
+    CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver())
+                  (&tfLiteInterpreter) == kTfLiteOk);
+    CHECK(tfLiteInterpreter != nullptr);
+    CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk);
+
+    // Create the ArmNN Delegate
+    armnnDelegate::DelegateOptions delegateOptions(backends);
+    std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
+            theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
+                             armnnDelegate::TfLiteArmnnDelegateDelete);
+    CHECK(theArmnnDelegate != nullptr);
+
+    // Modify armnnDelegateInterpreter to use armnnDelegate
+    CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk);
+
+    // Set input data
+    armnnDelegate::FillInput<T>(tfLiteInterpreter, 0, inputValues);
+    armnnDelegate::FillInput<T>(armnnDelegateInterpreter, 0, inputValues);
+
+    // Run inference
+    CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk);
+    CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk);
+
+    // Compare output data
+    armnnDelegate::CompareOutputData<T>(tfLiteInterpreter,
+                                        armnnDelegateInterpreter,
+                                        expectedOutputShape,
+                                        expectedOutputValues);
+
+    armnnDelegateInterpreter.reset(nullptr);
+    tfLiteInterpreter.reset(nullptr);
+}
+
+} // anonymous namespace
\ No newline at end of file
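
Reviewer note (not part of the patch): further cases slot straight into the same helper. A hypothetical signed-quantized variant, added alongside the other cases in BatchSpaceTest.cpp and passing the helper's quantScale/quantOffset parameters explicitly, might look like:

// Hypothetical test case, not included in this patch.
void BatchToSpaceNDInt8Test(std::vector<armnn::BackendId>& backends)
{
    std::vector<int32_t> inputShape { 4, 1, 1, 1 };
    std::vector<int32_t> expectedOutputShape { 1, 2, 2, 1 };

    std::vector<int8_t> inputValues { 1, 2, 3, 4 };
    std::vector<int8_t> expectedOutputValues { 1, 2, 3, 4 };

    std::vector<unsigned int> blockShape({2, 2});
    std::vector<std::pair<unsigned int, unsigned int>> crops = {{0, 0}, {0, 0}};

    BatchSpaceTest<int8_t>(tflite::BuiltinOperator_BATCH_TO_SPACE_ND,
                           ::tflite::TensorType_INT8,
                           backends,
                           inputShape,
                           expectedOutputShape,
                           inputValues,
                           blockShape,
                           crops,
                           expectedOutputValues,
                           1.0f,  // quantScale
                           0);    // quantOffset
}
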
diff --git a/docs/01_03_delegate.dox b/docs/01_03_delegate.dox
index 73d8690..9063f05 100644
--- a/docs/01_03_delegate.dox
+++ b/docs/01_03_delegate.dox
@@ -43,6 +43,8 @@
 
 - AVERAGE_POOL_2D, Supported Fused Activation: RELU , RELU6 , TANH, NONE
 
+- BATCH_TO_SPACE_ND
+
 - CONCATENATION, Supported Fused Activation: RELU , RELU6 , TANH, NONE
 
 - CONV_2D, Supported Fused Activation: RELU , RELU6 , TANH, NONE
@@ -125,6 +127,8 @@
 
 - SOFTMAX
 
+- SPACE_TO_BATCH_ND
+
 - SPACE_TO_DEPTH
 
 - SPLIT