IVGCVSW-2054: BATCH_TO_SPACE_ND Reference implementation and Unit tests.

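BatchToSpaceNd rearranges the batch dimension of a 4D tensor back into
spatial blocks. For example, with block shape {2, 2} and no crops, an
NHWC input of shape [4, 2, 2, 1] produces an output of shape
[1, 4, 4, 1] (see the new LayerTests). A minimal sketch of the new
public API, assuming an existing INetworkPtr named 'network':

    armnn::BatchToSpaceNdDescriptor descriptor({2, 2}, {{0, 0}, {0, 0}});
    descriptor.m_DataLayout = armnn::DataLayout::NHWC;
    armnn::IConnectableLayer* const layer =
        network->AddBatchToSpaceNdLayer(descriptor, "batchToSpaceNd");
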
Change-Id: I13c6728dbb60643d0e086d171225c5d802987f92
diff --git a/Android.mk b/Android.mk
index ee24312..4742aed 100644
--- a/Android.mk
+++ b/Android.mk
@@ -80,6 +80,7 @@
         src/armnn/layers/AdditionLayer.cpp \
         src/armnn/layers/ArithmeticBaseLayer.cpp \
         src/armnn/layers/BatchNormalizationLayer.cpp \
+        src/armnn/layers/BatchToSpaceNdLayer.cpp \
         src/armnn/layers/ConstantLayer.cpp \
         src/armnn/layers/Convolution2dLayer.cpp \
         src/armnn/layers/ConvertFp16ToFp32Layer.cpp \
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 48176c1..95ca39f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -175,6 +175,8 @@
     src/armnn/layers/ArithmeticBaseLayer.cpp
     src/armnn/layers/BatchNormalizationLayer.hpp
     src/armnn/layers/BatchNormalizationLayer.cpp
+    src/armnn/layers/BatchToSpaceNdLayer.hpp
+    src/armnn/layers/BatchToSpaceNdLayer.cpp
     src/armnn/layers/ConstantLayer.hpp
     src/armnn/layers/ConstantLayer.cpp
     src/armnn/layers/Convolution2dLayer.hpp
diff --git a/include/armnn/Descriptors.hpp b/include/armnn/Descriptors.hpp
index 656afb1..bda8cf7 100644
--- a/include/armnn/Descriptors.hpp
+++ b/include/armnn/Descriptors.hpp
@@ -296,6 +296,25 @@
     DataLayoutIndexed m_DataLayout;
 };
 
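+/// A BatchToSpaceNdDescriptor for the BatchToSpaceNdLayer.
+/// m_BlockShape holds the block size for each spatial dimension and m_Crops the {begin, end}
+/// crop for each spatial dimension, e.g. BatchToSpaceNdDescriptor({2, 2}, {{0, 0}, {0, 0}}).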
+struct BatchToSpaceNdDescriptor
+{
+    BatchToSpaceNdDescriptor()
+        : m_BlockShape()
+        , m_Crops()
+        , m_DataLayout(DataLayout::NCHW)
+    {}
+
+    BatchToSpaceNdDescriptor(std::vector<unsigned int> blockShape, std::vector<std::vector<unsigned int>> crops)
+        : m_BlockShape(blockShape)
+        , m_Crops(crops)
+        , m_DataLayout(DataLayout::NCHW)
+    {}
+
+    std::vector<unsigned int> m_BlockShape;
+    std::vector<std::vector<unsigned int>> m_Crops;
+    DataLayoutIndexed m_DataLayout;
+};
+
 struct FakeQuantizationDescriptor
 {
     FakeQuantizationDescriptor()
diff --git a/include/armnn/DescriptorsFwd.hpp b/include/armnn/DescriptorsFwd.hpp
index a2974d7..0e96c36 100644
--- a/include/armnn/DescriptorsFwd.hpp
+++ b/include/armnn/DescriptorsFwd.hpp
@@ -8,6 +8,7 @@
 {
 struct ActivationDescriptor;
 struct BatchNormalizationDescriptor;
+struct BatchToSpaceNdDescriptor;
 struct Convolution2dDescriptor;
 struct DepthwiseConvolution2dDescriptor;
 struct FakeQuantizationDescriptor;
diff --git a/include/armnn/ILayerSupport.hpp b/include/armnn/ILayerSupport.hpp
index 0e7adff..1bf268f 100644
--- a/include/armnn/ILayerSupport.hpp
+++ b/include/armnn/ILayerSupport.hpp
@@ -41,6 +41,11 @@
                                                const BatchNormalizationDescriptor& descriptor,
                                                Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const;
 
+    virtual bool IsBatchToSpaceNdSupported(const TensorInfo& input,
+                                           const TensorInfo& output,
+                                           const BatchToSpaceNdDescriptor& descriptor,
+                                           Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const;
+
     virtual bool IsConstantSupported(const TensorInfo& output,
                                      Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const;
 
diff --git a/include/armnn/INetwork.hpp b/include/armnn/INetwork.hpp
index 2cb8f28..df274d6 100644
--- a/include/armnn/INetwork.hpp
+++ b/include/armnn/INetwork.hpp
@@ -149,6 +149,13 @@
     virtual IConnectableLayer* AddPermuteLayer(const PermuteDescriptor& permuteDescriptor,
                                                const char* name = nullptr) = 0;
 
+    /// Adds a BatchToSpaceNd layer to the network.
+    /// @param batchToSpaceNdDescriptor - Description of the layer.
+    /// @param name - Optional name for the layer.
+    /// @return - Interface for configuring the layer.
+    virtual IConnectableLayer* AddBatchToSpaceNdLayer(const BatchToSpaceNdDescriptor& batchToSpaceNdDescriptor,
+                                                      const char* name = nullptr) = 0;
+
     /// Adds a pooling layer to the network.
     /// @param pooling2dDescriptor - Pooling2dDescriptor to configure the pooling.
     /// @param name - Optional name for the layer.
diff --git a/include/armnn/LayerSupport.hpp b/include/armnn/LayerSupport.hpp
index eb6b5da..bd20f18 100644
--- a/include/armnn/LayerSupport.hpp
+++ b/include/armnn/LayerSupport.hpp
@@ -41,6 +41,14 @@
                                    size_t reasonIfUnsupportedMaxLength = 1024);
 
 /// Deprecated in favor of IBackend and ILayerSupport interfaces
+bool IsBatchToSpaceNdSupported(const BackendId& backend,
+                               const TensorInfo& input,
+                               const TensorInfo& output,
+                               const BatchToSpaceNdDescriptor& descriptor,
+                               char* reasonIfUnsupported = nullptr,
+                               size_t reasonIfUnsupportedMaxLength = 1024);
+
+/// Deprecated in favor of IBackend and ILayerSupport interfaces
 bool IsConstantSupported(const BackendId& backend,
                          const TensorInfo& output,
                          char* reasonIfUnsupported = nullptr,
diff --git a/src/armnn/InternalTypes.cpp b/src/armnn/InternalTypes.cpp
index 931b6a3..3493a3d 100644
--- a/src/armnn/InternalTypes.cpp
+++ b/src/armnn/InternalTypes.cpp
@@ -17,6 +17,7 @@
         case LayerType::Activation: return "Activation";
         case LayerType::Addition: return "Addition";
         case LayerType::BatchNormalization: return "BatchNormalization";
+        case LayerType::BatchToSpaceNd: return "BatchToSpaceNd";
         case LayerType::Constant: return "Constant";
         case LayerType::ConvertFp16ToFp32: return "ConvertFp16ToFp32";
         case LayerType::ConvertFp32ToFp16: return "ConvertFp32ToFp16";
diff --git a/src/armnn/InternalTypes.hpp b/src/armnn/InternalTypes.hpp
index 0661b16..dc3c55e 100644
--- a/src/armnn/InternalTypes.hpp
+++ b/src/armnn/InternalTypes.hpp
@@ -17,6 +17,7 @@
     Activation = FirstLayer,
     Addition,
     BatchNormalization,
+    BatchToSpaceNd,
     Constant,
     ConvertFp16ToFp32,
     ConvertFp32ToFp16,
diff --git a/src/armnn/LayerSupport.cpp b/src/armnn/LayerSupport.cpp
index fb3ce43..5d2d205 100644
--- a/src/armnn/LayerSupport.cpp
+++ b/src/armnn/LayerSupport.cpp
@@ -12,6 +12,7 @@
 #include <cstring>
 #include <algorithm>
 #include <unordered_map>
+#include <armnn/ArmNN.hpp>
 
 namespace armnn
 {
@@ -100,6 +101,20 @@
                                descriptor);
 }
 
+bool IsBatchToSpaceNdSupported(const BackendId& backend,
+                               const TensorInfo& input,
+                               const TensorInfo& output,
+                               const BatchToSpaceNdDescriptor& descriptor,
+                               char* reasonIfUnsupported,
+                               size_t reasonIfUnsupportedMaxLength)
+{
+    FORWARD_LAYER_SUPPORT_FUNC(backend,
+                               IsBatchToSpaceNdSupported,
+                               input,
+                               output,
+                               descriptor);
+}
+
 bool IsConstantSupported(const BackendId& backend,
                          const TensorInfo& output,
                          char* reasonIfUnsupported,
diff --git a/src/armnn/LayersFwd.hpp b/src/armnn/LayersFwd.hpp
index 5c08b66..bd1297b 100644
--- a/src/armnn/LayersFwd.hpp
+++ b/src/armnn/LayersFwd.hpp
@@ -9,6 +9,7 @@
 #include "layers/ActivationLayer.hpp"
 #include "layers/AdditionLayer.hpp"
 #include "layers/BatchNormalizationLayer.hpp"
+#include "layers/BatchToSpaceNdLayer.hpp"
 #include "layers/ConstantLayer.hpp"
 #include "layers/ConvertFp16ToFp32Layer.hpp"
 #include "layers/ConvertFp32ToFp16Layer.hpp"
@@ -67,6 +68,7 @@
 DECLARE_LAYER(Activation)
 DECLARE_LAYER(Addition)
 DECLARE_LAYER(BatchNormalization)
+DECLARE_LAYER(BatchToSpaceNd)
 DECLARE_LAYER(Constant)
 DECLARE_LAYER(ConvertFp16ToFp32)
 DECLARE_LAYER(ConvertFp32ToFp16)
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index 7b430c3..3b3ee31 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -351,6 +351,12 @@
     return m_Graph->AddLayer<InputLayer>(id, name);
 }
 
+IConnectableLayer* Network::AddBatchToSpaceNdLayer(const BatchToSpaceNdDescriptor& batchToSpaceNdDescriptor,
+                                                   const char* name)
+{
+    return m_Graph->AddLayer<BatchToSpaceNdLayer>(batchToSpaceNdDescriptor, name);
+}
+
 IConnectableLayer* Network::AddFullyConnectedLayerImpl(const FullyConnectedDescriptor& fullyConnectedDescriptor,
                                                        const ConstTensor& weights,
                                                        const ConstTensor* biases,
diff --git a/src/armnn/Network.hpp b/src/armnn/Network.hpp
index 4a93dd1..95cdb28 100644
--- a/src/armnn/Network.hpp
+++ b/src/armnn/Network.hpp
@@ -34,6 +34,9 @@
 
     IConnectableLayer* AddInputLayer(LayerBindingId id, const char* name=nullptr) override;
 
+    IConnectableLayer* AddBatchToSpaceNdLayer(const BatchToSpaceNdDescriptor& batchToSpaceNdDescriptor,
+                                              const char* name = nullptr) override;
+
     IConnectableLayer* AddConvolution2dLayer(const Convolution2dDescriptor& convolution2dDescriptor,
         const ConstTensor& weights,
         const char* name = nullptr) override;
diff --git a/src/armnn/layers/BatchToSpaceNdLayer.cpp b/src/armnn/layers/BatchToSpaceNdLayer.cpp
new file mode 100644
index 0000000..595ce4a
--- /dev/null
+++ b/src/armnn/layers/BatchToSpaceNdLayer.cpp
@@ -0,0 +1,89 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#include "BatchToSpaceNdLayer.hpp"
+
+#include "LayerCloneBase.hpp"
+#include "LayerWithParameters.hpp"
+
+#include <armnn/TypesUtils.hpp>
+#include <backendsCommon/CpuTensorHandle.hpp>
+#include <backendsCommon/WorkloadData.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+namespace armnn
+{
+
+BatchToSpaceNdLayer::BatchToSpaceNdLayer(const armnn::BatchToSpaceNdDescriptor& param, const char* name)
+    : LayerWithParameters(1, 1, LayerType::BatchToSpaceNd, param, name)
+{
+}
+
+std::unique_ptr<IWorkload> BatchToSpaceNdLayer::CreateWorkload(const Graph& graph,
+                                                               const IWorkloadFactory& factory) const
+{
+    BatchToSpaceNdQueueDescriptor descriptor;
+
+    return factory.CreateBatchToSpaceNd(descriptor, PrepInfoAndDesc(descriptor, graph));
+}
+
+BatchToSpaceNdLayer* BatchToSpaceNdLayer::Clone(Graph& graph) const
+{
+    return CloneBase<BatchToSpaceNdLayer>(graph, m_Param, GetName());
+}
+
+void BatchToSpaceNdLayer::ValidateTensorShapesFromInputs()
+{
+    VerifyLayerConnections(1, CHECK_LOCATION());
+
+    auto inferredShapes = InferOutputShapes({GetInputSlot(0).GetConnection()->GetTensorInfo().GetShape()});
+
+    BOOST_ASSERT(inferredShapes.size() == 1);
+
+    ConditionalThrowIfNotEqual<LayerValidationException>(
+        "BatchToSpaceNdLayer: TensorShape set on OutputSlot[0] does not match the inferred shape.",
+        GetOutputSlot(0).GetTensorInfo().GetShape(), inferredShapes[0]);
+}
+
+std::vector<TensorShape> BatchToSpaceNdLayer::InferOutputShapes(const std::vector<TensorShape>& inputShapes) const
+{
+    const DataLayoutIndexed& dataLayout = m_Param.m_DataLayout;
+    const TensorShape& inputShape = inputShapes[0];
+    unsigned int inBatchSize = inputShape[0];
+    unsigned int channelSize = inputShape[dataLayout.GetChannelsIndex()];
+
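+    // The batch dimension shrinks by the product of the block shape, while each spatial
+    // dimension grows by its block factor (less the crops applied to that dimension).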
+    std::vector<unsigned int> theBlockShape = m_Param.m_BlockShape;
+
+    unsigned int blockSize = 1;
+
+    for (unsigned int i = 0; i < theBlockShape.size(); ++i)
+    {
+        blockSize = blockSize * theBlockShape.at(i);
+    }
+
+    std::vector<std::vector<unsigned int>> crops = m_Param.m_Crops;
+
+    std::vector<unsigned int> yCrops = crops[0];
+    std::vector<unsigned int> xCrops = crops[1];
+
+    unsigned int inputHeight = inputShape[dataLayout.GetHeightIndex()];
+    unsigned int outputHeight = inputHeight * theBlockShape.at(0) - (yCrops[0] + yCrops[1]);
+
+    unsigned int inputWidth = inputShape[dataLayout.GetWidthIndex()];
+    unsigned int outputWidth = inputWidth * theBlockShape.at(1) - (xCrops[0] + xCrops[1]);
+
+    unsigned int outputBatchSize = inBatchSize / blockSize;
+
+    if (dataLayout.GetDataLayout() == DataLayout::NHWC)
+    {
+        return std::vector<TensorShape>({ TensorShape({ outputBatchSize, outputHeight, outputWidth, channelSize }) });
+    }
+    else
+    {
+        return std::vector<TensorShape>({ TensorShape({ outputBatchSize, channelSize, outputHeight, outputWidth }) });
+    }
+}
+} // namespace armnn
diff --git a/src/armnn/layers/BatchToSpaceNdLayer.hpp b/src/armnn/layers/BatchToSpaceNdLayer.hpp
new file mode 100644
index 0000000..eb5f979
--- /dev/null
+++ b/src/armnn/layers/BatchToSpaceNdLayer.hpp
@@ -0,0 +1,29 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include "LayerWithParameters.hpp"
+
+namespace armnn
+{
+
+class BatchToSpaceNdLayer : public LayerWithParameters<BatchToSpaceNdDescriptor>
+{
+public:
+    virtual std::unique_ptr<IWorkload> CreateWorkload(const Graph&            graph,
+                                                      const IWorkloadFactory& factory) const override;
+
+    BatchToSpaceNdLayer* Clone(Graph& graph) const override;
+
+    void ValidateTensorShapesFromInputs() override;
+
+    std::vector<TensorShape> InferOutputShapes(const std::vector<TensorShape>& inputShapes) const override;
+
+protected:
+    BatchToSpaceNdLayer(const BatchToSpaceNdDescriptor& param, const char* name);
+    ~BatchToSpaceNdLayer() = default;
+};
+
+} // namespace armnn
diff --git a/src/backends/backendsCommon/ILayerSupport.cpp b/src/backends/backendsCommon/ILayerSupport.cpp
index ebfff5d..2cd57b7 100644
--- a/src/backends/backendsCommon/ILayerSupport.cpp
+++ b/src/backends/backendsCommon/ILayerSupport.cpp
@@ -59,6 +59,14 @@
     return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported);
 }
 
+bool ILayerSupport::IsBatchToSpaceNdSupported(const TensorInfo& input,
+                                              const TensorInfo& output,
+                                              const BatchToSpaceNdDescriptor& descriptor,
+                                              Optional<std::string&> reasonIfUnsupported) const
+{
+    return DefaultLayerSupport(__func__, __FILE__, __LINE__, reasonIfUnsupported);
+}
+
 bool ILayerSupport::IsConstantSupported(const TensorInfo& output,
                                         Optional<std::string&> reasonIfUnsupported) const
 {
diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp
index 7c02947..9fbdfe9 100644
--- a/src/backends/backendsCommon/WorkloadData.cpp
+++ b/src/backends/backendsCommon/WorkloadData.cpp
@@ -918,4 +918,10 @@
     }
 }
 
-} //namespace armnn
+void BatchToSpaceNdQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
+{
+    ValidateSingleInput(workloadInfo, "BatchToSpaceNdQueueDescriptor");
+    ValidateSingleOutput(workloadInfo, "BatchToSpaceNdQueueDescriptor");
+}
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/backends/backendsCommon/WorkloadData.hpp b/src/backends/backendsCommon/WorkloadData.hpp
index 7fb8855..d54a71a 100644
--- a/src/backends/backendsCommon/WorkloadData.hpp
+++ b/src/backends/backendsCommon/WorkloadData.hpp
@@ -335,4 +335,8 @@
     void Validate(const WorkloadInfo& workloadInfo) const;
 };
 
+struct BatchToSpaceNdQueueDescriptor : QueueDescriptorWithParameters<BatchToSpaceNdDescriptor>
+{
+    void Validate(const WorkloadInfo& workloadInfo) const;
+};
 } //namespace armnn
diff --git a/src/backends/backendsCommon/WorkloadFactory.cpp b/src/backends/backendsCommon/WorkloadFactory.cpp
index 9f97452..ec30f34 100644
--- a/src/backends/backendsCommon/WorkloadFactory.cpp
+++ b/src/backends/backendsCommon/WorkloadFactory.cpp
@@ -116,6 +116,18 @@
                                                    reason);
             break;
         }
+        case LayerType::BatchToSpaceNd:
+        {
+            const TensorInfo& input = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
+            const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
+            auto cLayer = boost::polymorphic_downcast<const BatchToSpaceNdLayer*>(&layer);
+
+            result = layerSupportObject->IsBatchToSpaceNdSupported(OverrideDataType(input, dataType),
+                                                                   OverrideDataType(output, dataType),
+                                                                   cLayer->GetParameters(),
+                                                                   reason);
+            break;
+        }
         case LayerType::Constant:
         {
             const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
diff --git a/src/backends/backendsCommon/WorkloadFactory.hpp b/src/backends/backendsCommon/WorkloadFactory.hpp
index 67876e1..e3be9f5 100644
--- a/src/backends/backendsCommon/WorkloadFactory.hpp
+++ b/src/backends/backendsCommon/WorkloadFactory.hpp
@@ -97,6 +97,9 @@
     virtual std::unique_ptr<IWorkload> CreateBatchNormalization(const BatchNormalizationQueueDescriptor& descriptor,
                                                                 const WorkloadInfo& info) const = 0;
 
+    virtual std::unique_ptr<IWorkload> CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor& descriptor,
+                                                            const WorkloadInfo& info) const = 0;
+
     virtual std::unique_ptr<IWorkload> CreateMemCopy(const MemCopyQueueDescriptor& descriptor,
                                                      const WorkloadInfo& info) const = 0;
 
diff --git a/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp b/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp
index 2c992bc..2507905 100644
--- a/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp
+++ b/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp
@@ -92,6 +92,20 @@
 };
 
 template<>
+struct DummyLayer<armnn::BatchToSpaceNdLayer>
+{
+    DummyLayer()
+    {
+        m_Layer = dummyGraph.AddLayer<armnn::BatchToSpaceNdLayer>(armnn::BatchToSpaceNdDescriptor(), "");
+    }
+    ~DummyLayer()
+    {
+        dummyGraph.EraseLayer(m_Layer);
+    }
+    armnn::BatchToSpaceNdLayer* m_Layer;
+};
+
+template<>
 struct DummyLayer<armnn::ConstantLayer, void>
 {
     DummyLayer()
@@ -306,6 +320,8 @@
 
 DECLARE_LAYER_POLICY_2_PARAM(BatchNormalization)
 
+DECLARE_LAYER_POLICY_2_PARAM(BatchToSpaceNd)
+
 DECLARE_LAYER_POLICY_1_PARAM(Constant)
 
 DECLARE_LAYER_POLICY_1_PARAM(ConvertFp16ToFp32)
diff --git a/src/backends/backendsCommon/test/LayerTests.cpp b/src/backends/backendsCommon/test/LayerTests.cpp
index cdc989f..4a00303 100755
--- a/src/backends/backendsCommon/test/LayerTests.cpp
+++ b/src/backends/backendsCommon/test/LayerTests.cpp
@@ -6169,3 +6169,170 @@
 {
     return SpaceToBatchNdPaddingNHWCTest<uint8_t>(workloadFactory);
 }
+
+namespace {
+
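+// Builds a BatchToSpaceNd workload through the given factory, runs it, and returns the actual
+// and expected outputs for comparison.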
+template<typename T, std::size_t InputDim, std::size_t OutputDim>
+LayerTestResult<T, OutputDim> BatchToSpaceNdHelper(armnn::IWorkloadFactory &workloadFactory,
+                                                   const armnn::DataLayout& dataLayout,
+                                                   const unsigned int *inputShape,
+                                                   const std::vector<T> &inputData,
+                                                   const std::vector<unsigned int> &blockShape,
+                                                   const std::vector<std::vector<unsigned int>> &crops,
+                                                   const unsigned int *outputShape,
+                                                   const std::vector<T> &outputData,
+                                                   float scale = 1.0f,
+                                                   int32_t offset = 0)
+{
+    auto dataType = (std::is_same<T, uint8_t>::value ? armnn::DataType::QuantisedAsymm8 : armnn::DataType::Float32);
+
+    armnn::TensorInfo inputTensorInfo(InputDim, inputShape, dataType);
+    armnn::TensorInfo outputTensorInfo(OutputDim, outputShape, dataType);
+
+    inputTensorInfo.SetQuantizationScale(scale);
+    inputTensorInfo.SetQuantizationOffset(offset);
+
+    outputTensorInfo.SetQuantizationScale(scale);
+    outputTensorInfo.SetQuantizationOffset(offset);
+
+    auto input = MakeTensor<T, InputDim>(inputTensorInfo, inputData);
+
+    LayerTestResult<T, OutputDim> result(outputTensorInfo);
+    result.outputExpected = MakeTensor<T, OutputDim>(outputTensorInfo, outputData);
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::BatchToSpaceNdQueueDescriptor data;
+    data.m_Parameters.m_DataLayout = dataLayout;
+    data.m_Parameters.m_BlockShape = blockShape;
+    data.m_Parameters.m_Crops = crops;
+    armnn::WorkloadInfo info;
+    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateBatchToSpaceNd(data, info);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), input.origin());
+
+    workload->Execute();
+
+    CopyDataFromITensorHandle(&result.output[0][0][0][0], outputHandle.get());
+
+    return result;
+}
+
+} // anonymous namespace
+
+LayerTestResult<float, 4> BatchToSpaceNdNhwcFloat32Test1(armnn::IWorkloadFactory& workloadFactory)
+{
+    const unsigned int inputShape[] = {4, 2, 2, 1};
+    const unsigned int outputShape[] = {1, 4, 4, 1};
+
+    std::vector<float> input
+    ({
+        // Batch 0, Height 0, Width (2) x Channel (1)
+        1.0f, 3.0f,
+        // Batch 0, Height 1, Width (2) x Channel (1)
+        9.0f, 11.0f,
+
+
+        // Batch 1, Height 0, Width (2) x Channel (1)
+        2.0f, 4.0f,
+        // Batch 1, Height 1, Width (2) x Channel (1)
+        10.0f, 12.0f,
+
+
+        // Batch 2, Height 0, Width (2) x Channel (1)
+        5.0f, 7.0f,
+        // Batch 2, Height 1, Width (2) x Channel (1)
+        13.0f, 15.0f,
+
+        // Batch 3, Height 0, Width (2) x Channel (1)
+        6.0f, 8.0f,
+        // Batch 3, Height 1, Width (2) x Channel (1)
+        14.0f, 16.0f
+    });
+
+    std::vector<float> expectedOutput
+    ({
+        1.0f,   2.0f,  3.0f,  4.0f,
+        5.0f,   6.0f,  7.0f,  8.0f,
+        9.0f,  10.0f, 11.0f,  12.0f,
+        13.0f, 14.0f, 15.0f,  16.0f
+    });
+
+    std::vector<unsigned int> blockShape {2, 2};
+    std::vector<std::vector<unsigned int>> crops = {{0, 0}, {0, 0}};
+
+    return BatchToSpaceNdHelper<float, 4, 4>(workloadFactory, armnn::DataLayout::NHWC, inputShape, input, blockShape,
+            crops, outputShape, expectedOutput);
+}
+
+LayerTestResult<float, 4> BatchToSpaceNdNhwcFloat32Test2(armnn::IWorkloadFactory& workloadFactory)
+{
+    const unsigned int inputShape[] = {4, 1, 1, 1};
+    const unsigned int outputShape[] = {1, 2, 2, 1};
+
+    std::vector<float> input
+    ({
+         // Batches 0-3, each Height (1) x Width (1) x Channel (1)
+         1.0f, 2.0f, 3.0f, 4.0f
+    });
+
+    std::vector<float> expectedOutput({1.0f,   2.0f,  3.0f,  4.0f});
+
+    std::vector<unsigned int> blockShape({2, 2});
+    std::vector<std::vector<unsigned int>> crops = {{0, 0}, {0, 0}};
+
+    return BatchToSpaceNdHelper<float, 4, 4>(workloadFactory, armnn::DataLayout::NHWC, inputShape, input, blockShape,
+                                             crops, outputShape, expectedOutput);
+}
+
+LayerTestResult<float, 4> BatchToSpaceNdNhwcFloat32Test3(armnn::IWorkloadFactory& workloadFactory)
+{
+    const unsigned int inputShape[] = {4, 1, 1, 3};
+    const unsigned int outputShape[] = {1, 2, 2, 3};
+
+    std::vector<float> input({ 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f });
+
+    std::vector<float> expectedOutput({ 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f });
+
+    std::vector<unsigned int> blockShape({2, 2});
+    std::vector<std::vector<unsigned int>> crops = {{0, 0}, {0, 0}};
+
+    return BatchToSpaceNdHelper<float, 4, 4>(workloadFactory, armnn::DataLayout::NHWC, inputShape, input, blockShape,
+                                             crops, outputShape, expectedOutput);
+}
+
+LayerTestResult<float, 4> BatchToSpaceNdNchwFloat32Test1(armnn::IWorkloadFactory& workloadFactory)
+{
+    const unsigned int inputShape[] = {4, 3, 1, 1};
+    const unsigned int outputShape[] = {1, 3, 2, 2};
+
+    std::vector<float> input({ 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f });
+
+    std::vector<float> expectedOutput
+    ({
+         // Batch 0, Channel 0, Height (2) x Width (2)
+         1.0f,  4.0f,
+         7.0f, 10.0f,
+
+         // Batch 0, Channel 1, Height (2) x Width (2)
+         2.0f,  5.0f,
+         8.0f, 11.0f,
+
+         // Batch 0, Channel 2, Height (2) x Width (2)
+         3.0f,  6.0f,
+         9.0f, 12.0f,
+    });
+
+    std::vector<unsigned int> blockShape({2, 2});
+    std::vector<std::vector<unsigned int>> crops = {{0, 0}, {0, 0}};
+
+    return BatchToSpaceNdHelper<float, 4, 4>(workloadFactory, armnn::DataLayout::NCHW, inputShape, input, blockShape,
+                                             crops, outputShape, expectedOutput);
+}
diff --git a/src/backends/backendsCommon/test/LayerTests.hpp b/src/backends/backendsCommon/test/LayerTests.hpp
index 66032c8..cd8758e 100644
--- a/src/backends/backendsCommon/test/LayerTests.hpp
+++ b/src/backends/backendsCommon/test/LayerTests.hpp
@@ -434,3 +434,9 @@
 LayerTestResult<uint8_t, 4> SpaceToBatchNdMultiChannelsNHWCUint8Test(armnn::IWorkloadFactory& workloadFactory);
 LayerTestResult<uint8_t, 4> SpaceToBatchNdMultiBlockNHWCUint8Test(armnn::IWorkloadFactory& workloadFactory);
 LayerTestResult<uint8_t, 4> SpaceToBatchNdPaddingNHWCUint8Test(armnn::IWorkloadFactory& workloadFactory);
+
+LayerTestResult<float, 4> BatchToSpaceNdNhwcFloat32Test1(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<float, 4> BatchToSpaceNdNhwcFloat32Test2(armnn::IWorkloadFactory& workloadFactory);
+LayerTestResult<float, 4> BatchToSpaceNdNhwcFloat32Test3(armnn::IWorkloadFactory& workloadFactory);
+
+LayerTestResult<float, 4> BatchToSpaceNdNchwFloat32Test1(armnn::IWorkloadFactory& workloadFactory);
diff --git a/src/backends/cl/ClWorkloadFactory.cpp b/src/backends/cl/ClWorkloadFactory.cpp
index eece934..0862ea1 100644
--- a/src/backends/cl/ClWorkloadFactory.cpp
+++ b/src/backends/cl/ClWorkloadFactory.cpp
@@ -314,6 +314,12 @@
     return MakeWorkload<ClPadWorkload>(descriptor, info);
 }
 
+std::unique_ptr<IWorkload> ClWorkloadFactory::CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor& descriptor,
+                                                                   const WorkloadInfo& info) const
+{
+    return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info);
+}
+
 void ClWorkloadFactory::Release()
 {
     m_MemoryManager.Release();
@@ -530,6 +536,12 @@
     return nullptr;
 }
 
+std::unique_ptr<IWorkload> ClWorkloadFactory::CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor& descriptor,
+                                                        const WorkloadInfo& info) const
+{
+    return nullptr;
+}
+
 void ClWorkloadFactory::Release()
 {
 }
diff --git a/src/backends/cl/ClWorkloadFactory.hpp b/src/backends/cl/ClWorkloadFactory.hpp
index c45bc15..6a928db 100644
--- a/src/backends/cl/ClWorkloadFactory.hpp
+++ b/src/backends/cl/ClWorkloadFactory.hpp
@@ -126,6 +126,9 @@
     virtual std::unique_ptr<IWorkload> CreatePad(const PadQueueDescriptor& descriptor,
                                                  const WorkloadInfo& info) const override;
 
+    virtual std::unique_ptr<IWorkload> CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor& descriptor,
+                                                            const WorkloadInfo& info) const override;
+
     virtual void Release() override;
 
     virtual void Acquire() override;
diff --git a/src/backends/neon/NeonWorkloadFactory.cpp b/src/backends/neon/NeonWorkloadFactory.cpp
index 11b5634..f0d916b 100644
--- a/src/backends/neon/NeonWorkloadFactory.cpp
+++ b/src/backends/neon/NeonWorkloadFactory.cpp
@@ -279,6 +279,12 @@
     return MakeWorkloadHelper<NullWorkload, NullWorkload>(descriptor, info);
 }
 
+std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor& descriptor,
+                                                                     const WorkloadInfo& info) const
+{
+    return MakeWorkloadHelper<NullWorkload, NullWorkload>(descriptor, info);
+}
+
 void NeonWorkloadFactory::Release()
 {
     m_MemoryManager.Release();
@@ -495,6 +501,12 @@
     return nullptr;
 }
 
+std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor& descriptor,
+                                                                     const WorkloadInfo& info) const
+{
+    return nullptr;
+}
+
 void NeonWorkloadFactory::Release()
 {}
 
diff --git a/src/backends/neon/NeonWorkloadFactory.hpp b/src/backends/neon/NeonWorkloadFactory.hpp
index 9b574f6..98f323a 100644
--- a/src/backends/neon/NeonWorkloadFactory.hpp
+++ b/src/backends/neon/NeonWorkloadFactory.hpp
@@ -128,6 +128,9 @@
     virtual std::unique_ptr<IWorkload> CreatePad(const PadQueueDescriptor& descriptor,
                                                  const WorkloadInfo& info) const override;
 
+    virtual std::unique_ptr<IWorkload> CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor& descriptor,
+                                                            const WorkloadInfo& info) const override;
+
     virtual void Release() override;
 
     virtual void Acquire() override;
diff --git a/src/backends/reference/RefLayerSupport.cpp b/src/backends/reference/RefLayerSupport.cpp
index 0902b0f..b057370 100644
--- a/src/backends/reference/RefLayerSupport.cpp
+++ b/src/backends/reference/RefLayerSupport.cpp
@@ -101,6 +101,22 @@
                                      &TrueFunc<>);
 }
 
+bool RefLayerSupport::IsBatchToSpaceNdSupported(const TensorInfo& input,
+                                                const TensorInfo& output,
+                                                const BatchToSpaceNdDescriptor& descriptor,
+                                                Optional<std::string&> reasonIfUnsupported) const
+{
+    ignore_unused(descriptor);
+    return (IsSupportedForDataTypeRef(reasonIfUnsupported,
+                                      input.GetDataType(),
+                                      &TrueFunc<>,
+                                      &TrueFunc<>) &&
+            IsSupportedForDataTypeRef(reasonIfUnsupported,
+                                      output.GetDataType(),
+                                      &TrueFunc<>,
+                                      &TrueFunc<>));
+}
+
 bool RefLayerSupport::IsConstantSupported(const TensorInfo& output,
                                           Optional<std::string&> reasonIfUnsupported) const
 {
diff --git a/src/backends/reference/RefLayerSupport.hpp b/src/backends/reference/RefLayerSupport.hpp
index b161f5c..2e86ece 100644
--- a/src/backends/reference/RefLayerSupport.hpp
+++ b/src/backends/reference/RefLayerSupport.hpp
@@ -31,6 +31,11 @@
                                        const BatchNormalizationDescriptor& descriptor,
                                        Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
 
+    bool IsBatchToSpaceNdSupported(const TensorInfo& input,
+                                   const TensorInfo& output,
+                                   const BatchToSpaceNdDescriptor& descriptor,
+                                   Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+
     bool IsConstantSupported(const TensorInfo& output,
                              Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
 
diff --git a/src/backends/reference/RefWorkloadFactory.cpp b/src/backends/reference/RefWorkloadFactory.cpp
index a238d5f..afffd65 100644
--- a/src/backends/reference/RefWorkloadFactory.cpp
+++ b/src/backends/reference/RefWorkloadFactory.cpp
@@ -270,5 +270,10 @@
     return MakeWorkload<RefPadFloat32Workload, RefPadUint8Workload>(descriptor, info);
 }
 
+std::unique_ptr<IWorkload> RefWorkloadFactory::CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor& descriptor,
+                                                                    const WorkloadInfo& info) const
+{
+    return MakeWorkload<RefBatchToSpaceNdFloat32Workload, RefBatchToSpaceNdUint8Workload>(descriptor, info);
+}
 
 } // namespace armnn
diff --git a/src/backends/reference/RefWorkloadFactory.hpp b/src/backends/reference/RefWorkloadFactory.hpp
index e9b298d..91bba84 100644
--- a/src/backends/reference/RefWorkloadFactory.hpp
+++ b/src/backends/reference/RefWorkloadFactory.hpp
@@ -143,6 +143,9 @@
 
     virtual std::unique_ptr<IWorkload> CreatePad(const PadQueueDescriptor& descriptor,
                                                  const WorkloadInfo& info) const override;
+
+    virtual std::unique_ptr<IWorkload> CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor& descriptor,
+                                                            const WorkloadInfo& info) const override;
 private:
 
     template <typename F32Workload, typename U8Workload, typename QueueDescriptorType>
diff --git a/src/backends/reference/backend.mk b/src/backends/reference/backend.mk
index cc8c24f..7d56144 100644
--- a/src/backends/reference/backend.mk
+++ b/src/backends/reference/backend.mk
@@ -13,6 +13,7 @@
         RefWorkloadFactory.cpp \
         workloads/Activation.cpp \
         workloads/ArithmeticFunction.cpp \
+        workloads/BatchToSpaceNd.cpp \
         workloads/Broadcast.cpp \
         workloads/ConvImpl.cpp \
         workloads/FullyConnected.cpp \
@@ -25,6 +26,8 @@
         workloads/RefBaseConstantWorkload.cpp \
         workloads/RefBatchNormalizationFloat32Workload.cpp \
         workloads/RefBatchNormalizationUint8Workload.cpp \
+        workloads/RefBatchToSpaceNdFloat32Workload.cpp \
+        workloads/RefBatchToSpaceNdUint8Workload.cpp \
         workloads/RefConstantFloat32Workload.cpp \
         workloads/RefConstantUint8Workload.cpp \
         workloads/RefConvertFp16ToFp32Workload.cpp \
diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp
index df0e378..703ec58 100644
--- a/src/backends/reference/test/RefLayerTests.cpp
+++ b/src/backends/reference/test/RefLayerTests.cpp
@@ -376,4 +376,10 @@
 ARMNN_AUTO_TEST_CASE(SpaceToBatchNdMultiBlockNHWCUint8, SpaceToBatchNdMultiBlockNHWCUint8Test)
 ARMNN_AUTO_TEST_CASE(SpaceToBatchNdPaddingNHWCUint8, SpaceToBatchNdPaddingNHWCUint8Test)
 
+ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNhwcFloat321, BatchToSpaceNdNhwcFloat32Test1)
+ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNhwcFloat322, BatchToSpaceNdNhwcFloat32Test2)
+ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNhwcFloat323, BatchToSpaceNdNhwcFloat32Test3)
+
+ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNchwFloat321, BatchToSpaceNdNchwFloat32Test1)
+
 BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/backends/reference/workloads/BatchToSpaceNd.cpp b/src/backends/reference/workloads/BatchToSpaceNd.cpp
new file mode 100644
index 0000000..bedf841
--- /dev/null
+++ b/src/backends/reference/workloads/BatchToSpaceNd.cpp
@@ -0,0 +1,100 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "BatchToSpaceNd.hpp"
+
+#include "RefWorkloadUtils.hpp"
+
+#include <armnn/Types.hpp>
+
+#include <boost/assert.hpp>
+
+namespace armnn
+{
+
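+// Computes the flattened buffer index of the element at (batch, height, width, channel)
+// for the given data layout.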
+inline unsigned int Offset(const TensorShape& shape, unsigned int batch, unsigned int height, unsigned int width,
+        unsigned int channels, const DataLayoutIndexed& dataLayout)
+{
+    if (dataLayout.GetDataLayout() == DataLayout::NHWC)
+    {
+        return ((batch * shape[dataLayout.GetHeightIndex()] + height) * shape[dataLayout.GetWidthIndex()] + width) *
+               shape[dataLayout.GetChannelsIndex()] + channels;
+    }
+    else
+    {
+        return ((batch * shape[dataLayout.GetChannelsIndex()] + channels) *
+               shape[dataLayout.GetHeightIndex()] + height) *
+               shape[dataLayout.GetWidthIndex()] + width;
+    }
+}
+
+void BatchToSpaceNd(const DataLayoutIndexed& dataLayout,
+                    const TensorInfo& inputTensorInfo,
+                    const TensorInfo& outputTensorInfo,
+                    const std::vector<unsigned int>& blockShape,
+                    const std::vector<std::vector<unsigned int>>& cropsData,
+                    const float* inputData,
+                    float* outputData)
+{
+    TensorShape inputShape = inputTensorInfo.GetShape();
+    unsigned int inputNumDims = inputShape.GetNumDimensions();
+    if (inputNumDims != 4)
+    {
+        throw armnn::InvalidArgumentException("Expected Input with 4 Dimensions");
+    }
+
+    TensorShape outputShape = outputTensorInfo.GetShape();
+    unsigned int outputNumDims = outputShape.GetNumDimensions();
+    if (outputNumDims != 4)
+    {
+        throw armnn::InvalidArgumentException("Expected Output with 4 Dimensions");
+    }
+
+    const unsigned int inputBatchSize = inputShape[0];
+    const unsigned int channels = inputShape[dataLayout.GetChannelsIndex()];
+
+    const unsigned int outputBatchSize = outputShape[0];
+    const unsigned int outputHeight = outputShape[dataLayout.GetHeightIndex()];
+    const unsigned int outputWidth = outputShape[dataLayout.GetWidthIndex()];
+
+    const unsigned int blockShapeHeight = blockShape[0];
+    const unsigned int blockShapeWidth = blockShape[1];
+
+    const unsigned int cropsTop = cropsData[0][0];
+    const unsigned int cropsLeft = cropsData[1][0];
+
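+    // Each input batch element n writes to output batch (n % outputBatchSize); the quotient
+    // (n / outputBatchSize) selects which position inside the block this batch element fills.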
+    for (unsigned int inBatch = 0; inBatch < inputBatchSize; ++inBatch)
+    {
+        const unsigned int outBatch = inBatch % outputBatchSize;
+        const unsigned int spatialOffset = inBatch / outputBatchSize;
+
+        for (unsigned int inH = 0; inH < inputTensorInfo.GetShape()[dataLayout.GetHeightIndex()]; ++inH)
+        {
+            const unsigned int outH = inH * blockShapeHeight + spatialOffset / blockShapeWidth - cropsTop;
+
+            if (outH >= outputHeight)
+            {
+                continue;
+            }
+
+            for (unsigned int inW = 0; inW < inputTensorInfo.GetShape()[dataLayout.GetWidthIndex()]; ++inW)
+            {
+                const unsigned int outW = inW * blockShapeWidth + spatialOffset % blockShapeWidth - cropsLeft;
+
+                if (outW >= outputWidth)
+                {
+                    continue;
+                }
+
+                for (unsigned int c = 0; c < channels; c++)
+                {
+                    unsigned int outOffset = Offset(outputShape, outBatch, outH, outW, c, dataLayout);
+                    unsigned int inOffset = Offset(inputShape, inBatch, inH, inW, c, dataLayout);
+                    outputData[outOffset] = inputData[inOffset];
+                }
+            }
+        }
+    }
+}
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/BatchToSpaceNd.hpp b/src/backends/reference/workloads/BatchToSpaceNd.hpp
new file mode 100644
index 0000000..7923cea
--- /dev/null
+++ b/src/backends/reference/workloads/BatchToSpaceNd.hpp
@@ -0,0 +1,22 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backendsCommon/Workload.hpp>
+#include <backendsCommon/WorkloadData.hpp>
+#include <armnn/Types.hpp>
+
+namespace armnn
+{
+
+void BatchToSpaceNd(const DataLayoutIndexed& dataLayout,
+                    const TensorInfo& inputTensorInfo,
+                    const TensorInfo& outputTensorInfo,
+                    const std::vector<unsigned int>& blockShape,
+                    const std::vector<std::vector<unsigned int>>& cropsData,
+                    const float* inputData,
+                    float* outputData);
+} // namespace armnn
\ No newline at end of file
diff --git a/src/backends/reference/workloads/CMakeLists.txt b/src/backends/reference/workloads/CMakeLists.txt
index 4cef2d0..1c38509 100644
--- a/src/backends/reference/workloads/CMakeLists.txt
+++ b/src/backends/reference/workloads/CMakeLists.txt
@@ -9,6 +9,8 @@
     ArithmeticFunction.cpp
     ArithmeticFunction.hpp
     BatchNormImpl.hpp
+    BatchToSpaceNd.cpp
+    BatchToSpaceNd.hpp
     Broadcast.cpp
     Broadcast.hpp
     ConvImpl.cpp
@@ -32,6 +34,10 @@
     RefBatchNormalizationFloat32Workload.hpp
     RefBatchNormalizationUint8Workload.cpp
     RefBatchNormalizationUint8Workload.hpp
+    RefBatchToSpaceNdFloat32Workload.cpp
+    RefBatchToSpaceNdFloat32Workload.hpp
+    RefBatchToSpaceNdUint8Workload.cpp
+    RefBatchToSpaceNdUint8Workload.hpp
     RefConstantFloat32Workload.cpp
     RefConstantFloat32Workload.hpp
     RefConstantUint8Workload.cpp
diff --git a/src/backends/reference/workloads/RefBatchToSpaceNdFloat32Workload.cpp b/src/backends/reference/workloads/RefBatchToSpaceNdFloat32Workload.cpp
new file mode 100644
index 0000000..bf246c2
--- /dev/null
+++ b/src/backends/reference/workloads/RefBatchToSpaceNdFloat32Workload.cpp
@@ -0,0 +1,28 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "BatchToSpaceNd.hpp"
+#include "Profiling.hpp"
+#include "RefBatchToSpaceNdFloat32Workload.hpp"
+#include "RefWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+void RefBatchToSpaceNdFloat32Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefBatchToSpaceNdFloat32Workload_Execute");
+
+    const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+    const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+    const float* inputData = GetInputTensorDataFloat(0, m_Data);
+    float* outputData = GetOutputTensorDataFloat(0, m_Data);
+
+    BatchToSpaceNd(m_Data.m_Parameters.m_DataLayout, inputInfo, outputInfo, m_Data.m_Parameters.m_BlockShape,
+                   m_Data.m_Parameters.m_Crops, inputData, outputData);
+}
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/backends/reference/workloads/RefBatchToSpaceNdFloat32Workload.hpp b/src/backends/reference/workloads/RefBatchToSpaceNdFloat32Workload.hpp
new file mode 100644
index 0000000..4977772
--- /dev/null
+++ b/src/backends/reference/workloads/RefBatchToSpaceNdFloat32Workload.hpp
@@ -0,0 +1,22 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backendsCommon/Workload.hpp>
+#include <backendsCommon/WorkloadData.hpp>
+
+namespace armnn
+{
+
+class RefBatchToSpaceNdFloat32Workload : public Float32Workload<BatchToSpaceNdQueueDescriptor>
+{
+
+public:
+    using Float32Workload<BatchToSpaceNdQueueDescriptor>::Float32Workload;
+
+    virtual void Execute() const override;
+};
+
+} // namespace armnn
\ No newline at end of file
diff --git a/src/backends/reference/workloads/RefBatchToSpaceNdUint8Workload.cpp b/src/backends/reference/workloads/RefBatchToSpaceNdUint8Workload.cpp
new file mode 100644
index 0000000..a66bcd4
--- /dev/null
+++ b/src/backends/reference/workloads/RefBatchToSpaceNdUint8Workload.cpp
@@ -0,0 +1,30 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "BatchToSpaceNd.hpp"
+#include "Profiling.hpp"
+#include "RefBatchToSpaceNdUint8Workload.hpp"
+#include "RefWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+void RefBatchToSpaceNdUint8Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefBatchToSpaceNdUint8Workload_Execute");
+
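+    // Dequantize the uint8 input to float, run the float reference implementation into a
+    // scratch buffer, then quantize that buffer back into the uint8 output tensor.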
+    const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+    const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+    auto dequantizedInputData = Dequantize(GetInputTensorDataU8(0, m_Data), inputInfo);
+
+    std::vector<float> results(outputInfo.GetNumElements());
+    BatchToSpaceNd(m_Data.m_Parameters.m_DataLayout, inputInfo, outputInfo, m_Data.m_Parameters.m_BlockShape,
+                   m_Data.m_Parameters.m_Crops, dequantizedInputData.data(), results.data());
+
+    Quantize(GetOutputTensorDataU8(0, m_Data), results.data(), outputInfo);
+}
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/backends/reference/workloads/RefBatchToSpaceNdUint8Workload.hpp b/src/backends/reference/workloads/RefBatchToSpaceNdUint8Workload.hpp
new file mode 100644
index 0000000..1f221c2
--- /dev/null
+++ b/src/backends/reference/workloads/RefBatchToSpaceNdUint8Workload.hpp
@@ -0,0 +1,23 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backendsCommon/Workload.hpp>
+#include <backendsCommon/WorkloadData.hpp>
+
+namespace armnn
+{
+
+class RefBatchToSpaceNdUint8Workload : public Uint8Workload<BatchToSpaceNdQueueDescriptor>
+{
+
+public:
+    using Uint8Workload<BatchToSpaceNdQueueDescriptor>::Uint8Workload;
+
+    virtual void Execute() const override;
+};
+
+} // namespace armnn
\ No newline at end of file
diff --git a/src/backends/reference/workloads/RefWorkloads.hpp b/src/backends/reference/workloads/RefWorkloads.hpp
index 03907a6..5ea7fe4 100644
--- a/src/backends/reference/workloads/RefWorkloads.hpp
+++ b/src/backends/reference/workloads/RefWorkloads.hpp
@@ -54,4 +54,6 @@
 #include "RefConvertFp32ToFp16Workload.hpp"
 #include "RefMeanUint8Workload.hpp"
 #include "RefMeanFloat32Workload.hpp"
-#include "RefPadWorkload.hpp"
\ No newline at end of file
+#include "RefPadWorkload.hpp"
+#include "RefBatchToSpaceNdUint8Workload.hpp"
+#include "RefBatchToSpaceNdFloat32Workload.hpp"