IVGCVSW-3231 Add reference workload support for SpaceToDepth

* Added reference workload for SpaceToDepth
* Added unit tests for float32 & uint8
* Minor refactoring: sorted the RefWorkloads.hpp includes into alphabetical order

Change-Id: I2e01f8101650e2aae102a8a32bc0064f067141ab
Signed-off-by: Keith Davis <keith.davis@arm.com>
diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp
index b7317af..adba86c 100644
--- a/src/backends/backendsCommon/WorkloadData.cpp
+++ b/src/backends/backendsCommon/WorkloadData.cpp
@@ -1123,6 +1123,61 @@
                       "SpaceToBatchNdQueueDescriptor");
 }
 
+// Validates a SpaceToDepth workload: 4D input/output, supported data types,
+// matching element counts, and spatial dims divisible by the block size.
+void SpaceToDepthQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
+{
+    ValidateNumInputs(workloadInfo, "SpaceToDepthQueueDescriptor", 1);
+    ValidateNumOutputs(workloadInfo, "SpaceToDepthQueueDescriptor", 1);
+
+    ValidateTensorNumDimensions(workloadInfo.m_InputTensorInfos[0],
+        "SpaceToDepthQueueDescriptor", 4, "input");
+    ValidateTensorNumDimensions(workloadInfo.m_OutputTensorInfos[0],
+        "SpaceToDepthQueueDescriptor", 4, "output");
+
+    DataLayoutIndexed dimensionIndices(m_Parameters.m_DataLayout);
+
+    std::vector<DataType> supportedTypes =
+    {
+        DataType::Float32,
+        DataType::Float16,
+        DataType::QuantisedAsymm8
+    };
+
+    ValidateDataTypes(workloadInfo.m_InputTensorInfos[0],
+        supportedTypes,
+        "SpaceToDepthQueueDescriptor");
+    ValidateDataTypes(workloadInfo.m_OutputTensorInfos[0],
+        supportedTypes,
+        "SpaceToDepthQueueDescriptor");
+
+    // Guard the modulo checks below: a zero block size would otherwise be
+    // undefined behaviour (division by zero).
+    if (m_Parameters.m_BlockSize == 0)
+    {
+        throw InvalidArgumentException(
+            "SpaceToDepthQueueDescriptor: Block size cannot be zero");
+    }
+
+    const TensorShape inputShape = workloadInfo.m_InputTensorInfos[0].GetShape();
+
+    unsigned int numInputElements = inputShape[0]
+        * inputShape[dimensionIndices.GetWidthIndex()]
+        * inputShape[dimensionIndices.GetHeightIndex()]
+        * inputShape[dimensionIndices.GetChannelsIndex()];
+
+    if (workloadInfo.m_OutputTensorInfos[0].GetNumElements() != numInputElements)
+    {
+        throw InvalidArgumentException("SpaceToDepthQueueDescriptor: Input tensor has " +
+            to_string(numInputElements) + " elements but output tensor has " +
+            to_string(workloadInfo.m_OutputTensorInfos[0].GetNumElements()) + " elements.");
+    }
+
+    if (inputShape[dimensionIndices.GetHeightIndex()] % m_Parameters.m_BlockSize != 0 ||
+        inputShape[dimensionIndices.GetWidthIndex()]  % m_Parameters.m_BlockSize != 0)
+    {
+        throw InvalidArgumentException(
+            "Input shape must be divisible by block size in all spatial dimensions");
+    }
+}
+
 void FloorQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
 {
     const std::string floorQueueDescString = "FloorQueueDescriptor";
diff --git a/src/backends/backendsCommon/test/CMakeLists.txt b/src/backends/backendsCommon/test/CMakeLists.txt
index 7512c89..7819cfe 100644
--- a/src/backends/backendsCommon/test/CMakeLists.txt
+++ b/src/backends/backendsCommon/test/CMakeLists.txt
@@ -44,6 +44,7 @@
     QuantizeTestImpl.hpp
     RuntimeTestImpl.hpp
     SoftmaxTestImpl.hpp
+    SpaceToDepthTestImpl.hpp
     SplitterEndToEndTestImpl.hpp
     SplitterTestImpl.hpp
     StridedSliceTestImpl.hpp
diff --git a/src/backends/backendsCommon/test/LayerTests.cpp b/src/backends/backendsCommon/test/LayerTests.cpp
index 7f0c933..45791e5 100644
--- a/src/backends/backendsCommon/test/LayerTests.cpp
+++ b/src/backends/backendsCommon/test/LayerTests.cpp
@@ -33,6 +33,7 @@
 #include "FullyConnectedTestImpl.hpp"
 #include "GatherTestImpl.hpp"
 #include "SpaceToBatchNdTestImpl.hpp"
+#include "SpaceToDepthTestImpl.hpp"
 #include "SplitterTestImpl.hpp"
 #include "SoftmaxTestImpl.hpp"
 #include "StridedSliceTestImpl.hpp"
@@ -8969,6 +8970,45 @@
     return SpaceToBatchNdPaddingNHWCTest<armnn::DataType::QuantisedSymm16>(workloadFactory, memoryManager);
 }
 
+
+LayerTestResult<uint8_t, 4> SpaceToDepthNHWCAsymmQ8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return SpaceToDepthSimpleTest<armnn::DataType::QuantisedAsymm8>(
+        workloadFactory,
+        memoryManager);
+}
+
+LayerTestResult<uint8_t, 4> SpaceToDepthNCHWAsymmQ8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return SpaceToDepthSimpleTest<armnn::DataType::QuantisedAsymm8>(
+        workloadFactory,
+        memoryManager,
+        armnn::DataLayout::NCHW);
+}
+
+LayerTestResult<float, 4> SpaceToDepthNHWCFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return SpaceToDepthFloatTest<armnn::DataType::Float32>(
+        workloadFactory,
+        memoryManager);
+}
+
+LayerTestResult<float, 4> SpaceToDepthNCHWFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+{
+    return SpaceToDepthFloatTest<armnn::DataType::Float32>(
+        workloadFactory,
+        memoryManager,
+        armnn::DataLayout::NCHW);
+}
+
 namespace {
 
 template<typename T, std::size_t InputDim, std::size_t OutputDim>
diff --git a/src/backends/backendsCommon/test/LayerTests.hpp b/src/backends/backendsCommon/test/LayerTests.hpp
index bf0d063..e0b0273 100644
--- a/src/backends/backendsCommon/test/LayerTests.hpp
+++ b/src/backends/backendsCommon/test/LayerTests.hpp
@@ -1914,6 +1914,22 @@
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
 
+LayerTestResult<uint8_t, 4> SpaceToDepthNCHWAsymmQ8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<uint8_t, 4> SpaceToDepthNHWCAsymmQ8Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 4> SpaceToDepthNHWCFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
+LayerTestResult<float, 4> SpaceToDepthNCHWFloat32Test(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
+
 LayerTestResult<uint8_t, 4> QuantizeSimpleUint8Test(
     armnn::IWorkloadFactory& workloadFactory,
     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager);
diff --git a/src/backends/backendsCommon/test/SpaceToDepthTestImpl.hpp b/src/backends/backendsCommon/test/SpaceToDepthTestImpl.hpp
new file mode 100644
index 0000000..99926cd
--- /dev/null
+++ b/src/backends/backendsCommon/test/SpaceToDepthTestImpl.hpp
@@ -0,0 +1,147 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include "WorkloadTestUtils.hpp"
+
+#include <armnn/ArmNN.hpp>
+#include <armnn/Tensor.hpp>
+#include <armnn/TypesUtils.hpp>
+
+#include <backendsCommon/CpuTensorHandle.hpp>
+#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+#include <test/TensorHelpers.hpp>
+
+template<typename T>
+LayerTestResult<T, 4> SpaceToDepthTestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::TensorInfo& inputTensorInfo,
+    armnn::TensorInfo& outputTensorInfo,
+    std::vector<float>& inputData,
+    std::vector<float>& outputExpectedData,
+    armnn::SpaceToDepthQueueDescriptor descriptor,
+    const float qScale = 1.0f,
+    const int32_t qOffset = 0)
+{
+    const armnn::PermutationVector NHWCToNCHW = {0, 2, 3, 1};
+
+    if (descriptor.m_Parameters.m_DataLayout == armnn::DataLayout::NCHW)
+    {
+        inputTensorInfo = armnnUtils::Permuted(inputTensorInfo, NHWCToNCHW);
+        outputTensorInfo = armnnUtils::Permuted(outputTensorInfo, NHWCToNCHW);
+
+        std::vector<float> inputTmp(inputData.size());
+        armnnUtils::Permute(inputTensorInfo.GetShape(), NHWCToNCHW,
+            inputData.data(), inputTmp.data(), sizeof(float));
+        inputData = inputTmp;
+
+        std::vector<float> outputTmp(outputExpectedData.size());
+        armnnUtils::Permute(outputTensorInfo.GetShape(), NHWCToNCHW,
+            outputExpectedData.data(), outputTmp.data(), sizeof(float));
+        outputExpectedData = outputTmp;
+    }
+
+    if(armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(qScale);
+        inputTensorInfo.SetQuantizationOffset(qOffset);
+        outputTensorInfo.SetQuantizationScale(qScale);
+        outputTensorInfo.SetQuantizationOffset(qOffset);
+    }
+
+    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputTensorInfo, QuantizedVector<T>(qScale, qOffset, inputData));
+
+    LayerTestResult<T, 4> ret(outputTensorInfo);
+    ret.outputExpected = MakeTensor<T, 4>(outputTensorInfo, QuantizedVector<T>(qScale, qOffset, outputExpectedData));
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle = workloadFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputTensorInfo);
+
+    armnn::WorkloadInfo info;
+    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateSpaceToDepth(descriptor, info);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), &input[0][0][0][0]);
+
+    workload->Execute();
+
+    CopyDataFromITensorHandle(&ret.output[0][0][0][0], outputHandle.get());
+
+    return ret;
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> SpaceToDepthSimpleTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::DataLayout dataLayout = armnn::DataLayout::NHWC)
+{
+    unsigned int inputShape[] = {1, 2, 2, 1};
+    unsigned int outputShape[] = {1, 1, 1, 4};
+
+    std::vector<float> input = std::vector<float>(
+    {
+        1.0f, 2.0f, 3.0f, 4.0f
+    });
+
+    std::vector<float> outputExpected = std::vector<float>(
+    {
+        1.0f, 2.0f, 3.0f, 4.0f
+    });
+
+    armnn::TensorInfo inputTensorInfo;
+    armnn::TensorInfo outputTensorInfo;
+
+    armnn::SpaceToDepthQueueDescriptor desc;
+    desc.m_Parameters.m_DataLayout = dataLayout;
+    desc.m_Parameters.m_BlockSize = 2;
+
+    inputTensorInfo = armnn::TensorInfo(4, inputShape, ArmnnType);
+    outputTensorInfo = armnn::TensorInfo(4, outputShape, ArmnnType);
+
+    return SpaceToDepthTestImpl<T>(
+        workloadFactory, memoryManager, inputTensorInfo, outputTensorInfo, input, outputExpected, desc);
+}
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> SpaceToDepthFloatTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::DataLayout dataLayout = armnn::DataLayout::NHWC)
+{
+    unsigned int inputShape[] = {1, 2, 2, 2};
+    unsigned int outputShape[] = {1, 1, 1, 8};
+
+    std::vector<float> input = std::vector<float>(
+    {
+        1.4f, 2.3f, 3.2f, 4.1f, 5.4f, 6.3f, 7.2f, 8.1f
+    });
+
+    std::vector<float> outputExpected = std::vector<float>(
+    {
+        1.4f, 2.3f, 3.2f, 4.1f, 5.4f, 6.3f, 7.2f, 8.1f
+    });
+
+    armnn::TensorInfo inputTensorInfo;
+    armnn::TensorInfo outputTensorInfo;
+
+    armnn::SpaceToDepthQueueDescriptor desc;
+    desc.m_Parameters.m_DataLayout = dataLayout;
+    desc.m_Parameters.m_BlockSize = 2;
+
+    inputTensorInfo = armnn::TensorInfo(4, inputShape, ArmnnType);
+    outputTensorInfo = armnn::TensorInfo(4, outputShape, ArmnnType);
+
+    return SpaceToDepthTestImpl<T>(
+        workloadFactory, memoryManager, inputTensorInfo, outputTensorInfo, input, outputExpected, desc);
+}
diff --git a/src/backends/reference/RefLayerSupport.cpp b/src/backends/reference/RefLayerSupport.cpp
index 077aa1c..a180781 100644
--- a/src/backends/reference/RefLayerSupport.cpp
+++ b/src/backends/reference/RefLayerSupport.cpp
@@ -1282,6 +1282,32 @@
     return supported;
 }
 
+// Reference backend support check for SpaceToDepth: Float32 and QAsymm8,
+// with matching input/output types.
+bool RefLayerSupport::IsSpaceToDepthSupported(const TensorInfo& input,
+                                              const TensorInfo& output,
+                                              const SpaceToDepthDescriptor& descriptor,
+                                              Optional<std::string&> reasonIfUnsupported) const
+{
+    ignore_unused(descriptor);
+    bool supported = true;
+
+    std::array<DataType,2> supportedTypes =
+    {
+        DataType::Float32,
+        DataType::QuantisedAsymm8,
+    };
+
+    supported &= CheckSupportRule(TypeAnyOf(input, supportedTypes), reasonIfUnsupported,
+        "Reference SpaceToDepth: input type not supported");
+
+    supported &= CheckSupportRule(TypeAnyOf(output, supportedTypes), reasonIfUnsupported,
+        "Reference SpaceToDepth: output type not supported");
+
+    supported &= CheckSupportRule(TypesAreEqual(input, output), reasonIfUnsupported,
+        "Reference SpaceToDepth: input and output types are mismatched");
+
+    return supported;
+}
+
 bool RefLayerSupport::IsSplitterSupported(const TensorInfo& input,
                                           const ViewsDescriptor& descriptor,
                                           Optional<std::string&> reasonIfUnsupported) const
diff --git a/src/backends/reference/RefLayerSupport.hpp b/src/backends/reference/RefLayerSupport.hpp
index 041701d..5d24149 100644
--- a/src/backends/reference/RefLayerSupport.hpp
+++ b/src/backends/reference/RefLayerSupport.hpp
@@ -236,6 +236,12 @@
                                    const SpaceToBatchNdDescriptor& descriptor,
                                    Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
 
+    bool IsSpaceToDepthSupported(const TensorInfo& input,
+                                 const TensorInfo& output,
+                                 const SpaceToDepthDescriptor& descriptor,
+                                 Optional<std::string&> reasonIfUnsupported = EmptyOptional())
+                                 const override;
+
     ARMNN_DEPRECATED_MSG("Use IsSplitterSupported with outputs instead")
     bool IsSplitterSupported(const TensorInfo& input,
                              const ViewsDescriptor& descriptor,
diff --git a/src/backends/reference/RefWorkloadFactory.cpp b/src/backends/reference/RefWorkloadFactory.cpp
index 29b2c52..1f241f0 100644
--- a/src/backends/reference/RefWorkloadFactory.cpp
+++ b/src/backends/reference/RefWorkloadFactory.cpp
@@ -298,6 +298,16 @@
     return std::make_unique<RefSpaceToBatchNdWorkload>(descriptor, info);
 }
 
+std::unique_ptr<IWorkload> RefWorkloadFactory::CreateSpaceToDepth(const armnn::SpaceToDepthQueueDescriptor& descriptor,
+    const armnn::WorkloadInfo& info) const
+{
+    if (IsFloat16(info))
+    {
+        return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info);
+    }
+    return std::make_unique<RefSpaceToDepthWorkload>(descriptor, info);
+}
+
 std::unique_ptr<IWorkload> RefWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor,
                                                           const WorkloadInfo& info) const
 {
diff --git a/src/backends/reference/RefWorkloadFactory.hpp b/src/backends/reference/RefWorkloadFactory.hpp
index 333a9ca..ed513e1 100644
--- a/src/backends/reference/RefWorkloadFactory.hpp
+++ b/src/backends/reference/RefWorkloadFactory.hpp
@@ -127,6 +127,9 @@
     std::unique_ptr<IWorkload> CreateSpaceToBatchNd(const SpaceToBatchNdQueueDescriptor& descriptor,
                                                     const WorkloadInfo& info) const override;
 
+    std::unique_ptr<IWorkload> CreateSpaceToDepth(const SpaceToDepthQueueDescriptor& descriptor,
+                                                  const WorkloadInfo& info) const override;
+
     std::unique_ptr<IWorkload> CreateFloor(const FloorQueueDescriptor& descriptor,
                                            const WorkloadInfo& info) const override;
 
diff --git a/src/backends/reference/backend.mk b/src/backends/reference/backend.mk
index a430f4f..6b7e895 100644
--- a/src/backends/reference/backend.mk
+++ b/src/backends/reference/backend.mk
@@ -58,11 +58,13 @@
         workloads/RefRsqrtWorkload.cpp \
         workloads/RefSoftmaxWorkload.cpp \
         workloads/RefSpaceToBatchNdWorkload.cpp \
+        workloads/RefSpaceToDepthWorkload.cpp \
         workloads/RefStridedSliceWorkload.cpp \
         workloads/RefSplitterWorkload.cpp \
         workloads/ResizeBilinear.cpp \
         workloads/Rsqrt.cpp \
         workloads/SpaceToBatchNd.cpp \
+        workloads/SpaceToDepth.cpp \
         workloads/StridedSlice.cpp \
         workloads/StringMapping.cpp \
         workloads/Softmax.cpp \
diff --git a/src/backends/reference/test/RefLayerSupportTests.cpp b/src/backends/reference/test/RefLayerSupportTests.cpp
index 0d99b3e..e6a2fb3 100644
--- a/src/backends/reference/test/RefLayerSupportTests.cpp
+++ b/src/backends/reference/test/RefLayerSupportTests.cpp
@@ -48,7 +48,6 @@
     BOOST_CHECK(supportChecker.IsAdditionSupported(in0, in1, out, reasonNotSupported));
 }
 
-
 BOOST_AUTO_TEST_CASE(IsLayerSupportedFloat16Reference)
 {
     armnn::RefWorkloadFactory factory;
diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp
index b540d18..ccb1dc2 100644
--- a/src/backends/reference/test/RefLayerTests.cpp
+++ b/src/backends/reference/test/RefLayerTests.cpp
@@ -769,6 +769,12 @@
 ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNchwUint3, BatchToSpaceNdNchwUintTest3)
 ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNchwUint4, BatchToSpaceNdNchwUintTest4)
 
+ARMNN_AUTO_TEST_CASE(SpaceToDepthNCHWAsymmQ8, SpaceToDepthNCHWAsymmQ8Test)
+ARMNN_AUTO_TEST_CASE(SpaceToDepthNHWCAsymmQ8, SpaceToDepthNHWCAsymmQ8Test)
+
+ARMNN_AUTO_TEST_CASE(SpaceToDepthNHWCFloat32, SpaceToDepthNHWCFloat32Test)
+ARMNN_AUTO_TEST_CASE(SpaceToDepthNCHWFloat32, SpaceToDepthNCHWFloat32Test)
+
 // Strided Slice
 ARMNN_AUTO_TEST_CASE(StridedSlice4DFloat32, StridedSlice4DFloat32Test)
 ARMNN_AUTO_TEST_CASE(StridedSlice4DReverseFloat32, StridedSlice4DReverseFloat32Test)
diff --git a/src/backends/reference/workloads/CMakeLists.txt b/src/backends/reference/workloads/CMakeLists.txt
index db0daa0..ca35e27 100644
--- a/src/backends/reference/workloads/CMakeLists.txt
+++ b/src/backends/reference/workloads/CMakeLists.txt
@@ -100,6 +100,8 @@
     RefSoftmaxWorkload.hpp
     RefSpaceToBatchNdWorkload.cpp
     RefSpaceToBatchNdWorkload.hpp
+    RefSpaceToDepthWorkload.cpp
+    RefSpaceToDepthWorkload.hpp
     RefSplitterWorkload.cpp
     RefSplitterWorkload.hpp
     RefStridedSliceWorkload.cpp
@@ -114,6 +116,8 @@
     Softmax.hpp
     SpaceToBatchNd.hpp
     SpaceToBatchNd.cpp
+    SpaceToDepth.hpp
+    SpaceToDepth.cpp
     Splitter.hpp
     Splitter.cpp
     StridedSlice.hpp
diff --git a/src/backends/reference/workloads/RefSpaceToDepthWorkload.cpp b/src/backends/reference/workloads/RefSpaceToDepthWorkload.cpp
new file mode 100644
index 0000000..1b12272
--- /dev/null
+++ b/src/backends/reference/workloads/RefSpaceToDepthWorkload.cpp
@@ -0,0 +1,28 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefSpaceToDepthWorkload.hpp"
+#include "SpaceToDepth.hpp"
+
+#include "RefWorkloadUtils.hpp"
+#include <ResolveType.hpp>
+
+namespace armnn
+{
+
+void RefSpaceToDepthWorkload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefSpaceToDepthWorkload_Execute");
+
+    const TensorInfo& inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+    std::unique_ptr<Decoder<float>> decoder = MakeDecoder<float>(inputInfo, m_Data.m_Inputs[0]->Map());
+
+    const TensorInfo& outputInfo = GetTensorInfo(m_Data.m_Outputs[0]);
+    std::unique_ptr<Encoder<float>> encoder = MakeEncoder<float>(outputInfo, m_Data.m_Outputs[0]->Map());
+
+    SpaceToDepth(inputInfo, outputInfo, m_Data.m_Parameters, *decoder, *encoder);
+}
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefSpaceToDepthWorkload.hpp b/src/backends/reference/workloads/RefSpaceToDepthWorkload.hpp
new file mode 100644
index 0000000..82d8528
--- /dev/null
+++ b/src/backends/reference/workloads/RefSpaceToDepthWorkload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include "backendsCommon/Workload.hpp"
+
+#include <armnn/TypesUtils.hpp>
+
+namespace armnn
+{
+
+class RefSpaceToDepthWorkload : public BaseWorkload<SpaceToDepthQueueDescriptor>
+{
+public:
+    using BaseWorkload<SpaceToDepthQueueDescriptor>::BaseWorkload;
+    virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/RefWorkloads.hpp b/src/backends/reference/workloads/RefWorkloads.hpp
index 41b16fa..056127a 100644
--- a/src/backends/reference/workloads/RefWorkloads.hpp
+++ b/src/backends/reference/workloads/RefWorkloads.hpp
@@ -5,50 +5,51 @@
 
 #pragma once
 
-#include "ElementwiseFunction.hpp"
-#include "RefElementwiseWorkload.hpp"
+#include "Activation.hpp"
+#include "BatchNormImpl.hpp"
 #include "ConvImpl.hpp"
-#include "RefConstantWorkload.hpp"
-#include "RefConvolution2dWorkload.hpp"
-#include "RefSplitterWorkload.hpp"
-#include "RefResizeBilinearWorkload.hpp"
-#include "RefL2NormalizationWorkload.hpp"
-#include "RefActivationWorkload.hpp"
-#include "RefPooling2dWorkload.hpp"
-#include "RefWorkloadUtils.hpp"
-#include "RefConcatWorkload.hpp"
-#include "RefFullyConnectedWorkload.hpp"
-#include "RefGatherWorkload.hpp"
-#include "Softmax.hpp"
-#include "TensorBufferArrayView.hpp"
-#include "RefBatchNormalizationWorkload.hpp"
-#include "Splitter.hpp"
-#include "RefDepthwiseConvolution2dWorkload.hpp"
+#include "Concatenate.hpp"
+#include "ElementwiseFunction.hpp"
 #include "FullyConnected.hpp"
 #include "Gather.hpp"
-#include "RefFloorWorkload.hpp"
-#include "RefSoftmaxWorkload.hpp"
-#include "ResizeBilinear.hpp"
-#include "RefNormalizationWorkload.hpp"
-#include "RefDetectionPostProcessWorkload.hpp"
-#include "BatchNormImpl.hpp"
-#include "Activation.hpp"
-#include "Concatenate.hpp"
-#include "RefSpaceToBatchNdWorkload.hpp"
-#include "RefStridedSliceWorkload.hpp"
 #include "Pooling2d.hpp"
-#include "RefFakeQuantizationFloat32Workload.hpp"
-#include "RefPermuteWorkload.hpp"
-#include "RefLstmWorkload.hpp"
-#include "RefConvertFp16ToFp32Workload.hpp"
-#include "RefConvertFp32ToFp16Workload.hpp"
-#include "RefMeanWorkload.hpp"
-#include "RefPadWorkload.hpp"
+#include "RefActivationWorkload.hpp"
+#include "RefBatchNormalizationWorkload.hpp"
 #include "RefBatchToSpaceNdUint8Workload.hpp"
 #include "RefBatchToSpaceNdFloat32Workload.hpp"
+#include "RefConvolution2dWorkload.hpp"
+#include "RefConstantWorkload.hpp"
+#include "RefConcatWorkload.hpp"
+#include "RefConvertFp16ToFp32Workload.hpp"
+#include "RefConvertFp32ToFp16Workload.hpp"
+#include "RefDepthwiseConvolution2dWorkload.hpp"
+#include "RefDetectionPostProcessWorkload.hpp"
 #include "RefDebugWorkload.hpp"
-#include "RefRsqrtWorkload.hpp"
 #include "RefDequantizeWorkload.hpp"
-#include "RefQuantizeWorkload.hpp"
-#include "RefReshapeWorkload.hpp"
+#include "RefElementwiseWorkload.hpp"
+#include "RefFullyConnectedWorkload.hpp"
+#include "RefFloorWorkload.hpp"
+#include "RefFakeQuantizationFloat32Workload.hpp"
+#include "RefGatherWorkload.hpp"
+#include "RefL2NormalizationWorkload.hpp"
+#include "RefLstmWorkload.hpp"
+#include "RefMeanWorkload.hpp"
+#include "RefNormalizationWorkload.hpp"
+#include "RefPooling2dWorkload.hpp"
+#include "RefPermuteWorkload.hpp"
+#include "RefPadWorkload.hpp"
 #include "RefPreluWorkload.hpp"
+#include "RefQuantizeWorkload.hpp"
+#include "RefResizeBilinearWorkload.hpp"
+#include "RefRsqrtWorkload.hpp"
+#include "RefReshapeWorkload.hpp"
+#include "RefSplitterWorkload.hpp"
+#include "RefSoftmaxWorkload.hpp"
+#include "RefSpaceToBatchNdWorkload.hpp"
+#include "RefSpaceToDepthWorkload.hpp"
+#include "RefStridedSliceWorkload.hpp"
+#include "RefWorkloadUtils.hpp"
+#include "ResizeBilinear.hpp"
+#include "Softmax.hpp"
+#include "Splitter.hpp"
+#include "TensorBufferArrayView.hpp"
diff --git a/src/backends/reference/workloads/SpaceToDepth.cpp b/src/backends/reference/workloads/SpaceToDepth.cpp
new file mode 100644
index 0000000..4a4f418
--- /dev/null
+++ b/src/backends/reference/workloads/SpaceToDepth.cpp
@@ -0,0 +1,106 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "SpaceToDepth.hpp"
+
+#include <DataLayoutIndexed.hpp>
+
+using namespace armnnUtils;
+
+namespace {
+    // Computes the linear buffer offset of element (b, c, h, w) for the
+    // given 4D tensor shape and data layout (NHWC or NCHW).
+    unsigned int GetOffset(const armnn::TensorShape& shape,
+        unsigned int c,
+        unsigned int h,
+        unsigned int w,
+        unsigned int b,
+        const DataLayoutIndexed& dataLayout)
+    {
+        if (dataLayout.GetDataLayout() == armnn::DataLayout::NHWC)
+        {
+            return ((b * shape[dataLayout.GetHeightIndex()] + h) * shape[dataLayout.GetWidthIndex()] + w) *
+                shape[dataLayout.GetChannelsIndex()] + c;
+        }
+        else
+        {
+            return ((b * shape[dataLayout.GetChannelsIndex()] + c) * shape[dataLayout.GetHeightIndex()] + h) *
+                shape[dataLayout.GetWidthIndex()] + w;
+        }
+    }
+}
+
+namespace armnn
+{
+
+void SpaceToDepth(const TensorInfo& inputInfo,
+                  const TensorInfo& outputInfo,
+                  const SpaceToDepthDescriptor& params,
+                  Decoder<float>& inputData,
+                  Encoder<float>& outputData)
+{
+    DataLayoutIndexed dataLayout = params.m_DataLayout;
+
+    const TensorShape& inputShape = inputInfo.GetShape();
+    const TensorShape& outputShape = outputInfo.GetShape();
+
+    const unsigned int inputBatchSize = inputShape[0];
+    const unsigned int inputChannels = inputShape[dataLayout.GetChannelsIndex()];
+
+    const unsigned int outputHeight = outputShape[dataLayout.GetHeightIndex()];
+    const unsigned int outputWidth = outputShape[dataLayout.GetWidthIndex()];
+    const unsigned int outputChannels = outputShape[dataLayout.GetChannelsIndex()];
+
+    const unsigned int blockSize = params.m_BlockSize;
+
+    if (blockSize == 0)
+    {
+        throw InvalidArgumentException(
+            "Input shape must be divisible by block size in all spatial dimensions: Block size is"
+            " equal to zero");
+    }
+
+    for (unsigned int outChannelIndex = 0; outChannelIndex < outputChannels; outChannelIndex++)
+    {
+        // Each output channel maps back to one input channel plus a spatial
+        // shift (shiftH, shiftW) inside the blockSize x blockSize patch.
+        unsigned int inChannelIndex = outChannelIndex % inputChannels;
+
+        unsigned int shiftW = (outChannelIndex / inputChannels) % blockSize;
+        unsigned int shiftH = (outChannelIndex / inputChannels) / blockSize;
+
+        for (unsigned int outH = 0; outH < outputHeight; outH++)
+        {
+            for (unsigned int outW = 0; outW < outputWidth; outW++)
+            {
+                for (unsigned int inBatchIndex = 0; inBatchIndex < inputBatchSize; inBatchIndex++)
+                {
+                    unsigned int inOffset = GetOffset(inputShape,
+                        inChannelIndex,
+                        (outH * blockSize + shiftH),
+                        (outW * blockSize + shiftW),
+                        inBatchIndex,
+                        dataLayout);
+
+                    unsigned int outOffset = GetOffset(outputShape,
+                        outChannelIndex,
+                        outH,
+                        outW,
+                        inBatchIndex,
+                        dataLayout);
+
+                    // Reposition the iterators, copy one element, then restore them.
+                    outputData += outOffset;
+                    inputData += inOffset;
+                    outputData.Set(inputData.Get());
+                    inputData -= inOffset;
+                    outputData -= outOffset;
+                }
+            }
+        }
+    }
+}
+
+} //namespace armnn
diff --git a/src/backends/reference/workloads/SpaceToDepth.hpp b/src/backends/reference/workloads/SpaceToDepth.hpp
new file mode 100644
index 0000000..f855884
--- /dev/null
+++ b/src/backends/reference/workloads/SpaceToDepth.hpp
@@ -0,0 +1,24 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "BaseIterator.hpp"
+#include "Decoders.hpp"
+#include "Encoders.hpp"
+
+#include <armnn/Descriptors.hpp>
+#include <armnn/Tensor.hpp>
+
+namespace armnn
+{
+
+void SpaceToDepth(const TensorInfo& inputInfo,
+                  const TensorInfo& outputInfo,
+                  const SpaceToDepthDescriptor& params,
+                  Decoder<float>& inputData,
+                  Encoder<float>& outputData);
+
+} //namespace armnn