IVGCVSW-3885 Add reference workload for DepthToSpace

Signed-off-by: Aron Virginas-Tar <Aron.Virginas-Tar@arm.com>
Change-Id: Id937dc4425884ad1985dcdfaae8bf3fb64f0c766
diff --git a/src/backends/backendsCommon/common.mk b/src/backends/backendsCommon/common.mk
index abf924a..343af5a 100644
--- a/src/backends/backendsCommon/common.mk
+++ b/src/backends/backendsCommon/common.mk
@@ -45,6 +45,7 @@
     test/layerTests/ConvertFp16ToFp32TestImpl.cpp \
     test/layerTests/ConvertFp32ToFp16TestImpl.cpp \
     test/layerTests/DebugTestImpl.cpp \
+    test/layerTests/DepthToSpaceTestImpl.cpp \
     test/layerTests/DequantizeTestImpl.cpp \
     test/layerTests/DivisionTestImpl.cpp \
     test/layerTests/EqualTestImpl.cpp \
diff --git a/src/backends/backendsCommon/test/CMakeLists.txt b/src/backends/backendsCommon/test/CMakeLists.txt
index 0cebf90..481d7d8 100644
--- a/src/backends/backendsCommon/test/CMakeLists.txt
+++ b/src/backends/backendsCommon/test/CMakeLists.txt
@@ -63,6 +63,8 @@
     layerTests/ConvertFp32ToFp16TestImpl.hpp
     layerTests/DebugTestImpl.cpp
     layerTests/DebugTestImpl.hpp
+    layerTests/DepthToSpaceTestImpl.cpp
+    layerTests/DepthToSpaceTestImpl.hpp
     layerTests/DequantizeTestImpl.cpp
     layerTests/DequantizeTestImpl.hpp
     layerTests/DetectionPostProcessTestImpl.hpp
diff --git a/src/backends/backendsCommon/test/LayerTests.hpp b/src/backends/backendsCommon/test/LayerTests.hpp
index 14ff266..84125b5 100644
--- a/src/backends/backendsCommon/test/LayerTests.hpp
+++ b/src/backends/backendsCommon/test/LayerTests.hpp
@@ -17,6 +17,7 @@
 #include <backendsCommon/test/layerTests/Conv2dTestImpl.hpp>
 #include <backendsCommon/test/layerTests/ConstantTestImpl.hpp>
 #include <backendsCommon/test/layerTests/DebugTestImpl.hpp>
+#include <backendsCommon/test/layerTests/DepthToSpaceTestImpl.hpp>
 #include <backendsCommon/test/layerTests/DequantizeTestImpl.hpp>
 #include <backendsCommon/test/layerTests/DetectionPostProcessTestImpl.hpp>
 #include <backendsCommon/test/layerTests/DivisionTestImpl.hpp>
diff --git a/src/backends/backendsCommon/test/layerTests/DepthToSpaceTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/DepthToSpaceTestImpl.cpp
new file mode 100644
index 0000000..9588f56
--- /dev/null
+++ b/src/backends/backendsCommon/test/layerTests/DepthToSpaceTestImpl.cpp
@@ -0,0 +1,342 @@
+//
+// Copyright © 2019 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "DepthToSpaceTestImpl.hpp"
+
+#include <Permute.hpp>
+
+#include <armnn/ArmNN.hpp>
+
+#include <backendsCommon/test/TensorCopyUtils.hpp>
+#include <backendsCommon/test/WorkloadTestUtils.hpp>
+
+#include <test/TensorHelpers.hpp>
+
+namespace
+{
+
+template<typename T>
+LayerTestResult<T, 4> DepthToSpaceTestImpl(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::TensorInfo& inputInfo,
+    armnn::TensorInfo& outputInfo,
+    std::vector<float>& inputData,
+    std::vector<float>& expectedOutputData,
+    armnn::DepthToSpaceQueueDescriptor descriptor,
+    const float qScale = 1.0f,
+    const int32_t qOffset = 0)
+{
+    const armnn::PermutationVector permVector{0, 2, 3, 1};
+
+    if (descriptor.m_Parameters.m_DataLayout == armnn::DataLayout::NCHW)
+    {
+        inputInfo  = armnnUtils::Permuted(inputInfo, permVector);
+        outputInfo = armnnUtils::Permuted(outputInfo, permVector);
+
+        constexpr size_t typeSize = sizeof(float);
+
+        std::vector<float> inputTmp(inputData.size());
+        armnnUtils::Permute(inputInfo.GetShape(), permVector, inputData.data(), inputTmp.data(), typeSize);
+        inputData = inputTmp;
+
+        std::vector<float> outputTmp(expectedOutputData.size());
+        armnnUtils::Permute(outputInfo.GetShape(), permVector, expectedOutputData.data(), outputTmp.data(), typeSize);
+        expectedOutputData = outputTmp;
+    }
+
+    if(armnn::IsQuantizedType<T>())
+    {
+        inputInfo.SetQuantizationScale(qScale);
+        inputInfo.SetQuantizationOffset(qOffset);
+        outputInfo.SetQuantizationScale(qScale);
+        outputInfo.SetQuantizationOffset(qOffset);
+    }
+
+    boost::multi_array<T, 4> input = MakeTensor<T, 4>(inputInfo, QuantizedVector<T>(qScale, qOffset, inputData));
+
+    LayerTestResult<T, 4> result(outputInfo);
+    result.outputExpected = MakeTensor<T, 4>(outputInfo, QuantizedVector<T>(qScale, qOffset, expectedOutputData));
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle  = workloadFactory.CreateTensorHandle(inputInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = workloadFactory.CreateTensorHandle(outputInfo);
+
+    armnn::WorkloadInfo info;
+    AddInputToWorkload(descriptor, info, inputInfo, inputHandle.get());
+    AddOutputToWorkload(descriptor, info, outputInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateDepthToSpace(descriptor, info);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), input.origin());
+
+    workload->Execute();
+
+    CopyDataFromITensorHandle(result.output.origin(), outputHandle.get());
+    return result;
+}
+
+} // anonymous namespace
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<T, 4> DepthToSpaceTest1(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::DataLayout dataLayout)
+{
+    unsigned int inputShape[]  = { 1, 1, 1, 8 };
+    unsigned int outputShape[] = { 1, 2, 2, 2 };
+
+    // in:
+    // [[[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]]]]
+    //
+    // out:
+    // [[[[1, 2, 3], [4, 5, 6]],
+    //   [[7, 8, 9], [10, 11, 12]]]]
+
+    std::vector<float> input          = { 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f };
+    std::vector<float> expectedOutput = input;
+
+    armnn::DepthToSpaceQueueDescriptor desc;
+    desc.m_Parameters.m_DataLayout = dataLayout;
+    desc.m_Parameters.m_BlockSize  = 2;
+
+    armnn::TensorInfo inputInfo(4, inputShape, ArmnnType);
+    armnn::TensorInfo outputInfo(4, outputShape, ArmnnType);
+
+    return DepthToSpaceTestImpl<T>(workloadFactory, memoryManager, inputInfo, outputInfo, input, expectedOutput, desc);
+}
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<T, 4> DepthToSpaceTest2(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::DataLayout dataLayout)
+{
+    unsigned int inputShape[]  = { 1, 2, 2, 4 };
+    unsigned int outputShape[] = { 1, 4, 4, 1 };
+
+    // in:
+    // [[[[1, 2, 3, 4],
+    //   [5, 6, 7, 8]],
+    //  [[9, 10, 11, 12],
+    //   [13, 14, 15, 16]]]]
+    //
+    // out:
+    // [[[ [1],   [2],  [5],  [6]],
+    //  [ [3],   [4],  [7],  [8]],
+    //  [ [9],  [10], [13],  [14]],
+    //  [ [11], [12], [15],  [16]]]]
+
+    std::vector<float> input =
+    {
+        1.f,  2.f,  3.f,  4.f,
+
+        5.f,  6.f,  7.f,  8.f,
+
+        9.f, 10.f, 11.f, 12.f,
+
+        13.f, 14.f, 15.f, 16.f
+    };
+
+    std::vector<float> expectedOutput
+    {
+         1.f,   2.f,   5.f,   6.f,
+         3.f,   4.f,   7.f,   8.f,
+         9.f,  10.f,  13.f,  14.f,
+        11.f,  12.f,  15.f,  16.f
+    };
+
+    armnn::DepthToSpaceQueueDescriptor desc;
+    desc.m_Parameters.m_DataLayout = dataLayout;
+    desc.m_Parameters.m_BlockSize  = 2;
+
+    armnn::TensorInfo inputInfo(4, inputShape, ArmnnType);
+    armnn::TensorInfo outputInfo(4, outputShape, ArmnnType);
+
+    return DepthToSpaceTestImpl<T>(workloadFactory, memoryManager, inputInfo, outputInfo, input, expectedOutput, desc);
+}
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<T, 4> DepthToSpaceTest3(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::DataLayout dataLayout)
+{
+    unsigned int inputShape[]  = { 2, 1, 1, 4 };
+    unsigned int outputShape[] = { 2, 2, 2, 1 };
+
+    std::vector<float> input =
+    {
+        1.f, 2.f, 3.f, 4.f, // batch 0
+        5.f, 6.f, 7.f, 8.f  // batch 1
+    };
+
+    std::vector<float> expectedOutput = input;
+
+    armnn::DepthToSpaceQueueDescriptor desc;
+    desc.m_Parameters.m_DataLayout = dataLayout;
+    desc.m_Parameters.m_BlockSize  = 2;
+
+    armnn::TensorInfo inputInfo(4, inputShape, ArmnnType);
+    armnn::TensorInfo outputInfo(4, outputShape, ArmnnType);
+
+    return DepthToSpaceTestImpl<T>(workloadFactory, memoryManager, inputInfo, outputInfo, input, expectedOutput, desc);
+}
+
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<T, 4> DepthToSpaceTest4(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::DataLayout dataLayout)
+{
+    unsigned int inputShape[]  = { 2, 2, 2, 4 };
+    unsigned int outputShape[] = { 2, 4, 4, 1 };
+
+    std::vector<float> input =
+    {
+        1.f,  2.f,  3.f,  4.f,
+
+        5.f,  6.f,  7.f,  8.f,
+
+        9.f, 10.f, 11.f, 12.f,
+
+        13.f, 14.f, 15.f, 16.f,
+
+
+        17.f, 18.f, 19.f, 20.f,
+
+        21.f, 22.f, 23.f, 24.f,
+
+        25.f, 26.f, 27.f, 28.f,
+
+        29.f, 30.f, 31.f, 32.f
+    };
+
+    std::vector<float> expectedOutput
+    {
+         1.f,   2.f,   5.f,   6.f,
+         3.f,   4.f,   7.f,   8.f,
+         9.f,  10.f,  13.f,  14.f,
+        11.f,  12.f,  15.f,  16.f,
+
+
+        17.f,  18.f,  21.f,  22.f,
+        19.f,  20.f,  23.f,  24.f,
+        25.f,  26.f,  29.f,  30.f,
+        27.f,  28.f,  31.f,  32.f
+    };
+
+    armnn::DepthToSpaceQueueDescriptor desc;
+    desc.m_Parameters.m_DataLayout = dataLayout;
+    desc.m_Parameters.m_BlockSize  = 2;
+
+    armnn::TensorInfo inputInfo(4, inputShape, ArmnnType);
+    armnn::TensorInfo outputInfo(4, outputShape, ArmnnType);
+
+    return DepthToSpaceTestImpl<T>(workloadFactory, memoryManager, inputInfo, outputInfo, input, expectedOutput, desc);
+}
+
+// Float32
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
+DepthToSpaceTest1<armnn::DataType::Float32>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::DataLayout dataLayout);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
+DepthToSpaceTest2<armnn::DataType::Float32>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::DataLayout dataLayout);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
+DepthToSpaceTest3<armnn::DataType::Float32>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::DataLayout dataLayout);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float32>, 4>
+DepthToSpaceTest4<armnn::DataType::Float32>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::DataLayout dataLayout);
+
+// Float16
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float16>, 4>
+DepthToSpaceTest1<armnn::DataType::Float16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::DataLayout dataLayout);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float16>, 4>
+DepthToSpaceTest2<armnn::DataType::Float16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::DataLayout dataLayout);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float16>, 4>
+DepthToSpaceTest3<armnn::DataType::Float16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::DataLayout dataLayout);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::Float16>, 4>
+DepthToSpaceTest4<armnn::DataType::Float16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::DataLayout dataLayout);
+
+// QuantisedAsymm8
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4>
+DepthToSpaceTest1<armnn::DataType::QuantisedAsymm8>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::DataLayout dataLayout);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4>
+DepthToSpaceTest2<armnn::DataType::QuantisedAsymm8>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::DataLayout dataLayout);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4>
+DepthToSpaceTest3<armnn::DataType::QuantisedAsymm8>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::DataLayout dataLayout);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedAsymm8>, 4>
+DepthToSpaceTest4<armnn::DataType::QuantisedAsymm8>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::DataLayout dataLayout);
+
+// QuantisedSymm16
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
+DepthToSpaceTest1<armnn::DataType::QuantisedSymm16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::DataLayout dataLayout);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
+DepthToSpaceTest2<armnn::DataType::QuantisedSymm16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::DataLayout dataLayout);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
+DepthToSpaceTest3<armnn::DataType::QuantisedSymm16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::DataLayout dataLayout);
+
+template LayerTestResult<armnn::ResolveType<armnn::DataType::QuantisedSymm16>, 4>
+DepthToSpaceTest4<armnn::DataType::QuantisedSymm16>(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::DataLayout dataLayout);
diff --git a/src/backends/backendsCommon/test/layerTests/DepthToSpaceTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/DepthToSpaceTestImpl.hpp
new file mode 100644
index 0000000..4dbcd26
--- /dev/null
+++ b/src/backends/backendsCommon/test/layerTests/DepthToSpaceTestImpl.hpp
@@ -0,0 +1,36 @@
+//
+// Copyright © 2019 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include "LayerTestResult.hpp"
+
+#include <ResolveType.hpp>
+
+#include <backendsCommon/IBackendInternal.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> DepthToSpaceTest1(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::DataLayout dataLayout);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> DepthToSpaceTest2(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::DataLayout dataLayout);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> DepthToSpaceTest3(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::DataLayout dataLayout);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<T, 4> DepthToSpaceTest4(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    armnn::DataLayout dataLayout);
diff --git a/src/backends/reference/RefLayerSupport.cpp b/src/backends/reference/RefLayerSupport.cpp
index 14183a7..06da776 100644
--- a/src/backends/reference/RefLayerSupport.cpp
+++ b/src/backends/reference/RefLayerSupport.cpp
@@ -466,6 +466,34 @@
     return supported;
 }
 
+bool RefLayerSupport::IsDepthToSpaceSupported(const TensorInfo& input,
+                                              const TensorInfo& output,
+                                              const DepthToSpaceDescriptor& descriptor,
+                                              Optional<std::string&> reasonIfUnsupported) const
+{
+    ignore_unused(descriptor);
+    bool supported = true;
+
+    std::array<DataType,4> supportedTypes =
+    {
+        DataType::Float32,
+        DataType::Float16,
+        DataType::QuantisedAsymm8,
+        DataType::QuantisedSymm16
+    };
+
+    supported &= CheckSupportRule(TypeAnyOf(input, supportedTypes), reasonIfUnsupported,
+        "Reference DepthToSpace: input type not supported");
+
+    supported &= CheckSupportRule(TypeAnyOf(output, supportedTypes), reasonIfUnsupported,
+        "Reference DepthToSpace: output type not supported");
+
+    supported &= CheckSupportRule(TypesAreEqual(input, output), reasonIfUnsupported,
+        "Reference DepthToSpace: input and output types are mismatched");
+
+    return supported;
+}
+
 bool RefLayerSupport::IsDepthwiseConvolutionSupported(const TensorInfo& input,
                                                       const TensorInfo& output,
                                                       const DepthwiseConvolution2dDescriptor& descriptor,
diff --git a/src/backends/reference/RefLayerSupport.hpp b/src/backends/reference/RefLayerSupport.hpp
index 9e8c914..cc9478d 100644
--- a/src/backends/reference/RefLayerSupport.hpp
+++ b/src/backends/reference/RefLayerSupport.hpp
@@ -72,6 +72,11 @@
                           const TensorInfo& output,
                           Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
 
+    bool IsDepthToSpaceSupported(const TensorInfo& input,
+                                 const TensorInfo& output,
+                                 const DepthToSpaceDescriptor& descriptor,
+                                 Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+
     bool IsDepthwiseConvolutionSupported(const TensorInfo& input,
                                          const TensorInfo& output,
                                          const DepthwiseConvolution2dDescriptor& descriptor,
diff --git a/src/backends/reference/RefWorkloadFactory.cpp b/src/backends/reference/RefWorkloadFactory.cpp
index 480b7e2..254b221 100644
--- a/src/backends/reference/RefWorkloadFactory.cpp
+++ b/src/backends/reference/RefWorkloadFactory.cpp
@@ -193,6 +193,12 @@
     return std::make_unique<RefConvolution2dWorkload>(descriptor, info);
 }
 
+std::unique_ptr<IWorkload> RefWorkloadFactory::CreateDepthToSpace(const DepthToSpaceQueueDescriptor& descriptor,
+                                                                  const WorkloadInfo& info) const
+{
+    return std::make_unique<RefDepthToSpaceWorkload>(descriptor, info);
+}
+
 std::unique_ptr<armnn::IWorkload> RefWorkloadFactory::CreateDepthwiseConvolution2d(
     const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const
 {
diff --git a/src/backends/reference/RefWorkloadFactory.hpp b/src/backends/reference/RefWorkloadFactory.hpp
index 033f817..e8e11e0 100644
--- a/src/backends/reference/RefWorkloadFactory.hpp
+++ b/src/backends/reference/RefWorkloadFactory.hpp
@@ -94,6 +94,9 @@
     std::unique_ptr<IWorkload> CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor,
                                                    const WorkloadInfo& info) const override;
 
+    std::unique_ptr<IWorkload> CreateDepthToSpace(const DepthToSpaceQueueDescriptor& descriptor,
+                                                  const WorkloadInfo& info) const override;
+
     std::unique_ptr<IWorkload> CreateDepthwiseConvolution2d(const DepthwiseConvolution2dQueueDescriptor& descriptor,
                                                             const WorkloadInfo& info) const override;
 
diff --git a/src/backends/reference/backend.mk b/src/backends/reference/backend.mk
index fd0df27..597fba8 100644
--- a/src/backends/reference/backend.mk
+++ b/src/backends/reference/backend.mk
@@ -29,6 +29,7 @@
         workloads/Broadcast.cpp \
         workloads/ConvImpl.cpp \
         workloads/Debug.cpp \
+        workloads/DepthToSpace.cpp \
         workloads/DetectionPostProcess.cpp \
         workloads/ElementwiseFunction.cpp \
         workloads/FullyConnected.cpp \
@@ -50,6 +51,7 @@
         workloads/RefConvertFp32ToFp16Workload.cpp \
         workloads/RefConvolution2dWorkload.cpp \
         workloads/RefDebugWorkload.cpp \
+        workloads/RefDepthToSpaceWorkload.cpp \
         workloads/RefDepthwiseConvolution2dWorkload.cpp \
         workloads/RefDequantizeWorkload.cpp \
         workloads/RefDetectionPostProcessWorkload.cpp \
diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp
index a5164f0..901017a 100644
--- a/src/backends/reference/test/RefLayerTests.cpp
+++ b/src/backends/reference/test/RefLayerTests.cpp
@@ -1136,6 +1136,46 @@
 ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNchwQsymm16_6,  BatchToSpaceNdNchwTest6<DataType::QuantisedSymm16>)
 ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNchwQsymm16_7,  BatchToSpaceNdNchwTest7<DataType::QuantisedSymm16>)
 
+// DepthToSpace
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwFloat32_1, DepthToSpaceTest1<DataType::Float32>, DataLayout::NCHW);
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwFloat32_2, DepthToSpaceTest2<DataType::Float32>, DataLayout::NCHW);
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwFloat32_3, DepthToSpaceTest3<DataType::Float32>, DataLayout::NCHW);
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwFloat32_4, DepthToSpaceTest4<DataType::Float32>, DataLayout::NCHW);
+
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwFloat16_1, DepthToSpaceTest1<DataType::Float16>, DataLayout::NCHW);
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwFloat16_2, DepthToSpaceTest2<DataType::Float16>, DataLayout::NCHW);
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwFloat16_3, DepthToSpaceTest3<DataType::Float16>, DataLayout::NCHW);
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwFloat16_4, DepthToSpaceTest4<DataType::Float16>, DataLayout::NCHW);
+
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwUint8_1, DepthToSpaceTest1<DataType::QuantisedAsymm8>, DataLayout::NCHW);
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwUint8_2, DepthToSpaceTest2<DataType::QuantisedAsymm8>, DataLayout::NCHW);
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwUint8_3, DepthToSpaceTest3<DataType::QuantisedAsymm8>, DataLayout::NCHW);
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwUint8_4, DepthToSpaceTest4<DataType::QuantisedAsymm8>, DataLayout::NCHW);
+
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwInt16_1, DepthToSpaceTest1<DataType::QuantisedSymm16>, DataLayout::NCHW);
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwInt16_2, DepthToSpaceTest2<DataType::QuantisedSymm16>, DataLayout::NCHW);
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwInt16_3, DepthToSpaceTest3<DataType::QuantisedSymm16>, DataLayout::NCHW);
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwInt16_4, DepthToSpaceTest4<DataType::QuantisedSymm16>, DataLayout::NCHW);
+
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcFloat32_1, DepthToSpaceTest1<DataType::Float32>, DataLayout::NHWC);
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcFloat32_2, DepthToSpaceTest2<DataType::Float32>, DataLayout::NHWC);
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcFloat32_3, DepthToSpaceTest3<DataType::Float32>, DataLayout::NHWC);
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcFloat32_4, DepthToSpaceTest4<DataType::Float32>, DataLayout::NHWC);
+
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcFloat16_1, DepthToSpaceTest1<DataType::Float16>, DataLayout::NHWC);
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcFloat16_2, DepthToSpaceTest2<DataType::Float16>, DataLayout::NHWC);
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcFloat16_3, DepthToSpaceTest3<DataType::Float16>, DataLayout::NHWC);
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcFloat16_4, DepthToSpaceTest4<DataType::Float16>, DataLayout::NHWC);
+
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcUint8_1, DepthToSpaceTest1<DataType::QuantisedAsymm8>, DataLayout::NHWC);
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcUint8_2, DepthToSpaceTest2<DataType::QuantisedAsymm8>, DataLayout::NHWC);
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcUint8_3, DepthToSpaceTest3<DataType::QuantisedAsymm8>, DataLayout::NHWC);
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcUint8_4, DepthToSpaceTest4<DataType::QuantisedAsymm8>, DataLayout::NHWC);
+
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcInt16_1, DepthToSpaceTest1<DataType::QuantisedSymm16>, DataLayout::NHWC);
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcInt16_2, DepthToSpaceTest2<DataType::QuantisedSymm16>, DataLayout::NHWC);
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcInt16_3, DepthToSpaceTest3<DataType::QuantisedSymm16>, DataLayout::NHWC);
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcInt16_4, DepthToSpaceTest4<DataType::QuantisedSymm16>, DataLayout::NHWC);
 
 // SpaceToDepth
 ARMNN_AUTO_TEST_CASE(SpaceToDepthNchwAsymmQ8, SpaceToDepthNchwAsymmQ8Test)
diff --git a/src/backends/reference/workloads/CMakeLists.txt b/src/backends/reference/workloads/CMakeLists.txt
index 83444ed..c2eb025 100644
--- a/src/backends/reference/workloads/CMakeLists.txt
+++ b/src/backends/reference/workloads/CMakeLists.txt
@@ -24,6 +24,8 @@
     Debug.cpp
     Debug.hpp
     Decoders.hpp
+    DepthToSpace.cpp
+    DepthToSpace.hpp
     DetectionPostProcess.cpp
     DetectionPostProcess.hpp
     ElementwiseFunction.cpp
@@ -71,6 +73,8 @@
     RefElementwiseWorkload.hpp
     RefDebugWorkload.cpp
     RefDebugWorkload.hpp
+    RefDepthToSpaceWorkload.cpp
+    RefDepthToSpaceWorkload.hpp
     RefDepthwiseConvolution2dWorkload.cpp
     RefDepthwiseConvolution2dWorkload.hpp
     RefDequantizeWorkload.cpp
diff --git a/src/backends/reference/workloads/DepthToSpace.cpp b/src/backends/reference/workloads/DepthToSpace.cpp
new file mode 100644
index 0000000..046bd47
--- /dev/null
+++ b/src/backends/reference/workloads/DepthToSpace.cpp
@@ -0,0 +1,79 @@
+//
+// Copyright © 2019 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "DepthToSpace.hpp"
+
+#include <DataLayoutIndexed.hpp>
+#include <Permute.hpp>
+
+#include <boost/assert.hpp>
+
+using namespace armnnUtils;
+
+namespace armnn
+{
+
+void DepthToSpace(const TensorInfo& inputInfo,
+                  const DepthToSpaceDescriptor& descriptor,
+                  const void* inputData,
+                  void* outputData,
+                  unsigned int dataTypeSize)
+{
+    const unsigned int blockSize = descriptor.m_BlockSize;
+    BOOST_ASSERT(blockSize != 0u);
+
+    const TensorShape& inputShape = inputInfo.GetShape();
+    const unsigned int batches = inputShape[0];
+
+    armnnUtils::DataLayoutIndexed dataLayoutIndexed(descriptor.m_DataLayout);
+    const unsigned int inDepth  = inputShape[dataLayoutIndexed.GetChannelsIndex()];
+    const unsigned int inHeight = inputShape[dataLayoutIndexed.GetHeightIndex()];
+    const unsigned int inWidth  = inputShape[dataLayoutIndexed.GetWidthIndex()];
+
+    const unsigned int outDepth = inDepth / (blockSize * blockSize);
+
+    // The 4D input data can be interpreted as 6D (implicitly reshaped) as follows:
+    //
+    // [batch, block size, block size, inDepth, inHeight, inWidth] for NCHW and
+    // [batch, inHeight, inWidth, blockSize, blockSize, outDepth] for NHWC.
+    //
+    // DepthToSpace can then be implemented as a permutation in 6D resulting in
+    // the following shapes:
+    //
+    // [batch, outDepth, inHeight, blockSize, inWidth, blockSize] for NCHW and
+    // [batch, inHeight, blockSize, inWidth, blockSize, outDepth] for NHWC.
+    //
+    // NOTE:
+    // Since 6D tensors are not currently supported, in practice we need to handle each
+    // batch separately and execute 5D permutations
+
+    TensorShape permDestShape;
+    std::initializer_list<unsigned int> permVector;
+    if (descriptor.m_DataLayout == DataLayout::NCHW)
+    {
+        permDestShape = TensorShape({ outDepth, inHeight, blockSize, inWidth, blockSize });
+        permVector    = { 2, 4, 0, 1, 3 };
+    }
+    else
+    {
+        permDestShape = TensorShape({ inHeight, blockSize, inWidth, blockSize, outDepth });
+        permVector    = { 0, 2, 1, 3, 4 };
+    }
+
+    const unsigned int numElementsPerBatch = inputShape.GetNumElements() / batches;
+
+    for (unsigned int batchIndex = 0u; batchIndex < batches; ++batchIndex)
+    {
+        const uintptr_t batchDataOffset = batchIndex * (numElementsPerBatch * dataTypeSize);
+
+        armnnUtils::Permute(permDestShape,
+                            PermutationVector(permVector),
+                            static_cast<const void*>(reinterpret_cast<const uint8_t*>(inputData) + batchDataOffset),
+                            static_cast<void*>(reinterpret_cast<uint8_t*>(outputData) + batchDataOffset),
+                            dataTypeSize);
+    }
+}
+
+} // namespace armnn
diff --git a/src/backends/reference/workloads/DepthToSpace.hpp b/src/backends/reference/workloads/DepthToSpace.hpp
new file mode 100644
index 0000000..a1805c0
--- /dev/null
+++ b/src/backends/reference/workloads/DepthToSpace.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2019 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <armnn/Descriptors.hpp>
+#include <armnn/Tensor.hpp>
+
+namespace armnn
+{
+
+void DepthToSpace(const TensorInfo& inputInfo,
+                  const DepthToSpaceDescriptor& descriptor,
+                  const void* inputData,
+                  void* outputData,
+                  unsigned int dataTypeSize);
+
+} // namespace armnn
diff --git a/src/backends/reference/workloads/RefDepthToSpaceWorkload.cpp b/src/backends/reference/workloads/RefDepthToSpaceWorkload.cpp
new file mode 100644
index 0000000..93c1120
--- /dev/null
+++ b/src/backends/reference/workloads/RefDepthToSpaceWorkload.cpp
@@ -0,0 +1,27 @@
+//
+// Copyright © 2019 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefDepthToSpaceWorkload.hpp"
+
+#include "DepthToSpace.hpp"
+#include "RefWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+void RefDepthToSpaceWorkload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDepthToSpaceWorkload_Execute");
+
+    const TensorInfo inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+
+    DepthToSpace(inputInfo,
+                 m_Data.m_Parameters,
+                 m_Data.m_Inputs[0]->Map(),
+                 m_Data.m_Outputs[0]->Map(),
+                 GetDataTypeSize(inputInfo.GetDataType()));
+}
+
+} // namespace armnn
diff --git a/src/backends/reference/workloads/RefDepthToSpaceWorkload.hpp b/src/backends/reference/workloads/RefDepthToSpaceWorkload.hpp
new file mode 100644
index 0000000..327cd9d
--- /dev/null
+++ b/src/backends/reference/workloads/RefDepthToSpaceWorkload.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2019 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "backendsCommon/Workload.hpp"
+
+namespace armnn
+{
+
+class RefDepthToSpaceWorkload : public BaseWorkload<DepthToSpaceQueueDescriptor>
+{
+public:
+    using BaseWorkload<DepthToSpaceQueueDescriptor>::BaseWorkload;
+    virtual void Execute() const override;
+};
+
+} // namespace armnn
diff --git a/src/backends/reference/workloads/RefWorkloads.hpp b/src/backends/reference/workloads/RefWorkloads.hpp
index b4721b1..94592cb 100644
--- a/src/backends/reference/workloads/RefWorkloads.hpp
+++ b/src/backends/reference/workloads/RefWorkloads.hpp
@@ -25,9 +25,11 @@
 #include "RefConcatWorkload.hpp"
 #include "RefConvertFp16ToFp32Workload.hpp"
 #include "RefConvertFp32ToFp16Workload.hpp"
-#include "RefDepthwiseConvolution2dWorkload.hpp"
-#include "RefDetectionPostProcessWorkload.hpp"
 #include "RefDebugWorkload.hpp"
+#include "RefDepthToSpaceWorkload.hpp"
+#include "RefDepthwiseConvolution2dWorkload.hpp"
+#include "RefDequantizeWorkload.hpp"
+#include "RefDetectionPostProcessWorkload.hpp"
 #include "RefDequantizeWorkload.hpp"
 #include "RefElementwiseWorkload.hpp"
 #include "RefFullyConnectedWorkload.hpp"