IVGCVSW-3885 Add reference workload for DepthToSpace

Signed-off-by: Aron Virginas-Tar <Aron.Virginas-Tar@arm.com>
Change-Id: Id937dc4425884ad1985dcdfaae8bf3fb64f0c766
diff --git a/src/backends/reference/RefLayerSupport.cpp b/src/backends/reference/RefLayerSupport.cpp
index 14183a7..06da776 100644
--- a/src/backends/reference/RefLayerSupport.cpp
+++ b/src/backends/reference/RefLayerSupport.cpp
@@ -466,6 +466,34 @@
     return supported;
 }
 
+bool RefLayerSupport::IsDepthToSpaceSupported(const TensorInfo& input,
+                                              const TensorInfo& output,
+                                              const DepthToSpaceDescriptor& descriptor,
+                                              Optional<std::string&> reasonIfUnsupported) const
+{
+    ignore_unused(descriptor);
+    bool supported = true;
+
+    std::array<DataType,4> supportedTypes =
+    {
+        DataType::Float32,
+        DataType::Float16,
+        DataType::QuantisedAsymm8,
+        DataType::QuantisedSymm16
+    };
+
+    supported &= CheckSupportRule(TypeAnyOf(input, supportedTypes), reasonIfUnsupported,
+        "Reference DepthToSpace: input type not supported");
+
+    supported &= CheckSupportRule(TypeAnyOf(output, supportedTypes), reasonIfUnsupported,
+        "Reference DepthToSpace: output type not supported");
+
+    supported &= CheckSupportRule(TypesAreEqual(input, output), reasonIfUnsupported,
+        "Reference DepthToSpace: input and output types are mismatched");
+
+    return supported;
+}
+
 bool RefLayerSupport::IsDepthwiseConvolutionSupported(const TensorInfo& input,
                                                       const TensorInfo& output,
                                                       const DepthwiseConvolution2dDescriptor& descriptor,
diff --git a/src/backends/reference/RefLayerSupport.hpp b/src/backends/reference/RefLayerSupport.hpp
index 9e8c914..cc9478d 100644
--- a/src/backends/reference/RefLayerSupport.hpp
+++ b/src/backends/reference/RefLayerSupport.hpp
@@ -72,6 +72,11 @@
                           const TensorInfo& output,
                           Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
 
+    bool IsDepthToSpaceSupported(const TensorInfo& input,
+                                 const TensorInfo& output,
+                                 const DepthToSpaceDescriptor& descriptor,
+                                 Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+
     bool IsDepthwiseConvolutionSupported(const TensorInfo& input,
                                          const TensorInfo& output,
                                          const DepthwiseConvolution2dDescriptor& descriptor,
diff --git a/src/backends/reference/RefWorkloadFactory.cpp b/src/backends/reference/RefWorkloadFactory.cpp
index 480b7e2..254b221 100644
--- a/src/backends/reference/RefWorkloadFactory.cpp
+++ b/src/backends/reference/RefWorkloadFactory.cpp
@@ -193,6 +193,12 @@
     return std::make_unique<RefConvolution2dWorkload>(descriptor, info);
 }
 
+std::unique_ptr<IWorkload> RefWorkloadFactory::CreateDepthToSpace(const DepthToSpaceQueueDescriptor& descriptor,
+                                                                  const WorkloadInfo& info) const
+{
+    return std::make_unique<RefDepthToSpaceWorkload>(descriptor, info);
+}
+
 std::unique_ptr<armnn::IWorkload> RefWorkloadFactory::CreateDepthwiseConvolution2d(
     const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const
 {
diff --git a/src/backends/reference/RefWorkloadFactory.hpp b/src/backends/reference/RefWorkloadFactory.hpp
index 033f817..e8e11e0 100644
--- a/src/backends/reference/RefWorkloadFactory.hpp
+++ b/src/backends/reference/RefWorkloadFactory.hpp
@@ -94,6 +94,9 @@
     std::unique_ptr<IWorkload> CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor,
                                                    const WorkloadInfo& info) const override;
 
+    std::unique_ptr<IWorkload> CreateDepthToSpace(const DepthToSpaceQueueDescriptor& descriptor,
+                                                  const WorkloadInfo& info) const override;
+
     std::unique_ptr<IWorkload> CreateDepthwiseConvolution2d(const DepthwiseConvolution2dQueueDescriptor& descriptor,
                                                             const WorkloadInfo& info) const override;
 
diff --git a/src/backends/reference/backend.mk b/src/backends/reference/backend.mk
index fd0df27..597fba8 100644
--- a/src/backends/reference/backend.mk
+++ b/src/backends/reference/backend.mk
@@ -29,6 +29,7 @@
         workloads/Broadcast.cpp \
         workloads/ConvImpl.cpp \
         workloads/Debug.cpp \
+        workloads/DepthToSpace.cpp \
         workloads/DetectionPostProcess.cpp \
         workloads/ElementwiseFunction.cpp \
         workloads/FullyConnected.cpp \
@@ -50,6 +51,7 @@
         workloads/RefConvertFp32ToFp16Workload.cpp \
         workloads/RefConvolution2dWorkload.cpp \
         workloads/RefDebugWorkload.cpp \
+        workloads/RefDepthToSpaceWorkload.cpp \
         workloads/RefDepthwiseConvolution2dWorkload.cpp \
         workloads/RefDequantizeWorkload.cpp \
         workloads/RefDetectionPostProcessWorkload.cpp \
diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp
index a5164f0..901017a 100644
--- a/src/backends/reference/test/RefLayerTests.cpp
+++ b/src/backends/reference/test/RefLayerTests.cpp
@@ -1136,6 +1136,46 @@
 ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNchwQsymm16_6,  BatchToSpaceNdNchwTest6<DataType::QuantisedSymm16>)
 ARMNN_AUTO_TEST_CASE(BatchToSpaceNdNchwQsymm16_7,  BatchToSpaceNdNchwTest7<DataType::QuantisedSymm16>)
 
+// DepthToSpace
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwFloat32_1, DepthToSpaceTest1<DataType::Float32>, DataLayout::NCHW)
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwFloat32_2, DepthToSpaceTest2<DataType::Float32>, DataLayout::NCHW)
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwFloat32_3, DepthToSpaceTest3<DataType::Float32>, DataLayout::NCHW)
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwFloat32_4, DepthToSpaceTest4<DataType::Float32>, DataLayout::NCHW)
+
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwFloat16_1, DepthToSpaceTest1<DataType::Float16>, DataLayout::NCHW)
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwFloat16_2, DepthToSpaceTest2<DataType::Float16>, DataLayout::NCHW)
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwFloat16_3, DepthToSpaceTest3<DataType::Float16>, DataLayout::NCHW)
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwFloat16_4, DepthToSpaceTest4<DataType::Float16>, DataLayout::NCHW)
+
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwUint8_1, DepthToSpaceTest1<DataType::QuantisedAsymm8>, DataLayout::NCHW)
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwUint8_2, DepthToSpaceTest2<DataType::QuantisedAsymm8>, DataLayout::NCHW)
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwUint8_3, DepthToSpaceTest3<DataType::QuantisedAsymm8>, DataLayout::NCHW)
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwUint8_4, DepthToSpaceTest4<DataType::QuantisedAsymm8>, DataLayout::NCHW)
+
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwInt16_1, DepthToSpaceTest1<DataType::QuantisedSymm16>, DataLayout::NCHW)
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwInt16_2, DepthToSpaceTest2<DataType::QuantisedSymm16>, DataLayout::NCHW)
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwInt16_3, DepthToSpaceTest3<DataType::QuantisedSymm16>, DataLayout::NCHW)
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNchwInt16_4, DepthToSpaceTest4<DataType::QuantisedSymm16>, DataLayout::NCHW)
+
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcFloat32_1, DepthToSpaceTest1<DataType::Float32>, DataLayout::NHWC)
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcFloat32_2, DepthToSpaceTest2<DataType::Float32>, DataLayout::NHWC)
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcFloat32_3, DepthToSpaceTest3<DataType::Float32>, DataLayout::NHWC)
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcFloat32_4, DepthToSpaceTest4<DataType::Float32>, DataLayout::NHWC)
+
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcFloat16_1, DepthToSpaceTest1<DataType::Float16>, DataLayout::NHWC)
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcFloat16_2, DepthToSpaceTest2<DataType::Float16>, DataLayout::NHWC)
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcFloat16_3, DepthToSpaceTest3<DataType::Float16>, DataLayout::NHWC)
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcFloat16_4, DepthToSpaceTest4<DataType::Float16>, DataLayout::NHWC)
+
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcUint8_1, DepthToSpaceTest1<DataType::QuantisedAsymm8>, DataLayout::NHWC)
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcUint8_2, DepthToSpaceTest2<DataType::QuantisedAsymm8>, DataLayout::NHWC)
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcUint8_3, DepthToSpaceTest3<DataType::QuantisedAsymm8>, DataLayout::NHWC)
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcUint8_4, DepthToSpaceTest4<DataType::QuantisedAsymm8>, DataLayout::NHWC)
+
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcInt16_1, DepthToSpaceTest1<DataType::QuantisedSymm16>, DataLayout::NHWC)
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcInt16_2, DepthToSpaceTest2<DataType::QuantisedSymm16>, DataLayout::NHWC)
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcInt16_3, DepthToSpaceTest3<DataType::QuantisedSymm16>, DataLayout::NHWC)
+ARMNN_AUTO_TEST_CASE(DepthToSpaceNhwcInt16_4, DepthToSpaceTest4<DataType::QuantisedSymm16>, DataLayout::NHWC)
 
 // SpaceToDepth
 ARMNN_AUTO_TEST_CASE(SpaceToDepthNchwAsymmQ8, SpaceToDepthNchwAsymmQ8Test)
diff --git a/src/backends/reference/workloads/CMakeLists.txt b/src/backends/reference/workloads/CMakeLists.txt
index 83444ed..c2eb025 100644
--- a/src/backends/reference/workloads/CMakeLists.txt
+++ b/src/backends/reference/workloads/CMakeLists.txt
@@ -24,6 +24,8 @@
     Debug.cpp
     Debug.hpp
     Decoders.hpp
+    DepthToSpace.cpp
+    DepthToSpace.hpp
     DetectionPostProcess.cpp
     DetectionPostProcess.hpp
     ElementwiseFunction.cpp
@@ -71,6 +73,8 @@
     RefElementwiseWorkload.hpp
     RefDebugWorkload.cpp
     RefDebugWorkload.hpp
+    RefDepthToSpaceWorkload.cpp
+    RefDepthToSpaceWorkload.hpp
     RefDepthwiseConvolution2dWorkload.cpp
     RefDepthwiseConvolution2dWorkload.hpp
     RefDequantizeWorkload.cpp
diff --git a/src/backends/reference/workloads/DepthToSpace.cpp b/src/backends/reference/workloads/DepthToSpace.cpp
new file mode 100644
index 0000000..046bd47
--- /dev/null
+++ b/src/backends/reference/workloads/DepthToSpace.cpp
@@ -0,0 +1,85 @@
+//
+// Copyright © 2019 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "DepthToSpace.hpp"
+
+#include <DataLayoutIndexed.hpp>
+#include <Permute.hpp>
+
+#include <boost/assert.hpp>
+
+using namespace armnnUtils;
+
+namespace armnn
+{
+
+void DepthToSpace(const TensorInfo& inputInfo,
+                  const DepthToSpaceDescriptor& descriptor,
+                  const void* inputData,
+                  void* outputData,
+                  unsigned int dataTypeSize)
+{
+    const unsigned int blockSize = descriptor.m_BlockSize;
+    BOOST_ASSERT(blockSize != 0u);
+
+    const TensorShape& inputShape = inputInfo.GetShape();
+    const unsigned int batches = inputShape[0];
+
+    armnnUtils::DataLayoutIndexed dataLayoutIndexed(descriptor.m_DataLayout);
+    const unsigned int inDepth  = inputShape[dataLayoutIndexed.GetChannelsIndex()];
+    const unsigned int inHeight = inputShape[dataLayoutIndexed.GetHeightIndex()];
+    const unsigned int inWidth  = inputShape[dataLayoutIndexed.GetWidthIndex()];
+
+    const unsigned int outDepth = inDepth / (blockSize * blockSize);
+
+    // The 4D input data can be interpreted as 6D (implicitly reshaped) as follows:
+    //
+    // [batch, blockSize, blockSize, outDepth, inHeight, inWidth] for NCHW and
+    // [batch, inHeight, inWidth, blockSize, blockSize, outDepth] for NHWC.
+    //
+    // DepthToSpace can then be implemented as a permutation in 6D resulting in
+    // the following shapes:
+    //
+    // [batch, outDepth, inHeight, blockSize, inWidth, blockSize] for NCHW and
+    // [batch, inHeight, blockSize, inWidth, blockSize, outDepth] for NHWC.
+    //
+    // NOTE:
+    // Since 6D tensors are not currently supported, in practice we need to handle each
+    // batch separately and execute 5D permutations
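+    //
+    // Illustrative example (values chosen here only for clarity): an NCHW input of
+    // shape [1, 4, 2, 2] with blockSize = 2 gives outDepth = 4 / (2 * 2) = 1. Each
+    // batch is viewed as the 5D shape [blockSize, blockSize, outDepth, inHeight, inWidth]
+    // = [2, 2, 1, 2, 2], permuted to [outDepth, inHeight, blockSize, inWidth, blockSize]
+    // = [1, 2, 2, 2, 2], and then read back as the 4D output of shape [1, 1, 4, 4].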
+
+    TensorShape permDestShape;
+    PermutationVector permVector{};
+    if (descriptor.m_DataLayout == DataLayout::NCHW)
+    {
+        permDestShape = TensorShape({ outDepth, inHeight, blockSize, inWidth, blockSize });
+        permVector    = { 2, 4, 0, 1, 3 };
+    }
+    else
+    {
+        permDestShape = TensorShape({ inHeight, blockSize, inWidth, blockSize, outDepth });
+        permVector    = { 0, 2, 1, 3, 4 };
+    }
+
+    const unsigned int numElementsPerBatch = inputShape.GetNumElements() / batches;
+
+    for (unsigned int batchIndex = 0u; batchIndex < batches; ++batchIndex)
+    {
+        const uintptr_t batchDataOffset = batchIndex * (numElementsPerBatch * dataTypeSize);
+
+        armnnUtils::Permute(permDestShape,
+                            permVector,
+                            static_cast<const void*>(reinterpret_cast<const uint8_t*>(inputData) + batchDataOffset),
+                            static_cast<void*>(reinterpret_cast<uint8_t*>(outputData) + batchDataOffset),
+                            dataTypeSize);
+    }
+}
+
+} // namespace armnn
diff --git a/src/backends/reference/workloads/DepthToSpace.hpp b/src/backends/reference/workloads/DepthToSpace.hpp
new file mode 100644
index 0000000..a1805c0
--- /dev/null
+++ b/src/backends/reference/workloads/DepthToSpace.hpp
@@ -0,0 +1,22 @@
+//
+// Copyright © 2019 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <armnn/Descriptors.hpp>
+#include <armnn/Tensor.hpp>
+
+namespace armnn
+{
+
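+/// Rearranges elements from the channel dimension into (blockSize x blockSize) spatial
+/// blocks, as configured by the descriptor; dataTypeSize is the element size in bytes.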
+void DepthToSpace(const TensorInfo& inputInfo,
+                  const DepthToSpaceDescriptor& descriptor,
+                  const void* inputData,
+                  void* outputData,
+                  unsigned int dataTypeSize);
+
+} // namespace armnn
diff --git a/src/backends/reference/workloads/RefDepthToSpaceWorkload.cpp b/src/backends/reference/workloads/RefDepthToSpaceWorkload.cpp
new file mode 100644
index 0000000..93c1120
--- /dev/null
+++ b/src/backends/reference/workloads/RefDepthToSpaceWorkload.cpp
@@ -0,0 +1,27 @@
+//
+// Copyright © 2019 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefDepthToSpaceWorkload.hpp"
+
+#include "DepthToSpace.hpp"
+#include "RefWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+void RefDepthToSpaceWorkload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefDepthToSpaceWorkload_Execute");
+
+    const TensorInfo inputInfo = GetTensorInfo(m_Data.m_Inputs[0]);
+
+    DepthToSpace(inputInfo,
+                 m_Data.m_Parameters,
+                 m_Data.m_Inputs[0]->Map(),
+                 m_Data.m_Outputs[0]->Map(),
+                 GetDataTypeSize(inputInfo.GetDataType()));
+}
+
+} // namespace armnn
diff --git a/src/backends/reference/workloads/RefDepthToSpaceWorkload.hpp b/src/backends/reference/workloads/RefDepthToSpaceWorkload.hpp
new file mode 100644
index 0000000..327cd9d
--- /dev/null
+++ b/src/backends/reference/workloads/RefDepthToSpaceWorkload.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2019 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "backendsCommon/Workload.hpp"
+
+namespace armnn
+{
+
+class RefDepthToSpaceWorkload : public BaseWorkload<DepthToSpaceQueueDescriptor>
+{
+public:
+    using BaseWorkload<DepthToSpaceQueueDescriptor>::BaseWorkload;
+    virtual void Execute() const override;
+};
+
+} // namespace armnn
diff --git a/src/backends/reference/workloads/RefWorkloads.hpp b/src/backends/reference/workloads/RefWorkloads.hpp
index b4721b1..94592cb 100644
--- a/src/backends/reference/workloads/RefWorkloads.hpp
+++ b/src/backends/reference/workloads/RefWorkloads.hpp
@@ -25,9 +25,10 @@
 #include "RefConcatWorkload.hpp"
 #include "RefConvertFp16ToFp32Workload.hpp"
 #include "RefConvertFp32ToFp16Workload.hpp"
-#include "RefDepthwiseConvolution2dWorkload.hpp"
-#include "RefDetectionPostProcessWorkload.hpp"
 #include "RefDebugWorkload.hpp"
+#include "RefDepthToSpaceWorkload.hpp"
+#include "RefDepthwiseConvolution2dWorkload.hpp"
 #include "RefDequantizeWorkload.hpp"
+#include "RefDetectionPostProcessWorkload.hpp"
 #include "RefElementwiseWorkload.hpp"
 #include "RefFullyConnectedWorkload.hpp"