IVGCVSW-2164 Added ACL implementation of SpaceToBatchNd operation to ArmNN
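
For reference, the new GpuAcc path can be driven through the public ArmNN graph API roughly as in
the sketch below; the shapes, layer name and the wrapper function are illustrative only and are not
taken from this patch:

    #include <armnn/ArmNN.hpp>
    #include <utility>

    // Illustrative sketch: 1x4x4x1 NHWC input, 2x2 block, no padding -> 4x2x2x1 output on GpuAcc.
    void RunSpaceToBatchNdOnGpuAcc()
    {
        using namespace armnn;

        SpaceToBatchNdDescriptor desc;
        desc.m_BlockShape = { 2, 2 };            // [H, W]
        desc.m_PadList    = { {0, 0}, {0, 0} };  // [{top, bottom}, {left, right}]
        desc.m_DataLayout = DataLayout::NHWC;

        INetworkPtr net = INetwork::Create();
        IConnectableLayer* input  = net->AddInputLayer(0);
        IConnectableLayer* s2b    = net->AddSpaceToBatchNdLayer(desc, "SpaceToBatchNd");
        IConnectableLayer* output = net->AddOutputLayer(0);

        input->GetOutputSlot(0).SetTensorInfo(TensorInfo(TensorShape({1, 4, 4, 1}), DataType::Float32));
        s2b->GetOutputSlot(0).SetTensorInfo(TensorInfo(TensorShape({4, 2, 2, 1}), DataType::Float32));
        input->GetOutputSlot(0).Connect(s2b->GetInputSlot(0));
        s2b->GetOutputSlot(0).Connect(output->GetInputSlot(0));

        IRuntime::CreationOptions options;
        IRuntimePtr runtime = IRuntime::Create(options);
        IOptimizedNetworkPtr optNet = Optimize(*net, {Compute::GpuAcc}, runtime->GetDeviceSpec());

        NetworkId netId = 0;
        runtime->LoadNetwork(netId, std::move(optNet));
        // runtime->EnqueueWorkload(netId, inputTensors, outputTensors) then runs the CL workload.
    }

Before this change, ClWorkloadFactory::CreateSpaceToBatchNd returned nullptr, so the layer could not
be placed on GpuAcc.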

!android-nn-driver:428

Change-Id: I42e59ad96d2c80f46b085182855d34b710a74dfe
diff --git a/src/backends/aclCommon/ArmComputeTensorUtils.cpp b/src/backends/aclCommon/ArmComputeTensorUtils.cpp
index ba7cd14..6b55948 100644
--- a/src/backends/aclCommon/ArmComputeTensorUtils.cpp
+++ b/src/backends/aclCommon/ArmComputeTensorUtils.cpp
@@ -159,5 +159,10 @@
     return aclPerm;
 }
 
+arm_compute::Size2D BuildArmComputeSize2D(const unsigned int width, const unsigned int height)
+{
+    return arm_compute::Size2D(width, height);
+}
+
 } // namespace armcomputetensorutils
 } // namespace armnn
diff --git a/src/backends/aclCommon/ArmComputeTensorUtils.hpp b/src/backends/aclCommon/ArmComputeTensorUtils.hpp
index 18f41ee..2a14d65 100644
--- a/src/backends/aclCommon/ArmComputeTensorUtils.hpp
+++ b/src/backends/aclCommon/ArmComputeTensorUtils.hpp
@@ -10,6 +10,7 @@
 #include <arm_compute/core/ITensor.h>
 #include <arm_compute/core/TensorInfo.h>
 #include <arm_compute/core/Types.h>
+#include <arm_compute/core/Size2D.h>
 
 #include <boost/cast.hpp>
 
@@ -49,6 +50,9 @@
 /// Utility function used to setup an arm_compute::PermutationVector object from an armnn::PermutationVector.
 arm_compute::PermutationVector BuildArmComputePermutationVector(const armnn::PermutationVector& vector);
 
+/// Utility function used to setup an arm_compute::Size2D object from width and height values.
+arm_compute::Size2D BuildArmComputeSize2D(const unsigned int width, const unsigned int height);
+
 /// Utility function used to setup an arm_compute::PadStrideInfo object from an armnn layer descriptor.
 template <typename Descriptor>
 arm_compute::PadStrideInfo BuildArmComputePadStrideInfo(const Descriptor &descriptor)
diff --git a/src/backends/cl/ClLayerSupport.cpp b/src/backends/cl/ClLayerSupport.cpp
index 719412b..4bcc480 100644
--- a/src/backends/cl/ClLayerSupport.cpp
+++ b/src/backends/cl/ClLayerSupport.cpp
@@ -36,6 +36,7 @@
 #include "workloads/ClPermuteWorkload.hpp"
 #include "workloads/ClPooling2dWorkload.hpp"
 #include "workloads/ClSoftmaxBaseWorkload.hpp"
+#include "workloads/ClSpaceToBatchNdWorkload.hpp"
 #include "workloads/ClStridedSliceWorkload.hpp"
 #include "workloads/ClSubtractionWorkload.hpp"
 #endif
@@ -524,6 +525,18 @@
     FORWARD_WORKLOAD_VALIDATE_FUNC(ClSoftmaxWorkloadValidate, reasonIfUnsupported, input, output);
 }
 
+bool ClLayerSupport::IsSpaceToBatchNdSupported(const TensorInfo& input,
+                                               const TensorInfo& output,
+                                               const SpaceToBatchNdDescriptor& descriptor,
+                                               Optional<std::string&> reasonIfUnsupported) const
+{
+    FORWARD_WORKLOAD_VALIDATE_FUNC(ClSpaceToBatchNdWorkloadValidate,
+                                   reasonIfUnsupported,
+                                   input,
+                                   output,
+                                   descriptor);
+}
+
 bool ClLayerSupport::IsSplitterSupported(const TensorInfo& input,
                                          const ViewsDescriptor& descriptor,
                                          Optional<std::string&> reasonIfUnsupported) const
diff --git a/src/backends/cl/ClLayerSupport.hpp b/src/backends/cl/ClLayerSupport.hpp
index 4281dfe..61c08b4 100644
--- a/src/backends/cl/ClLayerSupport.hpp
+++ b/src/backends/cl/ClLayerSupport.hpp
@@ -180,6 +180,11 @@
                             const SoftmaxDescriptor& descriptor,
                             Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
 
+    bool IsSpaceToBatchNdSupported(const TensorInfo& input,
+                                   const TensorInfo& output,
+                                   const SpaceToBatchNdDescriptor& descriptor,
+                                   Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+
     bool IsSplitterSupported(const TensorInfo& input,
                              const ViewsDescriptor& descriptor,
                              Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
diff --git a/src/backends/cl/ClWorkloadFactory.cpp b/src/backends/cl/ClWorkloadFactory.cpp
index 37d9b31..0653b41 100644
--- a/src/backends/cl/ClWorkloadFactory.cpp
+++ b/src/backends/cl/ClWorkloadFactory.cpp
@@ -267,7 +267,7 @@
 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateSpaceToBatchNd(const SpaceToBatchNdQueueDescriptor& descriptor,
     const WorkloadInfo& info) const
 {
-    return nullptr;
+    return MakeWorkload<ClSpaceToBatchNdWorkload>(descriptor, info);
 }
 
 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor,
diff --git a/src/backends/cl/backend.mk b/src/backends/cl/backend.mk
index ef56356..d8a6665 100644
--- a/src/backends/cl/backend.mk
+++ b/src/backends/cl/backend.mk
@@ -41,6 +41,7 @@
         workloads/ClSoftmaxBaseWorkload.cpp \
         workloads/ClSoftmaxFloatWorkload.cpp \
         workloads/ClSoftmaxUint8Workload.cpp \
+        workloads/ClSpaceToBatchNdWorkload.cpp \
         workloads/ClStridedSliceWorkload.cpp \
         workloads/ClSubtractionWorkload.cpp
 
diff --git a/src/backends/cl/test/ClLayerTests.cpp b/src/backends/cl/test/ClLayerTests.cpp
index b89fba6..92ac526 100755
--- a/src/backends/cl/test/ClLayerTests.cpp
+++ b/src/backends/cl/test/ClLayerTests.cpp
@@ -359,6 +359,27 @@
 ARMNN_AUTO_TEST_CASE(MeanVtsFloat2, MeanVtsFloat2Test)
 ARMNN_AUTO_TEST_CASE(MeanVtsFloat3, MeanVtsFloat3Test)
 
+// Space To Batch Nd
+ARMNN_AUTO_TEST_CASE(SpaceToBatchNdSimpleFloat32, SpaceToBatchNdSimpleFloat32Test)
+ARMNN_AUTO_TEST_CASE(SpaceToBatchNdMultiChannelsFloat32, SpaceToBatchNdMultiChannelsFloat32Test)
+ARMNN_AUTO_TEST_CASE(SpaceToBatchNdMultiBlockFloat32, SpaceToBatchNdMultiBlockFloat32Test)
+ARMNN_AUTO_TEST_CASE(SpaceToBatchNdPaddingFloat32, SpaceToBatchNdPaddingFloat32Test)
+
+ARMNN_AUTO_TEST_CASE(SpaceToBatchNdSimpleUint8, SpaceToBatchNdSimpleUint8Test)
+ARMNN_AUTO_TEST_CASE(SpaceToBatchNdMultiChannelsUint8, SpaceToBatchNdMultiChannelsUint8Test)
+ARMNN_AUTO_TEST_CASE(SpaceToBatchNdMultiBlockUint8, SpaceToBatchNdMultiBlockUint8Test)
+ARMNN_AUTO_TEST_CASE(SpaceToBatchNdPaddingUint8, SpaceToBatchNdPaddingUint8Test)
+
+ARMNN_AUTO_TEST_CASE(SpaceToBatchNdSimpleNHWCFloat32, SpaceToBatchNdSimpleNHWCFloat32Test)
+ARMNN_AUTO_TEST_CASE(SpaceToBatchNdMultiChannelsNHWCFloat32, SpaceToBatchNdMultiChannelsNHWCFloat32Test)
+ARMNN_AUTO_TEST_CASE(SpaceToBatchNdMultiBlockNHWCFloat32, SpaceToBatchNdMultiBlockNHWCFloat32Test)
+ARMNN_AUTO_TEST_CASE(SpaceToBatchNdPaddingNHWCFloat32, SpaceToBatchNdPaddingNHWCFloat32Test)
+
+ARMNN_AUTO_TEST_CASE(SpaceToBatchNdSimpleNHWCUint8, SpaceToBatchNdSimpleNHWCUint8Test)
+ARMNN_AUTO_TEST_CASE(SpaceToBatchNdMultiChannelsNHWCUint8, SpaceToBatchNdMultiChannelsNHWCUint8Test)
+ARMNN_AUTO_TEST_CASE(SpaceToBatchNdMultiBlockNHWCUint8, SpaceToBatchNdMultiBlockNHWCUint8Test)
+ARMNN_AUTO_TEST_CASE(SpaceToBatchNdPaddingNHWCUint8, SpaceToBatchNdPaddingNHWCUint8Test)
+
 // Strided Slice
 ARMNN_AUTO_TEST_CASE(StridedSlice4DFloat32, StridedSlice4DFloat32Test)
 ARMNN_AUTO_TEST_CASE(StridedSlice4DReverseFloat32, StridedSlice4DReverseFloat32Test)
diff --git a/src/backends/cl/workloads/CMakeLists.txt b/src/backends/cl/workloads/CMakeLists.txt
index d769e57..dbc29a3 100644
--- a/src/backends/cl/workloads/CMakeLists.txt
+++ b/src/backends/cl/workloads/CMakeLists.txt
@@ -58,6 +58,8 @@
     ClSoftmaxFloatWorkload.hpp
     ClSoftmaxUint8Workload.cpp
     ClSoftmaxUint8Workload.hpp
+    ClSpaceToBatchNdWorkload.cpp
+    ClSpaceToBatchNdWorkload.hpp
     ClSplitterWorkload.hpp
     ClStridedSliceWorkload.cpp
     ClStridedSliceWorkload.hpp
diff --git a/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.cpp b/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.cpp
new file mode 100644
index 0000000..de58db4
--- /dev/null
+++ b/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.cpp
@@ -0,0 +1,86 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClSpaceToBatchNdWorkload.hpp"
+
+#include "ClWorkloadUtils.hpp"
+
+#include <aclCommon/ArmComputeUtils.hpp>
+#include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <backendsCommon/CpuTensorHandle.hpp>
+#include <cl/ClLayerSupport.hpp>
+#include <cl/ClTensorHandle.hpp>
+
+#include <boost/polymorphic_pointer_cast.hpp>
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+arm_compute::Status ClSpaceToBatchNdWorkloadValidate(const TensorInfo& input,
+                                                     const TensorInfo& output,
+                                                     const SpaceToBatchNdDescriptor& descriptor)
+{
+    const arm_compute::TensorInfo aclInputInfo  = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
+    const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
+
+    // ArmNN blockShape is [H, W], but ACL takes the block shape as W, H
+    int32_t blockHeight = boost::numeric_cast<int32_t>(descriptor.m_BlockShape[0]);
+    int32_t blockWidth  = boost::numeric_cast<int32_t>(descriptor.m_BlockShape[1]);
+
+    // ArmNN m_PadList is [{top, bottom}, {left, right}]; ACL expects padding as (left, top) and (right, bottom)
+    arm_compute::Size2D paddingLeftTop = BuildArmComputeSize2D(
+        descriptor.m_PadList[1].first, descriptor.m_PadList[0].first);
+    arm_compute::Size2D paddingRightBottom = BuildArmComputeSize2D(
+        descriptor.m_PadList[1].second, descriptor.m_PadList[0].second);
+
+    return arm_compute::CLSpaceToBatchLayer::validate(&aclInputInfo,
+                                                      blockWidth,
+                                                      blockHeight,
+                                                      paddingLeftTop,
+                                                      paddingRightBottom,
+                                                      &aclOutputInfo);
+}
+
+ClSpaceToBatchNdWorkload::ClSpaceToBatchNdWorkload(
+    const SpaceToBatchNdQueueDescriptor& descriptor, const WorkloadInfo& info)
+    : BaseWorkload<SpaceToBatchNdQueueDescriptor>(descriptor, info)
+{
+    m_Data.ValidateInputsOutputs("ClSpaceToBatchNdWorkload", 1, 1);
+
+    arm_compute::ICLTensor& input  =
+        boost::polymorphic_pointer_downcast<IClTensorHandle>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output =
+        boost::polymorphic_pointer_downcast<IClTensorHandle>(m_Data.m_Outputs[0])->GetTensor();
+
+    // ArmNN blockShape is [H, W], but ACL takes the block shape as W, H
+    int32_t blockHeight = boost::numeric_cast<int32_t>(m_Data.m_Parameters.m_BlockShape[0]);
+    int32_t blockWidth  = boost::numeric_cast<int32_t>(m_Data.m_Parameters.m_BlockShape[1]);
+
+    // ArmNN m_PadList is [{top, bottom}, {left, right}]; ACL expects padding as (left, top) and (right, bottom)
+    arm_compute::Size2D paddingLeftTop = BuildArmComputeSize2D(
+        m_Data.m_Parameters.m_PadList[1].first, m_Data.m_Parameters.m_PadList[0].first);
+    arm_compute::Size2D paddingRightBottom = BuildArmComputeSize2D(
+        m_Data.m_Parameters.m_PadList[1].second, m_Data.m_Parameters.m_PadList[0].second);
+
+    arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
+    input.info()->set_data_layout(aclDataLayout);
+    output.info()->set_data_layout(aclDataLayout);
+
+    m_SpaceToBatchLayer.configure(&input,
+                                  blockWidth,
+                                  blockHeight,
+                                  paddingLeftTop,
+                                  paddingRightBottom,
+                                  &output);
+}
+
+void ClSpaceToBatchNdWorkload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_CL("ClSpaceToBatchNdWorkload_Execute");
+    RunClFunction(m_SpaceToBatchLayer, CHECK_LOCATION());
+}
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.hpp b/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.hpp
new file mode 100644
index 0000000..7500b5a
--- /dev/null
+++ b/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.hpp
@@ -0,0 +1,33 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <armnn/Tensor.hpp>
+#include <armnn/Descriptors.hpp>
+
+#include <backendsCommon/Workload.hpp>
+
+#include <arm_compute/runtime/CL/functions/CLSpaceToBatchLayer.h>
+
+namespace armnn
+{
+
+arm_compute::Status ClSpaceToBatchNdWorkloadValidate(const TensorInfo& input,
+                                                     const TensorInfo& output,
+                                                     const SpaceToBatchNdDescriptor& descriptor);
+
+class ClSpaceToBatchNdWorkload : public BaseWorkload<SpaceToBatchNdQueueDescriptor>
+{
+public:
+    ClSpaceToBatchNdWorkload(const SpaceToBatchNdQueueDescriptor& descriptor, const WorkloadInfo& info);
+    void Execute() const override;
+
+private:
+    mutable arm_compute::CLSpaceToBatchLayer m_SpaceToBatchLayer;
+};
+
+} //namespace armnn
+
diff --git a/src/backends/cl/workloads/ClWorkloads.hpp b/src/backends/cl/workloads/ClWorkloads.hpp
index b0b78cd..d00c2bb 100644
--- a/src/backends/cl/workloads/ClWorkloads.hpp
+++ b/src/backends/cl/workloads/ClWorkloads.hpp
@@ -28,6 +28,7 @@
 #include "ClResizeBilinearFloatWorkload.hpp"
 #include "ClSoftmaxFloatWorkload.hpp"
 #include "ClSoftmaxUint8Workload.hpp"
+#include "ClSpaceToBatchNdWorkload.hpp"
 #include "ClSplitterWorkload.hpp"
 #include "ClStridedSliceWorkload.hpp"
 #include "ClSubtractionWorkload.hpp"