IVGCVSW-2291 TILE Operator CL Implementation

 * Added Tile Operator Implementation to CL
 * Added calls to the existing Tile unit tests (CL layer tests and CL end-to-end tests)
 * Added Documentation

Signed-off-by: Cian McGriskin <cian.mcgriskin@arm.com>
Change-Id: If7d25c7aa669c24e7816e5d445c7a3b9ce6972d4
diff --git a/src/backends/cl/ClLayerSupport.cpp b/src/backends/cl/ClLayerSupport.cpp
index ff2b576..60dab05 100644
--- a/src/backends/cl/ClLayerSupport.cpp
+++ b/src/backends/cl/ClLayerSupport.cpp
@@ -81,6 +81,7 @@
 #include "workloads/ClStackWorkload.hpp"
 #include "workloads/ClStridedSliceWorkload.hpp"
 #include "workloads/ClSubtractionWorkload.hpp"
+#include "workloads/ClTileWorkload.hpp"
 #include "workloads/ClTransposeConvolution2dWorkload.hpp"
 #include "workloads/ClTransposeWorkload.hpp"
 #include "workloads/ClUnidirectionalSequenceLstmFloatWorkload.hpp"
@@ -627,6 +628,11 @@
             ARMNN_NO_DEPRECATE_WARN_BEGIN
             return IsSubtractionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
             ARMNN_NO_DEPRECATE_WARN_END
+        case LayerType::Tile:
+            return IsTileSupported(infos[0],
+                                   infos[1],
+                                   *(PolymorphicDowncast<const TileDescriptor*>(&descriptor)),
+                                   reasonIfUnsupported);
         case LayerType::Transpose:
             return IsTransposeSupported(infos[0],
                                         infos[1],
@@ -1527,6 +1533,18 @@
                                    nullptr);
 }
 
+// Support query for the Tile layer on the CL backend. The input/output tensor infos and the
+// descriptor are forwarded to ClTileWorkloadValidate through FORWARD_WORKLOAD_VALIDATE_FUNC.
+bool ClLayerSupport::IsTileSupported(const TensorInfo& input,
+                                     const TensorInfo& output,
+                                     const TileDescriptor& descriptor,
+                                     Optional<std::string&> reasonIfUnsupported) const
+{
+    FORWARD_WORKLOAD_VALIDATE_FUNC(ClTileWorkloadValidate,
+                                   reasonIfUnsupported,
+                                   input, output, descriptor);
+}
+
 bool ClLayerSupport::IsTransposeConvolution2dSupported(const TensorInfo& input,
                                                        const TensorInfo& output,
                                                        const TransposeConvolution2dDescriptor& descriptor,
diff --git a/src/backends/cl/ClLayerSupport.hpp b/src/backends/cl/ClLayerSupport.hpp
index 737ab7a..9a8f896 100644
--- a/src/backends/cl/ClLayerSupport.hpp
+++ b/src/backends/cl/ClLayerSupport.hpp
@@ -343,6 +343,11 @@
                                            const Optional<TensorInfo>& biases,
                                            Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const;
 
+    // Whether the CL backend supports a Tile layer with these tensor infos and descriptor.
+    bool IsTileSupported(const TensorInfo& input, const TensorInfo& output,
+                         const TileDescriptor& descriptor,
+                         Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const;
+
     bool IsTransposeSupported(const TensorInfo& input,
                               const TensorInfo& output,
                               const TransposeDescriptor& descriptor,
diff --git a/src/backends/cl/ClWorkloadFactory.cpp b/src/backends/cl/ClWorkloadFactory.cpp
index 7b4e9be..da05f67 100644
--- a/src/backends/cl/ClWorkloadFactory.cpp
+++ b/src/backends/cl/ClWorkloadFactory.cpp
@@ -755,6 +755,11 @@
             auto subtractionQueueDescriptor = PolymorphicDowncast<const SubtractionQueueDescriptor*>(&descriptor);
             return MakeWorkload<ClSubtractionWorkload>(*subtractionQueueDescriptor, info, m_CLCompileContext);
         }
+        case LayerType::Tile:
+        {
+            auto tileQueueDescriptor = PolymorphicDowncast<const TileQueueDescriptor*>(&descriptor);
+            return MakeWorkload<ClTileWorkload>(*tileQueueDescriptor, info, m_CLCompileContext);
+        }
         case LayerType::Transpose :
         {
             auto transposeQueueDescriptor = PolymorphicDowncast<const TransposeQueueDescriptor*>(&descriptor);
diff --git a/src/backends/cl/backend.mk b/src/backends/cl/backend.mk
index 5ceab70..3499305 100644
--- a/src/backends/cl/backend.mk
+++ b/src/backends/cl/backend.mk
@@ -90,6 +90,7 @@
         workloads/ClStackWorkload.cpp \
         workloads/ClStridedSliceWorkload.cpp \
         workloads/ClSubtractionWorkload.cpp \
+        workloads/ClTileWorkload.cpp \
         workloads/ClTransposeConvolution2dWorkload.cpp \
         workloads/ClTransposeWorkload.cpp \
         workloads/ClUnidirectionalSequenceLstmFloatWorkload.cpp
diff --git a/src/backends/cl/test/ClEndToEndTests.cpp b/src/backends/cl/test/ClEndToEndTests.cpp
index 2d7a813..50ba1db 100644
--- a/src/backends/cl/test/ClEndToEndTests.cpp
+++ b/src/backends/cl/test/ClEndToEndTests.cpp
@@ -26,6 +26,7 @@
 #include <backendsCommon/test/SpaceToDepthEndToEndTestImpl.hpp>
 #include <backendsCommon/test/SplitterEndToEndTestImpl.hpp>
 #include <backendsCommon/test/SubgraphUtilsTest.hpp>
+#include <backendsCommon/test/TileEndToEndTestImpl.hpp>
 #include <backendsCommon/test/TransposeConvolution2dEndToEndTestImpl.hpp>
 #include <backendsCommon/test/TransposeEndToEndTestImpl.hpp>
 
@@ -466,6 +467,42 @@
     Splitter4dDim3EndToEnd<armnn::DataType::QAsymmU8>(clDefaultBackends);
 }
 
+// Tile end-to-end tests on the CL backend (one TEST_CASE per data type)
+TEST_CASE("ClTileEndToEndFloat32")
+{
+    TileEndToEnd<armnn::DataType::Float32>(clDefaultBackends);
+}
+
+TEST_CASE("ClTileEndToEndFloat16")
+{
+    TileEndToEnd<armnn::DataType::Float16>(clDefaultBackends);
+}
+
+TEST_CASE("ClTileEndToEndQAsymmS8")
+{
+    TileEndToEnd<armnn::DataType::QAsymmS8>(clDefaultBackends);
+}
+
+TEST_CASE("ClTileEndToEndQAsymmU8")
+{
+    TileEndToEnd<armnn::DataType::QAsymmU8>(clDefaultBackends);
+}
+
+TEST_CASE("ClTileEndToEndQSymmS8")
+{
+    TileEndToEnd<armnn::DataType::QSymmS8>(clDefaultBackends);
+}
+
+TEST_CASE("ClTileEndToEndQSymmS16")
+{
+    TileEndToEnd<armnn::DataType::QSymmS16>(clDefaultBackends);
+}
+
+TEST_CASE("ClTileEndToEndSigned32")
+{
+    TileEndToEnd<armnn::DataType::Signed32>(clDefaultBackends);
+}
+
 // TransposeConvolution2d
 TEST_CASE("ClTransposeConvolution2dEndToEndFloatNchwTest")
 {
diff --git a/src/backends/cl/test/ClLayerTests.cpp b/src/backends/cl/test/ClLayerTests.cpp
index 93ad653..d8d451e 100644
--- a/src/backends/cl/test/ClLayerTests.cpp
+++ b/src/backends/cl/test/ClLayerTests.cpp
@@ -2033,6 +2033,24 @@
 ARMNN_AUTO_TEST_FIXTURE(DequantizeSimpleUint8ToFp16, ClContextControlFixture, DequantizeSimpleUint8ToFp16Test)
 ARMNN_AUTO_TEST_FIXTURE(DequantizeSimpleInt16ToFp16, ClContextControlFixture, DequantizeSimpleInt16ToFp16Test)
 
+// Tile (1d to 4d inputs for Float32, Float16, QAsymmS8 and QAsymmU8)
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile1dTestFloat32, ClContextControlFixture, Tile1dTest<DataType::Float32>)
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile2dTestFloat32, ClContextControlFixture, Tile2dTest<DataType::Float32>)
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile3dTestFloat32, ClContextControlFixture, Tile3dTest<DataType::Float32>)
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile4dTestFloat32, ClContextControlFixture, Tile4dTest<DataType::Float32>)
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile1dTestFloat16, ClContextControlFixture, Tile1dTest<DataType::Float16>)
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile2dTestFloat16, ClContextControlFixture, Tile2dTest<DataType::Float16>)
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile3dTestFloat16, ClContextControlFixture, Tile3dTest<DataType::Float16>)
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile4dTestFloat16, ClContextControlFixture, Tile4dTest<DataType::Float16>)
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile1dTestInt8, ClContextControlFixture, Tile1dTest<DataType::QAsymmS8>)
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile2dTestInt8, ClContextControlFixture, Tile2dTest<DataType::QAsymmS8>)
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile3dTestInt8, ClContextControlFixture, Tile3dTest<DataType::QAsymmS8>)
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile4dTestInt8, ClContextControlFixture, Tile4dTest<DataType::QAsymmS8>)
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile1dTestUint8, ClContextControlFixture, Tile1dTest<DataType::QAsymmU8>)
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile2dTestUint8, ClContextControlFixture, Tile2dTest<DataType::QAsymmU8>)
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile3dTestUint8, ClContextControlFixture, Tile3dTest<DataType::QAsymmU8>)
+ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tile4dTestUint8, ClContextControlFixture, Tile4dTest<DataType::QAsymmU8>)
+
 // Transpose
 ARMNN_AUTO_TEST_FIXTURE_WITH_THF(
     SimpleTransposeFloat32, ClContextControlFixture, SimpleTransposeTest<DataType::Float32>)
diff --git a/src/backends/cl/workloads/CMakeLists.txt b/src/backends/cl/workloads/CMakeLists.txt
index 030d719..cb16ab1 100644
--- a/src/backends/cl/workloads/CMakeLists.txt
+++ b/src/backends/cl/workloads/CMakeLists.txt
@@ -131,6 +131,8 @@
     ClStridedSliceWorkload.hpp
     ClSubtractionWorkload.cpp
     ClSubtractionWorkload.hpp
+    ClTileWorkload.cpp
+    ClTileWorkload.hpp
     ClTransposeConvolution2dWorkload.cpp
     ClTransposeConvolution2dWorkload.hpp
     ClTransposeWorkload.cpp
diff --git a/src/backends/cl/workloads/ClTileWorkload.cpp b/src/backends/cl/workloads/ClTileWorkload.cpp
new file mode 100644
index 0000000..2c2f63f
--- /dev/null
+++ b/src/backends/cl/workloads/ClTileWorkload.cpp
@@ -0,0 +1,50 @@
+//
+// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClTileWorkload.hpp"
+#include "ClWorkloadUtils.hpp"
+#include <aclCommon/ArmComputeUtils.hpp>
+#include <cl/ClTensorHandle.hpp>
+#include <vector>
+#include <algorithm>
+
+using namespace armnn::armcomputetensorutils;
+namespace armnn
+{
+// Returns the status reported by arm_compute::CLTile::validate for this input/output/descriptor.
+arm_compute::Status ClTileWorkloadValidate(const TensorInfo& input,
+                                           const TensorInfo& output,
+                                           const TileDescriptor& descriptor)
+{
+    // ACL is given the multiples in reverse order (presumably ACL's dimension order is reversed - confirm).
+    const std::vector<uint32_t> reversedMultiples(descriptor.m_Multiples.rbegin(), descriptor.m_Multiples.rend());
+    const arm_compute::TensorInfo aclInputInfo  = BuildArmComputeTensorInfo(input);
+    const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
+
+    return arm_compute::CLTile::validate(&aclInputInfo, &aclOutputInfo, reversedMultiples);
+}
+
+ClTileWorkload::ClTileWorkload(const armnn::TileQueueDescriptor& descriptor,
+                               const armnn::WorkloadInfo& info,
+                               const arm_compute::CLCompileContext& clCompileContext)
+    : BaseWorkload<TileQueueDescriptor>(descriptor, info)
+{
+    m_Data.ValidateInputsOutputs("ClTileWorkload", 1, 1);
+
+    // The multiples are handed to ACL in reverse order, the same way ClTileWorkloadValidate does it.
+    const auto& multiples = descriptor.m_Parameters.m_Multiples;
+    const std::vector<uint32_t> reversedMultiples(multiples.rbegin(), multiples.rend());
+    arm_compute::ICLTensor& srcTensor = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& dstTensor = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    m_Layer.configure(clCompileContext, &srcTensor, &dstTensor, reversedMultiples);
+}
+
+void ClTileWorkload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClTileWorkload_Execute", this->GetGuid());
+    m_Layer.run(); // Executes the CLTile function that the constructor configured.
+}
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/backends/cl/workloads/ClTileWorkload.hpp b/src/backends/cl/workloads/ClTileWorkload.hpp
new file mode 100644
index 0000000..0f68559
--- /dev/null
+++ b/src/backends/cl/workloads/ClTileWorkload.hpp
@@ -0,0 +1,27 @@
+//
+// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+#include "ClBaseWorkload.hpp"
+#include <arm_compute/runtime/CL/functions/CLTile.h>
+
+namespace armnn
+{
+// Checks the given tensor infos and Tile descriptor against arm_compute::CLTile::validate.
+arm_compute::Status ClTileWorkloadValidate(const TensorInfo& input, const TensorInfo& output,
+                                           const TileDescriptor& descriptor);
+
+// CL backend workload for the Tile layer, backed by arm_compute::CLTile.
+class ClTileWorkload : public BaseWorkload<TileQueueDescriptor>
+{
+public:
+    ClTileWorkload(const TileQueueDescriptor& descriptor, const WorkloadInfo& info,
+                   const arm_compute::CLCompileContext& clCompileContext);
+    void Execute() const override;
+private:
+    mutable arm_compute::CLTile m_Layer; // mutable so the const Execute() can call m_Layer.run()
+};
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/backends/cl/workloads/ClWorkloads.hpp b/src/backends/cl/workloads/ClWorkloads.hpp
index d862aab..cec8706 100644
--- a/src/backends/cl/workloads/ClWorkloads.hpp
+++ b/src/backends/cl/workloads/ClWorkloads.hpp
@@ -68,6 +68,7 @@
 #include "ClSubtractionWorkload.hpp"
 #include "ClConvertFp16ToFp32Workload.hpp"
 #include "ClConvertFp32ToFp16Workload.hpp"
+#include "ClTileWorkload.hpp"
 #include "ClTransposeConvolution2dWorkload.hpp"
 #include "ClTransposeWorkload.hpp"
 #include "ClUnidirectionalSequenceLstmFloatWorkload.hpp"