IVGCVSW-5419 'Add ACL Support for CAST Operator'

Signed-off-by: Sadik Armagan <sadik.armagan@arm.com>
Change-Id: I481343af311cf3cbc81eee80e80d8b5581fdfd7b
diff --git a/src/backends/neon/NeonLayerSupport.cpp b/src/backends/neon/NeonLayerSupport.cpp
index 66999c1..a1519cd 100644
--- a/src/backends/neon/NeonLayerSupport.cpp
+++ b/src/backends/neon/NeonLayerSupport.cpp
@@ -27,6 +27,7 @@
 #include "workloads/NeonArgMinMaxWorkload.hpp"
 #include "workloads/NeonBatchNormalizationWorkload.hpp"
 #include "workloads/NeonBatchToSpaceNdWorkload.hpp"
+#include "workloads/NeonCastWorkload.hpp"
 #include "workloads/NeonExpWorkload.hpp"
 #include "workloads/NeonComparisonWorkload.hpp"
 #include "workloads/NeonConstantWorkload.hpp"
@@ -220,6 +221,16 @@
                                    descriptor);
 }
 
+bool NeonLayerSupport::IsCastSupported(const TensorInfo& input,
+                                       const TensorInfo& output,
+                                       Optional<std::string&> reasonIfUnsupported) const
+{
+    FORWARD_WORKLOAD_VALIDATE_FUNC(NeonCastValidate,
+                                   reasonIfUnsupported,
+                                   input,
+                                   output);
+}
+
 bool NeonLayerSupport::IsComparisonSupported(const TensorInfo& input0,
                                              const TensorInfo& input1,
                                              const TensorInfo& output,
diff --git a/src/backends/neon/NeonLayerSupport.hpp b/src/backends/neon/NeonLayerSupport.hpp
index 2ae1b0d..c2c81f4 100644
--- a/src/backends/neon/NeonLayerSupport.hpp
+++ b/src/backends/neon/NeonLayerSupport.hpp
@@ -53,6 +53,10 @@
                                    const BatchToSpaceNdDescriptor& descriptor,
                                    Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
 
+    bool IsCastSupported(const TensorInfo& input,
+                         const TensorInfo& output,
+                         Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+
     bool IsComparisonSupported(const TensorInfo& input0,
                                const TensorInfo& input1,
                                const TensorInfo& output,
diff --git a/src/backends/neon/NeonTensorHandle.hpp b/src/backends/neon/NeonTensorHandle.hpp
index be5bd45..ae8aa5d 100644
--- a/src/backends/neon/NeonTensorHandle.hpp
+++ b/src/backends/neon/NeonTensorHandle.hpp
@@ -186,6 +186,7 @@
                 armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                  static_cast<uint8_t*>(memory));
                 break;
+            case arm_compute::DataType::QSYMM8:
             case arm_compute::DataType::QASYMM8_SIGNED:
                 armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                  static_cast<int8_t*>(memory));
@@ -228,6 +229,7 @@
                 armcomputetensorutils::CopyArmComputeITensorData(static_cast<const uint8_t*>(memory),
                                                                  this->GetTensor());
                 break;
+            case arm_compute::DataType::QSYMM8:
             case arm_compute::DataType::QASYMM8_SIGNED:
                 armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int8_t*>(memory),
                                                                  this->GetTensor());
@@ -320,6 +322,7 @@
                 armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                  static_cast<uint8_t*>(memory));
                 break;
+            case arm_compute::DataType::QSYMM8:
             case arm_compute::DataType::QASYMM8_SIGNED:
                 armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                  static_cast<int8_t*>(memory));
@@ -354,6 +357,7 @@
                 armcomputetensorutils::CopyArmComputeITensorData(static_cast<const uint8_t*>(memory),
                                                                  this->GetTensor());
                 break;
+            case arm_compute::DataType::QSYMM8:
             case arm_compute::DataType::QASYMM8_SIGNED:
                 armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int8_t*>(memory),
                                                                  this->GetTensor());
diff --git a/src/backends/neon/NeonWorkloadFactory.cpp b/src/backends/neon/NeonWorkloadFactory.cpp
index 2835201..9967fb8 100644
--- a/src/backends/neon/NeonWorkloadFactory.cpp
+++ b/src/backends/neon/NeonWorkloadFactory.cpp
@@ -172,6 +172,12 @@
     return std::make_unique<NeonBatchToSpaceNdWorkload>(descriptor, info);
 }
 
+std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateCast(const CastQueueDescriptor& descriptor,
+                                                           const WorkloadInfo& info) const
+{
+    return std::make_unique<NeonCastWorkload>(descriptor, info);
+}
+
 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateComparison(const ComparisonQueueDescriptor& descriptor,
                                                                  const WorkloadInfo& info) const
 {
diff --git a/src/backends/neon/NeonWorkloadFactory.hpp b/src/backends/neon/NeonWorkloadFactory.hpp
index ee11002..e791bbc 100644
--- a/src/backends/neon/NeonWorkloadFactory.hpp
+++ b/src/backends/neon/NeonWorkloadFactory.hpp
@@ -71,6 +71,9 @@
     std::unique_ptr<IWorkload> CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor& descriptor,
                                                     const WorkloadInfo& Info) const override;
 
+    std::unique_ptr<IWorkload> CreateCast(const CastQueueDescriptor& descriptor,
+                                          const WorkloadInfo& info) const override;
+
     std::unique_ptr<IWorkload> CreateComparison(const ComparisonQueueDescriptor& descriptor,
                                                 const WorkloadInfo& Info) const override;
 
diff --git a/src/backends/neon/backend.mk b/src/backends/neon/backend.mk
index 6feeeb5..21a1770 100644
--- a/src/backends/neon/backend.mk
+++ b/src/backends/neon/backend.mk
@@ -28,6 +28,7 @@
         workloads/NeonArgMinMaxWorkload.cpp \
         workloads/NeonBatchNormalizationWorkload.cpp \
         workloads/NeonBatchToSpaceNdWorkload.cpp \
+        workloads/NeonCastWorkload.cpp \
         workloads/NeonComparisonWorkload.cpp \
         workloads/NeonConcatWorkload.cpp \
         workloads/NeonConstantWorkload.cpp \
diff --git a/src/backends/neon/test/NeonLayerTests.cpp b/src/backends/neon/test/NeonLayerTests.cpp
index f484ead..6cd26df 100644
--- a/src/backends/neon/test/NeonLayerTests.cpp
+++ b/src/backends/neon/test/NeonLayerTests.cpp
@@ -1391,6 +1391,14 @@
 ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMinFloat32, ReduceMinSimpleTest<DataType::Float32>)
 ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMinNegativeAxisFloat32, ReduceMinNegativeAxisTest<DataType::Float32>)
 
+// Cast
+ARMNN_AUTO_TEST_CASE_WITH_THF(CastInt32ToFloat, CastInt32ToFloat2dTest)
+ARMNN_AUTO_TEST_CASE_WITH_THF(CastInt8AsymmToFloat, CastInt8AsymmToFloat2dTest)
+ARMNN_AUTO_TEST_CASE_WITH_THF(CastUIntToFloat, CastUInt8ToFloat2dTest)
+ARMNN_AUTO_TEST_CASE_WITH_THF(CastFloatToIn8, CastFloat32ToInt82dTest)
+ARMNN_AUTO_TEST_CASE_WITH_THF(CastFloatToUInt8, CastFloat32ToUInt82dTest)
+
+
 #if defined(ARMNNREF_ENABLED)
 
 // The ARMNN_COMPARE_REF_AUTO_TEST_CASE and the ARMNN_COMPARE_REF_FIXTURE_TEST_CASE test units are not available
diff --git a/src/backends/neon/workloads/CMakeLists.txt b/src/backends/neon/workloads/CMakeLists.txt
index 7c2b185..f8fc18f 100644
--- a/src/backends/neon/workloads/CMakeLists.txt
+++ b/src/backends/neon/workloads/CMakeLists.txt
@@ -16,6 +16,8 @@
     NeonBatchNormalizationWorkload.hpp
     NeonBatchToSpaceNdWorkload.cpp
     NeonBatchToSpaceNdWorkload.hpp
+    NeonCastWorkload.cpp
+    NeonCastWorkload.hpp
     NeonComparisonWorkload.cpp
     NeonComparisonWorkload.hpp
     NeonConcatWorkload.cpp
diff --git a/src/backends/neon/workloads/NeonCastWorkload.cpp b/src/backends/neon/workloads/NeonCastWorkload.cpp
new file mode 100644
index 0000000..4727fe1
--- /dev/null
+++ b/src/backends/neon/workloads/NeonCastWorkload.cpp
@@ -0,0 +1,44 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "NeonCastWorkload.hpp"
+#include "NeonWorkloadUtils.hpp"
+
+#include <aclCommon/ArmComputeTensorHandle.hpp>
+#include <aclCommon/ArmComputeTensorUtils.hpp>
+
+#include <armnn/utility/PolymorphicDowncast.hpp>
+
+namespace armnn
+{
+
+static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE;
+
+arm_compute::Status NeonCastValidate(const TensorInfo& input, const TensorInfo& output)
+{
+    arm_compute::TensorInfo aclInput  = armcomputetensorutils::BuildArmComputeTensorInfo(input);
+    arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+
+    return arm_compute::NECast::validate(&aclInput, &aclOutput, g_AclConvertPolicy);
+}
+
+NeonCastWorkload::NeonCastWorkload(const CastQueueDescriptor& descriptor, const WorkloadInfo& info)
+    : BaseWorkload<CastQueueDescriptor>(descriptor, info)
+{
+    m_Data.ValidateInputsOutputs("NeonCastWorkload", 1, 1);
+
+    arm_compute::ITensor& input  = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+    m_CastLayer.configure(&input, &output, g_AclConvertPolicy);
+}
+
+void NeonCastWorkload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonCastWorkload_Execute");
+    m_CastLayer.run();
+}
+
+} // namespace armnn
diff --git a/src/backends/neon/workloads/NeonCastWorkload.hpp b/src/backends/neon/workloads/NeonCastWorkload.hpp
new file mode 100644
index 0000000..4247c2e
--- /dev/null
+++ b/src/backends/neon/workloads/NeonCastWorkload.hpp
@@ -0,0 +1,28 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backendsCommon/Workload.hpp>
+
+#include <arm_compute/core/Error.h>
+#include <arm_compute/runtime/NEON/functions/NECast.h>
+
+namespace armnn
+{
+
+arm_compute::Status NeonCastValidate(const TensorInfo& input, const TensorInfo& output);
+
+class NeonCastWorkload : public BaseWorkload<CastQueueDescriptor>
+{
+public:
+    NeonCastWorkload(const CastQueueDescriptor& descriptor, const WorkloadInfo& info);
+    virtual void Execute() const override;
+
+private:
+    mutable arm_compute::NECast m_CastLayer;
+};
+
+} // namespace armnn
diff --git a/src/backends/neon/workloads/NeonWorkloads.hpp b/src/backends/neon/workloads/NeonWorkloads.hpp
index 4eb526a..16035e0 100644
--- a/src/backends/neon/workloads/NeonWorkloads.hpp
+++ b/src/backends/neon/workloads/NeonWorkloads.hpp
@@ -10,6 +10,7 @@
 #include "NeonArgMinMaxWorkload.hpp"
 #include "NeonBatchNormalizationWorkload.hpp"
 #include "NeonBatchToSpaceNdWorkload.hpp"
+#include "NeonCastWorkload.hpp"
 #include "NeonComparisonWorkload.hpp"
 #include "NeonConcatWorkload.hpp"
 #include "NeonConstantWorkload.hpp"