IVGCVSW-3294 Add Neon backend support for PReLU
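
PReLU is forwarded to arm_compute::NEPReluLayer: NeonLayerSupport gains
IsPreluSupported, NeonWorkloadFactory gains CreatePrelu, and the new
NeonPreluWorkload wraps the ACL function. CreateWorkload and layer tests
cover Float16 (when available), Float32 and QuantisedAsymm8.

For illustration only, a minimal sketch of driving a PReLU layer on the
CpuAcc (Neon) backend through the public Arm NN API; the shapes, alpha
values and variable names below are assumptions, not part of this patch:

    #include <armnn/ArmNN.hpp>

    #include <vector>

    int main()
    {
        using namespace armnn;

        // Build a tiny graph: input -> PReLU (alpha fed from a constant) -> output.
        INetworkPtr net = INetwork::Create();

        TensorInfo inputInfo({ 1, 4, 1, 2 }, DataType::Float32);
        TensorInfo alphaInfo({ 1, 4, 1, 1 }, DataType::Float32);
        TensorInfo outputInfo({ 1, 4, 1, 2 }, DataType::Float32);

        std::vector<float> alphaData(alphaInfo.GetNumElements(), 0.25f);
        ConstTensor alpha(alphaInfo, alphaData);

        IConnectableLayer* inputLayer  = net->AddInputLayer(0);
        IConnectableLayer* alphaLayer  = net->AddConstantLayer(alpha);
        IConnectableLayer* preluLayer  = net->AddPreluLayer("prelu");
        IConnectableLayer* outputLayer = net->AddOutputLayer(0);

        inputLayer->GetOutputSlot(0).Connect(preluLayer->GetInputSlot(0));
        alphaLayer->GetOutputSlot(0).Connect(preluLayer->GetInputSlot(1));
        preluLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));

        inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
        alphaLayer->GetOutputSlot(0).SetTensorInfo(alphaInfo);
        preluLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);

        // Optimize for the Neon backend and load the network into the runtime.
        IRuntime::CreationOptions options;
        IRuntimePtr runtime = IRuntime::Create(options);
        std::vector<BackendId> backends = { Compute::CpuAcc };
        IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());

        NetworkId netId;
        runtime->LoadNetwork(netId, std::move(optNet));

        // Run once: negative inputs come out scaled by alpha, positive inputs pass through.
        std::vector<float> inputData(inputInfo.GetNumElements(), -1.0f);
        std::vector<float> outputData(outputInfo.GetNumElements());

        InputTensors  inputTensors  { { 0, ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data()) } };
        OutputTensors outputTensors { { 0, Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) } };

        runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
        return 0;
    }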

Change-Id: I2f92f7336d3b4188962a9d08b0e7c6372b1c648f
Signed-off-by: Nikhil Raj <nikhil.raj@arm.com>
diff --git a/src/backends/neon/NeonLayerSupport.cpp b/src/backends/neon/NeonLayerSupport.cpp
index e49ad79..c05456b 100644
--- a/src/backends/neon/NeonLayerSupport.cpp
+++ b/src/backends/neon/NeonLayerSupport.cpp
@@ -36,6 +36,7 @@
 #include "workloads/NeonPadWorkload.hpp"
 #include "workloads/NeonPermuteWorkload.hpp"
 #include "workloads/NeonPooling2dWorkload.hpp"
+#include "workloads/NeonPreluWorkload.hpp"
 #include "workloads/NeonQuantizeWorkload.hpp"
 #include "workloads/NeonResizeBilinearWorkload.hpp"
 #include "workloads/NeonSoftmaxBaseWorkload.hpp"
@@ -450,6 +451,14 @@
     FORWARD_WORKLOAD_VALIDATE_FUNC(NeonPooling2dWorkloadValidate, reasonIfUnsupported, input, output, descriptor);
 }
 
+bool NeonLayerSupport::IsPreluSupported(const TensorInfo& input,
+                                        const TensorInfo& alpha,
+                                        const TensorInfo& output,
+                                        Optional<std::string&> reasonIfUnsupported) const
+{
+    FORWARD_WORKLOAD_VALIDATE_FUNC(NeonPreluWorkloadValidate, reasonIfUnsupported, input, alpha, output);
+}
+
 bool NeonLayerSupport::IsQuantizeSupported(const TensorInfo& input,
                                            const TensorInfo& output,
                                            Optional<std::string&> reasonIfUnsupported) const
diff --git a/src/backends/neon/NeonLayerSupport.hpp b/src/backends/neon/NeonLayerSupport.hpp
index 781da42..344ce84 100644
--- a/src/backends/neon/NeonLayerSupport.hpp
+++ b/src/backends/neon/NeonLayerSupport.hpp
@@ -149,6 +149,11 @@
                               const Pooling2dDescriptor& descriptor,
                               Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
 
+    bool IsPreluSupported(const TensorInfo& input,
+                          const TensorInfo& alpha,
+                          const TensorInfo& output,
+                          Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+
     bool IsQuantizeSupported(const TensorInfo& input,
                              const TensorInfo& output,
                              Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
diff --git a/src/backends/neon/NeonWorkloadFactory.cpp b/src/backends/neon/NeonWorkloadFactory.cpp
index a991318..1802459 100644
--- a/src/backends/neon/NeonWorkloadFactory.cpp
+++ b/src/backends/neon/NeonWorkloadFactory.cpp
@@ -145,6 +145,12 @@
     return std::make_unique<NeonPooling2dWorkload>(descriptor, info);
 }
 
+std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePrelu(const PreluQueueDescriptor& descriptor,
+                                                                   const WorkloadInfo& info) const
+{
+    return std::make_unique<NeonPreluWorkload>(descriptor, info);
+}
+
 std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateConvolution2d(
     const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const
 {
diff --git a/src/backends/neon/NeonWorkloadFactory.hpp b/src/backends/neon/NeonWorkloadFactory.hpp
index ebd5001..2341673 100644
--- a/src/backends/neon/NeonWorkloadFactory.hpp
+++ b/src/backends/neon/NeonWorkloadFactory.hpp
@@ -65,6 +65,9 @@
     std::unique_ptr<IWorkload> CreatePooling2d(const Pooling2dQueueDescriptor& descriptor,
                                                const WorkloadInfo& info) const override;
 
+    std::unique_ptr<IWorkload> CreatePrelu(const PreluQueueDescriptor& descriptor,
+                                           const WorkloadInfo& info) const override;
+
     std::unique_ptr<IWorkload> CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor,
                                                    const WorkloadInfo& info) const override;
 
diff --git a/src/backends/neon/backend.mk b/src/backends/neon/backend.mk
index 7b037d2..e5fafa3 100644
--- a/src/backends/neon/backend.mk
+++ b/src/backends/neon/backend.mk
@@ -42,6 +42,7 @@
         workloads/NeonPadWorkload.cpp \
         workloads/NeonPermuteWorkload.cpp \
         workloads/NeonPooling2dWorkload.cpp \
+        workloads/NeonPreluWorkload.cpp \
         workloads/NeonQuantizeWorkload.cpp \
         workloads/NeonReshapeWorkload.cpp \
         workloads/NeonResizeBilinearWorkload.cpp \
diff --git a/src/backends/neon/test/NeonCreateWorkloadTests.cpp b/src/backends/neon/test/NeonCreateWorkloadTests.cpp
index 320ea69..19b520d 100644
--- a/src/backends/neon/test/NeonCreateWorkloadTests.cpp
+++ b/src/backends/neon/test/NeonCreateWorkloadTests.cpp
@@ -416,6 +416,49 @@
     NeonCreatePooling2dWorkloadTest<DataType::QuantisedAsymm8>(DataLayout::NHWC);
 }
 
+static void NeonCreatePreluWorkloadTest(const armnn::TensorShape& inputShape,
+                                        const armnn::TensorShape& alphaShape,
+                                        const armnn::TensorShape& outputShape,
+                                        armnn::DataType dataType)
+{
+    Graph graph;
+    NeonWorkloadFactory factory =
+            NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager());
+
+    auto workload = CreatePreluWorkloadTest<NeonPreluWorkload>(factory,
+                                                               graph,
+                                                               inputShape,
+                                                               alphaShape,
+                                                               outputShape,
+                                                               dataType);
+
+    // Checks that inputs and outputs are as we expect them (see definition of CreatePreluWorkloadTest).
+    PreluQueueDescriptor queueDescriptor = workload->GetData();
+    auto inputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto alphaHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Inputs[1]);
+    auto outputHandle = boost::polymorphic_downcast<IAclTensorHandle*>(queueDescriptor.m_Outputs[0]);
+    BOOST_TEST(TestNeonTensorHandleInfo(inputHandle, TensorInfo(inputShape, dataType)));
+    BOOST_TEST(TestNeonTensorHandleInfo(alphaHandle, TensorInfo(alphaShape, dataType)));
+    BOOST_TEST(TestNeonTensorHandleInfo(outputHandle, TensorInfo(outputShape, dataType)));
+}
+
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+BOOST_AUTO_TEST_CASE(CreatePreluFloat16Workload)
+{
+    NeonCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::Float16);
+}
+#endif
+
+BOOST_AUTO_TEST_CASE(CreatePreluFloatWorkload)
+{
+    NeonCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::Float32);
+}
+
+BOOST_AUTO_TEST_CASE(CreatePreluUint8Workload)
+{
+    NeonCreatePreluWorkloadTest({ 1, 4, 1, 2 }, { 5, 4, 3, 1 }, { 5, 4, 3, 2 }, DataType::QuantisedAsymm8);
+}
+
 template <typename armnn::DataType DataType>
 static void NeonCreateReshapeWorkloadTest()
 {
diff --git a/src/backends/neon/test/NeonLayerTests.cpp b/src/backends/neon/test/NeonLayerTests.cpp
index 425fe3a..8da54a7 100644
--- a/src/backends/neon/test/NeonLayerTests.cpp
+++ b/src/backends/neon/test/NeonLayerTests.cpp
@@ -537,6 +537,10 @@
 ARMNN_AUTO_TEST_CASE(QuantizeSimpleUint8, QuantizeSimpleUint8Test)
 ARMNN_AUTO_TEST_CASE(QuantizeClampUint8, QuantizeClampUint8Test)
 
+// PReLU
+ARMNN_AUTO_TEST_CASE(PreluFloat32, PreluTest<armnn::DataType::Float32>)
+ARMNN_AUTO_TEST_CASE(PreluUint8,   PreluTest<armnn::DataType::QuantisedAsymm8>)
+
 // ============================================================================
 // COMPARE tests
 
diff --git a/src/backends/neon/workloads/CMakeLists.txt b/src/backends/neon/workloads/CMakeLists.txt
index f6568e1..8f9b7d5 100644
--- a/src/backends/neon/workloads/CMakeLists.txt
+++ b/src/backends/neon/workloads/CMakeLists.txt
@@ -50,6 +50,8 @@
     NeonPermuteWorkload.hpp
     NeonPooling2dWorkload.cpp
     NeonPooling2dWorkload.hpp
+    NeonPreluWorkload.cpp
+    NeonPreluWorkload.hpp
     NeonQuantizeWorkload.cpp
     NeonQuantizeWorkload.hpp
     NeonReshapeWorkload.cpp
diff --git a/src/backends/neon/workloads/NeonPreluWorkload.cpp b/src/backends/neon/workloads/NeonPreluWorkload.cpp
new file mode 100644
index 0000000..107090e
--- /dev/null
+++ b/src/backends/neon/workloads/NeonPreluWorkload.cpp
@@ -0,0 +1,50 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "NeonPreluWorkload.hpp"
+#include "NeonWorkloadUtils.hpp"
+#include <aclCommon/ArmComputeUtils.hpp>
+
+#include <arm_compute/runtime/NEON/functions/NEPReluLayer.h>
+
+namespace armnn
+{
+
+arm_compute::Status NeonPreluWorkloadValidate(const TensorInfo& input,
+                                              const TensorInfo& alpha,
+                                              const TensorInfo& output)
+{
+    const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
+    const arm_compute::TensorInfo aclAlpha = armcomputetensorutils::BuildArmComputeTensorInfo(alpha);
+    const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+
+    return arm_compute::NEPReluLayer::validate(&aclInput,
+                                               &aclAlpha,
+                                               &aclOutput);
+}
+
+NeonPreluWorkload::NeonPreluWorkload(const PreluQueueDescriptor& descriptor,
+                                     const WorkloadInfo& info)
+        : BaseWorkload<PreluQueueDescriptor>(descriptor, info)
+{
+    m_Data.ValidateInputsOutputs("NeonPreluWorkload", 2, 1); // PReLU takes two inputs: the input tensor and the alpha (slope) tensor.
+
+    arm_compute::ITensor& input = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& alpha = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
+    arm_compute::ITensor& output = boost::polymorphic_downcast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+    auto layer = std::make_unique<arm_compute::NEPReluLayer>();
+    layer->configure(&input, &alpha, &output);
+
+    m_PreluLayer.reset(layer.release()); // Held as IFunction so the header does not depend on NEPReluLayer.h.
+}
+
+void NeonPreluWorkload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonPreluWorkload_Execute");
+    m_PreluLayer->run();
+}
+
+} //namespace armnn
diff --git a/src/backends/neon/workloads/NeonPreluWorkload.hpp b/src/backends/neon/workloads/NeonPreluWorkload.hpp
new file mode 100644
index 0000000..a0d46c2
--- /dev/null
+++ b/src/backends/neon/workloads/NeonPreluWorkload.hpp
@@ -0,0 +1,29 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backendsCommon/Workload.hpp>
+#include <arm_compute/core/Error.h>
+#include <arm_compute/runtime/IFunction.h>
+
+namespace armnn
+{
+
+arm_compute::Status NeonPreluWorkloadValidate(const TensorInfo& input,
+                                              const TensorInfo& alpha,
+                                              const TensorInfo& output);
+
+class NeonPreluWorkload : public BaseWorkload<PreluQueueDescriptor>
+{
+public:
+    NeonPreluWorkload(const PreluQueueDescriptor& descriptor, const WorkloadInfo& info);
+    void Execute() const override;
+
+private:
+    std::unique_ptr<arm_compute::IFunction> m_PreluLayer;
+};
+
+} //namespace armnn
diff --git a/src/backends/neon/workloads/NeonWorkloads.hpp b/src/backends/neon/workloads/NeonWorkloads.hpp
index ecd50c9..ebea879 100644
--- a/src/backends/neon/workloads/NeonWorkloads.hpp
+++ b/src/backends/neon/workloads/NeonWorkloads.hpp
@@ -27,6 +27,7 @@
 #include "NeonPadWorkload.hpp"
 #include "NeonPermuteWorkload.hpp"
 #include "NeonPooling2dWorkload.hpp"
+#include "NeonPreluWorkload.hpp"
 #include "NeonQuantizeWorkload.hpp"
 #include "NeonReshapeWorkload.hpp"
 #include "NeonResizeBilinearWorkload.hpp"