Create ClPRelu operator

Make the class that was in experimental namespace
as ClOperator to prepare porting to new interface.
The following changes are included as a part of this patch:

Also, in-place computation is now correctly considered
to be aligned with the class description. Test cases
to test in-place computation are added.

Partially Implements: COMPMID-4184

Signed-off-by: Sang-Hoon Park <sang-hoon.park@arm.com>
Change-Id: I71c18ab47fe0370a2060d5303a58ff3650c0093f
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5201
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
diff --git a/Android.bp b/Android.bp
index a51b91b..93ce568 100644
--- a/Android.bp
+++ b/Android.bp
@@ -677,6 +677,7 @@
         "src/runtime/gpu/cl/operators/ClFill.cpp",
         "src/runtime/gpu/cl/operators/ClFloor.cpp",
         "src/runtime/gpu/cl/operators/ClLogicalNot.cpp",
+        "src/runtime/gpu/cl/operators/ClPRelu.cpp",
         "src/runtime/gpu/cl/operators/ClPermute.cpp",
         "src/runtime/gpu/cl/operators/ClPooling.cpp",
         "src/runtime/gpu/cl/operators/ClReshape.cpp",
diff --git a/arm_compute/runtime/CL/functions/CLPReluLayer.h b/arm_compute/runtime/CL/functions/CLPReluLayer.h
index 1751fda..7b66670 100644
--- a/arm_compute/runtime/CL/functions/CLPReluLayer.h
+++ b/arm_compute/runtime/CL/functions/CLPReluLayer.h
@@ -32,43 +32,6 @@
 class CLCompileContext;
 class ICLTensor;
 class ITensorInfo;
-
-namespace experimental
-{
-/** Basic function to run @ref arm_compute::opencl::kernels::ClArithmeticKernel for PRELU
- *
- * @note The function implements an activation layer with the PRELU activation function.
- */
-class CLPReluLayer : public ICLOperator
-{
-public:
-    /** Default Constructor */
-    CLPReluLayer();
-    /** Set the input and output tensor.
-     *
-     * @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place
-     *
-     * @param[in]  compile_context The compile context to be used.
-     * @param[in]  input           Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
-     * @param[in]  alpha           PRelu layer parameters. Data types supported: same of @p input.
-     * @param[out] output          Destination tensor. Data type supported: same as @p input
-     */
-    void configure(const CLCompileContext &compile_context, ITensorInfo *input, ITensorInfo *alpha, ITensorInfo *output);
-    /** Static function to check if given info will lead to a valid configuration of @ref CLPReluLayer
-     *
-     * @param[in] input  Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
-     * @param[in] alpha  PRelu layer parameters. Data types supported: same of @p input.
-     * @param[in] output Destination tensor info. Data type supported: same as @p input
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *alpha, const ITensorInfo *output);
-
-    // Inherited methods overridden:
-    void run(ITensorPack &tensors) override;
-};
-} // namespace experimental
-
 /** Basic function to run @ref opencl::kernels::ClArithmeticKernel for PRELU
  *
  * @note The function implements an activation layer with the PRELU activation function.
diff --git a/src/runtime/CL/functions/CLPReluLayer.cpp b/src/runtime/CL/functions/CLPReluLayer.cpp
index 74286d4..bb7aff2 100644
--- a/src/runtime/CL/functions/CLPReluLayer.cpp
+++ b/src/runtime/CL/functions/CLPReluLayer.cpp
@@ -21,44 +21,22 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#include "src/core/gpu/cl/kernels/ClElementwiseKernel.h"
-
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
 #include "arm_compute/runtime/CL/functions/CLPReluLayer.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "src/core/gpu/cl/IClKernel.h"
+#include "src/runtime/gpu/cl/operators/ClPRelu.h"
 
 namespace arm_compute
 {
-namespace experimental
-{
-CLPReluLayer::CLPReluLayer()
-{
-}
-
-void CLPReluLayer::configure(const CLCompileContext &compile_context, ITensorInfo *input, ITensorInfo *alpha, ITensorInfo *output)
-{
-    auto k = std::make_unique<arm_compute::opencl::kernels::ClArithmeticKernel>();
-    k->configure(compile_context, ArithmeticOperation::PRELU, input, alpha, output);
-    _kernel = std::move(k);
-}
-
-Status CLPReluLayer::validate(const ITensorInfo *input, const ITensorInfo *alpha, const ITensorInfo *output)
-{
-    return arm_compute::opencl::kernels::ClArithmeticKernel::validate(ArithmeticOperation::PRELU, input, alpha, output);
-}
-
-void CLPReluLayer::run(ITensorPack &tensors)
-{
-    ICLOperator::run(tensors);
-}
-} // namespace experimental
+using OperatorType = opencl::ClPRelu;
 
 struct CLPReluLayer::Impl
 {
-    const ICLTensor                            *src_0{ nullptr };
-    const ICLTensor                            *src_1{ nullptr };
-    ICLTensor                                  *dst{ nullptr };
-    std::unique_ptr<experimental::CLPReluLayer> op{ nullptr };
+    const ICLTensor              *src_0{ nullptr };
+    const ICLTensor              *src_1{ nullptr };
+    ICLTensor                    *dst{ nullptr };
+    std::unique_ptr<OperatorType> op{ nullptr };
 };
 
 CLPReluLayer::CLPReluLayer()
@@ -79,13 +57,13 @@
     _impl->src_0 = input;
     _impl->src_1 = alpha;
     _impl->dst   = output;
-    _impl->op    = std::make_unique<experimental::CLPReluLayer>();
-    _impl->op->configure(compile_context, input->info(), alpha->info(), output->info());
+    _impl->op    = std::make_unique<OperatorType>();
+    _impl->op->configure(compile_context, input->info(), alpha->info(), (output == nullptr ? input->info() : output->info()));
 }
 
 Status CLPReluLayer::validate(const ITensorInfo *input, const ITensorInfo *alpha, const ITensorInfo *output)
 {
-    return experimental::CLPReluLayer::validate(input, alpha, output);
+    return OperatorType::validate(input, alpha, output);
 }
 
 void CLPReluLayer::run()
diff --git a/src/runtime/gpu/cl/operators/ClPRelu.cpp b/src/runtime/gpu/cl/operators/ClPRelu.cpp
new file mode 100644
index 0000000..d1ce14c
--- /dev/null
+++ b/src/runtime/gpu/cl/operators/ClPRelu.cpp
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/runtime/gpu/cl/operators/ClPRelu.h"
+#include "src/core/gpu/cl/kernels/ClElementwiseKernel.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+using KernelType = kernels::ClArithmeticKernel;
+void ClPRelu::configure(const CLCompileContext &compile_context, ITensorInfo *input, ITensorInfo *alpha, ITensorInfo *output)
+{
+    auto k = std::make_unique<KernelType>();
+    k->configure(compile_context, ArithmeticOperation::PRELU, input, alpha, (output == nullptr ? input : output));
+    _kernel = std::move(k);
+}
+
+Status ClPRelu::validate(const ITensorInfo *input, const ITensorInfo *alpha, const ITensorInfo *output)
+{
+    return KernelType::validate(ArithmeticOperation::PRELU, input, alpha, (output == nullptr ? input : output));
+}
+
+void ClPRelu::run(ITensorPack &tensors)
+{
+    // Output tensor can be given as nullptr for in-place computation.
+    // In this case, get the input tensor and use it as the output tensor.
+    if(tensors.get_tensor(TensorType::ACL_DST) == nullptr)
+    {
+        auto src_tensor = const_cast<ITensor *>(tensors.get_const_tensor(TensorType::ACL_SRC_0));
+        ARM_COMPUTE_ERROR_ON_MSG(src_tensor == nullptr, "invalid source tensor is given for in-place computation");
+        tensors.add_tensor(TensorType::ACL_DST, src_tensor);
+    }
+    IClOperator::run(tensors);
+}
+} // namespace opencl
+} // namespace arm_compute
\ No newline at end of file
diff --git a/src/runtime/gpu/cl/operators/ClPRelu.h b/src/runtime/gpu/cl/operators/ClPRelu.h
new file mode 100644
index 0000000..70202ae
--- /dev/null
+++ b/src/runtime/gpu/cl/operators/ClPRelu.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CL_PRELU_H
+#define ARM_COMPUTE_CL_PRELU_H
+
+#include "src/core/gpu/cl/ClCompileContext.h"
+#include "src/runtime/gpu/cl/IClOperator.h"
+
+namespace arm_compute
+{
+namespace opencl
+{
+/** Basic operator to run @ref arm_compute::opencl::kernels::ClArithmeticKernel for PRELU
+ *
+ * @note The operator implements an activation layer with the PRELU activation function.
+ */
+class ClPRelu : public IClOperator
+{
+public:
+    /** Default constructor */
+    ClPRelu() = default;
+    /** Set the input and output tensor.
+     *
+     * @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place
+     *
+     * @param[in]  compile_context The compile context to be used.
+     * @param[in]  input           Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[in]  alpha           PRelu layer parameters. Data types supported: same as @p input.
+     * @param[out] output          Destination tensor. Data type supported: same as @p input
+     */
+    void configure(const CLCompileContext &compile_context, ITensorInfo *input, ITensorInfo *alpha, ITensorInfo *output);
+    /** Static function to check if given info will lead to a valid configuration of @ref arm_compute::opencl::kernels::ClArithmeticKernel for PRELU
+     *
+     * @param[in] input  Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[in] alpha  PRelu layer parameters. Data types supported: same as @p input.
+     * @param[in] output Destination tensor info. Data type supported: same as @p input
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input, const ITensorInfo *alpha, const ITensorInfo *output);
+
+    // Inherited methods overridden:
+    void run(ITensorPack &tensors) override;
+};
+} // namespace opencl
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_CL_PRELU_H */
diff --git a/tests/Utils.h b/tests/Utils.h
index ab25603..2569c41 100644
--- a/tests/Utils.h
+++ b/tests/Utils.h
@@ -514,6 +514,21 @@
 
 /** Create and initialize a tensor of the given type.
  *
+ * @param[in] info Tensor information to be used to create the tensor
+ * @param[in] ctx  (Optional) Pointer to the runtime context.
+ *
+ * @return Initialized tensor of given type.
+ */
+template <typename T>
+inline T create_tensor(const TensorInfo &info, IRuntimeContext *ctx = nullptr)
+{
+    T tensor(ctx);
+    tensor.allocator()->init(info);
+    return tensor;
+}
+
+/** Create and initialize a tensor of the given type.
+ *
  * @param[in] shape             Tensor shape.
  * @param[in] data_type         Data type.
  * @param[in] num_channels      (Optional) Number of channels.
@@ -531,9 +546,8 @@
     TensorInfo info(shape, num_channels, data_type);
     info.set_quantization_info(quantization_info);
     info.set_data_layout(data_layout);
-    tensor.allocator()->init(info);
 
-    return tensor;
+    return create_tensor<T>(info, ctx);
 }
 
 /** Create and initialize a tensor of the given type.
@@ -549,10 +563,7 @@
 {
     TensorInfo info(shape, format);
 
-    T tensor(ctx);
-    tensor.allocator()->init(info);
-
-    return tensor;
+    return create_tensor<T>(info, ctx);
 }
 
 /** Create and initialize a multi-image of the given type.
diff --git a/tests/validation/CL/PReluLayer.cpp b/tests/validation/CL/PReluLayer.cpp
index 82436a9..82f3e4f 100644
--- a/tests/validation/CL/PReluLayer.cpp
+++ b/tests/validation/CL/PReluLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -93,6 +93,63 @@
 // clang-format on
 // *INDENT-ON*
 
+TEST_SUITE(InPlace)
+TEST_CASE(Validate, framework::DatasetMode::ALL)
+{
+    // PRelu operator should be able to take nullptr as output and do the in-place computation.
+    // Shape and data type are selected randomly since they shouldn't matter
+    const auto tensor_info = TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32);
+    const auto result      = arm_compute::CLPReluLayer::validate(&tensor_info, &tensor_info, nullptr);
+    ARM_COMPUTE_EXPECT(bool(result) == true, framework::LogLevel::ERRORS);
+}
+
+SimpleTensor<float> compute_float_reference(const TensorInfo &tensor_info)
+{
+    SimpleTensor<float> ref_src1{ tensor_info.tensor_shape(), tensor_info.data_type() };
+    SimpleTensor<float> ref_src2{ tensor_info.tensor_shape(), tensor_info.data_type() };
+    SimpleTensor<float> ref_dst{ tensor_info.tensor_shape(), tensor_info.data_type() };
+
+    library->fill_tensor_uniform(ref_src1, 0);
+    library->fill_tensor_uniform(ref_src2, 1);
+
+    return reference::arithmetic_operation<float>(ArithmeticOperation::PRELU, ref_src1, ref_src2, ref_dst);
+}
+
+void compute_float_target_in_place(CLTensor &src1, CLTensor &src2, bool use_nullptr_output)
+{
+    auto fn = arm_compute::CLPReluLayer{};
+    fn.configure(&src1, &src2, use_nullptr_output ? nullptr : &src1);
+
+    src1.allocator()->allocate();
+    src2.allocator()->allocate();
+
+    library->fill_tensor_uniform(CLAccessor(src1), 0);
+    library->fill_tensor_uniform(CLAccessor(src2), 1);
+
+    fn.run();
+}
+
+TEST_CASE(ComputeWithNullPtr, framework::DatasetMode::ALL)
+{
+    const auto tensor_info = TensorInfo(TensorShape(33U, 13U, 2U), 1, DataType::F32);
+
+    auto src1 = create_tensor<CLTensor>(tensor_info);
+    auto src2 = create_tensor<CLTensor>(tensor_info);
+    compute_float_target_in_place(src1, src2, true);
+    validate(CLAccessor(src1), compute_float_reference(tensor_info));
+}
+
+TEST_CASE(ComputeWithSameTensor, framework::DatasetMode::ALL)
+{
+    const auto tensor_info = TensorInfo(TensorShape(33U, 13U, 2U), 1, DataType::F32);
+
+    auto src1 = create_tensor<CLTensor>(tensor_info);
+    auto src2 = create_tensor<CLTensor>(tensor_info);
+    compute_float_target_in_place(src1, src2, false);
+    validate(CLAccessor(src1), compute_float_reference(tensor_info));
+}
+TEST_SUITE_END() // InPlace
+
 template <typename T>
 using CLPReluLayerFixture = PReluLayerValidationFixture<CLTensor, CLAccessor, CLPReluLayer, T>;