Make CLArithmeticAddition kernel and function stateless

Resolves COMPMID-4006

Change-Id: Iddc32b0b250142aac9a4a7b9dc0eef462d196025
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4913
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Sang-Hoon Park <sang-hoon.park@arm.com>
diff --git a/src/runtime/CL/functions/CLElementwiseOperations.cpp b/src/runtime/CL/functions/CLElementwiseOperations.cpp
index a72e957..638990e 100644
--- a/src/runtime/CL/functions/CLElementwiseOperations.cpp
+++ b/src/runtime/CL/functions/CLElementwiseOperations.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -25,7 +25,9 @@
 
 #include "arm_compute/core/CL/ICLTensor.h"
 #include "arm_compute/runtime/CL/CLScheduler.h"
-#include "src/core/CL/kernels/CLElementwiseOperationKernel.h"
+#include "src/core/gpu/cl/kernels/ClElementwiseKernel.h"
+
+#include "src/runtime/gpu/cl/operators/ClAdd.h"
 
 #include <utility>
 
@@ -33,34 +35,13 @@
 {
 namespace experimental
 {
-CLArithmeticAddition::CLArithmeticAddition()
-{
-}
-
-void CLArithmeticAddition::configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info)
-{
-    auto k = std::make_unique<CLSaturatedArithmeticOperationKernel>();
-    k->configure(compile_context, ArithmeticOperation::ADD, input1, input2, output, policy, act_info);
-    _kernel = std::move(k);
-}
-
-Status CLArithmeticAddition::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info)
-{
-    return CLSaturatedArithmeticOperationKernel::validate(ArithmeticOperation::ADD, input1, input2, output, policy, act_info);
-}
-
-void CLArithmeticAddition::run(ITensorPack &tensors)
-{
-    ICLOperator::run(tensors);
-}
-
 CLArithmeticSubtraction::CLArithmeticSubtraction()
 {
 }
 void CLArithmeticSubtraction::configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, ConvertPolicy policy,
                                         const ActivationLayerInfo &act_info)
 {
-    auto k = std::make_unique<CLSaturatedArithmeticOperationKernel>();
+    auto k = std::make_unique<arm_compute::opencl::kernels::ClSaturatedArithmeticKernel>();
     k->configure(compile_context, ArithmeticOperation::SUB, input1, input2, output, policy, act_info);
     _kernel = std::move(k);
 }
@@ -68,7 +49,7 @@
 Status CLArithmeticSubtraction::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info)
 {
     ARM_COMPUTE_UNUSED(policy);
-    return CLSaturatedArithmeticOperationKernel::validate(ArithmeticOperation::SUB, input1, input2, output, policy, act_info);
+    return arm_compute::opencl::kernels::ClSaturatedArithmeticKernel::validate(ArithmeticOperation::SUB, input1, input2, output, policy, act_info);
 }
 
 void CLArithmeticSubtraction::run(ITensorPack &tensors)
@@ -82,14 +63,14 @@
 
 void CLArithmeticDivision::configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info)
 {
-    auto k = std::make_unique<CLArithmeticOperationKernel>();
+    auto k = std::make_unique<arm_compute::opencl::kernels::ClArithmeticKernel>();
     k->configure(compile_context, ArithmeticOperation::DIV, input1, input2, output, act_info);
     _kernel = std::move(k);
 }
 
 Status CLArithmeticDivision::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info)
 {
-    return CLArithmeticOperationKernel::validate(ArithmeticOperation::DIV, input1, input2, output, act_info);
+    return arm_compute::opencl::kernels::ClArithmeticKernel::validate(ArithmeticOperation::DIV, input1, input2, output, act_info);
 }
 
 void CLArithmeticDivision::run(ITensorPack &tensors)
@@ -103,14 +84,14 @@
 
 void CLElementwiseMax::configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info)
 {
-    auto k = std::make_unique<CLArithmeticOperationKernel>();
+    auto k = std::make_unique<arm_compute::opencl::kernels::ClArithmeticKernel>();
     k->configure(compile_context, ArithmeticOperation::MAX, input1, input2, output, act_info);
     _kernel = std::move(k);
 }
 
 Status CLElementwiseMax::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info)
 {
-    return CLArithmeticOperationKernel::validate(ArithmeticOperation::MAX, input1, input2, output, act_info);
+    return arm_compute::opencl::kernels::ClArithmeticKernel::validate(ArithmeticOperation::MAX, input1, input2, output, act_info);
 }
 
 void CLElementwiseMax::run(ITensorPack &tensors)
@@ -124,14 +105,14 @@
 
 void CLElementwiseMin::configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info)
 {
-    auto k = std::make_unique<CLArithmeticOperationKernel>();
+    auto k = std::make_unique<arm_compute::opencl::kernels::ClArithmeticKernel>();
     k->configure(compile_context, ArithmeticOperation::MIN, input1, input2, output, act_info);
     _kernel = std::move(k);
 }
 
 Status CLElementwiseMin::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info)
 {
-    return CLArithmeticOperationKernel::validate(ArithmeticOperation::MIN, input1, input2, output, act_info);
+    return arm_compute::opencl::kernels::ClArithmeticKernel::validate(ArithmeticOperation::MIN, input1, input2, output, act_info);
 }
 
 void CLElementwiseMin::run(ITensorPack &tensors)
@@ -145,14 +126,14 @@
 
 void CLElementwiseSquaredDiff::configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info)
 {
-    auto k = std::make_unique<CLArithmeticOperationKernel>();
+    auto k = std::make_unique<arm_compute::opencl::kernels::ClArithmeticKernel>();
     k->configure(compile_context, ArithmeticOperation::SQUARED_DIFF, input1, input2, output, act_info);
     _kernel = std::move(k);
 }
 
 Status CLElementwiseSquaredDiff::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info)
 {
-    return CLArithmeticOperationKernel::validate(ArithmeticOperation::SQUARED_DIFF, input1, input2, output, act_info);
+    return arm_compute::opencl::kernels::ClArithmeticKernel::validate(ArithmeticOperation::SQUARED_DIFF, input1, input2, output, act_info);
 }
 
 void CLElementwiseSquaredDiff::run(ITensorPack &tensors)
@@ -166,14 +147,14 @@
 
 void CLElementwisePower::configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info)
 {
-    auto k = std::make_unique<CLArithmeticOperationKernel>();
+    auto k = std::make_unique<arm_compute::opencl::kernels::ClArithmeticKernel>();
     k->configure(compile_context, ArithmeticOperation::POWER, input1, input2, output, act_info);
     _kernel = std::move(k);
 }
 
 Status CLElementwisePower::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info)
 {
-    return CLArithmeticOperationKernel::validate(ArithmeticOperation::POWER, input1, input2, output, act_info);
+    return arm_compute::opencl::kernels::ClArithmeticKernel::validate(ArithmeticOperation::POWER, input1, input2, output, act_info);
 }
 
 void CLElementwisePower::run(ITensorPack &tensors)
@@ -181,13 +162,12 @@
     ICLOperator::run(tensors);
 }
 } // namespace experimental
-
 struct CLArithmeticAddition::Impl
 {
-    const ICLTensor                                    *src_0{ nullptr };
-    const ICLTensor                                    *src_1{ nullptr };
-    ICLTensor                                          *dst{ nullptr };
-    std::unique_ptr<experimental::CLArithmeticAddition> op{ nullptr };
+    const ICLTensor               *src_0{ nullptr };
+    const ICLTensor               *src_1{ nullptr };
+    ICLTensor                     *dst{ nullptr };
+    std::unique_ptr<opencl::ClAdd> op{ nullptr };
 };
 
 CLArithmeticAddition::CLArithmeticAddition()
@@ -209,13 +189,13 @@
     _impl->src_0 = input1;
     _impl->src_1 = input2;
     _impl->dst   = output;
-    _impl->op    = std::make_unique<experimental::CLArithmeticAddition>();
+    _impl->op    = std::make_unique<opencl::ClAdd>();
     _impl->op->configure(compile_context, input1->info(), input2->info(), output->info(), policy, act_info);
 }
 
 Status CLArithmeticAddition::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy, const ActivationLayerInfo &act_info)
 {
-    return experimental::CLArithmeticAddition::validate(input1, input2, output, policy, act_info);
+    return opencl::ClAdd::validate(input1, input2, output, policy, act_info);
 }
 
 void CLArithmeticAddition::run()
diff --git a/src/runtime/CL/functions/CLLogicalAnd.cpp b/src/runtime/CL/functions/CLLogicalAnd.cpp
index f1c5365..98c98ab 100644
--- a/src/runtime/CL/functions/CLLogicalAnd.cpp
+++ b/src/runtime/CL/functions/CLLogicalAnd.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020 Arm Limited.
+ * Copyright (c) 2020-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -23,7 +23,7 @@
  */
 #include "arm_compute/runtime/CL/functions/CLLogicalAnd.h"
 #include "arm_compute/core/CL/ICLTensor.h"
-#include "src/core/CL/kernels/CLElementwiseOperationKernel.h"
+#include "src/core/gpu/cl/kernels/ClElementwiseKernel.h"
 
 #include <utility>
 
@@ -33,14 +33,14 @@
 {
 void CLLogicalAnd::configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output)
 {
-    auto k = std::make_unique<CLLogicalBinaryKernel>();
-    k->configure(compile_context, kernels::LogicalOperation::And, input1, input2, output);
+    auto k = std::make_unique<arm_compute::opencl::kernels::ClLogicalBinaryKernel>();
+    k->configure(compile_context, LogicalOperation::And, input1, input2, output);
     _kernel = std::move(k);
 }
 
 Status CLLogicalAnd::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
 {
-    return CLLogicalBinaryKernel::validate(kernels::LogicalOperation::And, input1, input2, output);
+    return arm_compute::opencl::kernels::ClLogicalBinaryKernel::validate(LogicalOperation::And, input1, input2, output);
 }
 
 void CLLogicalAnd::run(ITensorPack &tensors)
diff --git a/src/runtime/CL/functions/CLLogicalOr.cpp b/src/runtime/CL/functions/CLLogicalOr.cpp
index 8c6087e..897963a 100644
--- a/src/runtime/CL/functions/CLLogicalOr.cpp
+++ b/src/runtime/CL/functions/CLLogicalOr.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020 Arm Limited.
+ * Copyright (c) 2020-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -23,7 +23,7 @@
  */
 #include "arm_compute/runtime/CL/functions/CLLogicalOr.h"
 #include "arm_compute/core/CL/ICLTensor.h"
-#include "src/core/CL/kernels/CLElementwiseOperationKernel.h"
+#include "src/core/gpu/cl/kernels/ClElementwiseKernel.h"
 
 #include <utility>
 
@@ -33,14 +33,14 @@
 {
 void CLLogicalOr::configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output)
 {
-    auto k = std::make_unique<CLLogicalBinaryKernel>();
-    k->configure(compile_context, kernels::LogicalOperation::Or, input1, input2, output);
+    auto k = std::make_unique<arm_compute::opencl::kernels::ClLogicalBinaryKernel>();
+    k->configure(compile_context, LogicalOperation::Or, input1, input2, output);
     _kernel = std::move(k);
 }
 
 Status CLLogicalOr::validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output)
 {
-    return CLLogicalBinaryKernel::validate(kernels::LogicalOperation::Or, input1, input2, output);
+    return arm_compute::opencl::kernels::ClLogicalBinaryKernel::validate(LogicalOperation::Or, input1, input2, output);
 }
 
 void CLLogicalOr::run(ITensorPack &tensors)
diff --git a/src/runtime/CL/functions/CLPReluLayer.cpp b/src/runtime/CL/functions/CLPReluLayer.cpp
index 876b5de..74286d4 100644
--- a/src/runtime/CL/functions/CLPReluLayer.cpp
+++ b/src/runtime/CL/functions/CLPReluLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -21,7 +21,7 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#include "src/core/CL/kernels/CLElementwiseOperationKernel.h"
+#include "src/core/gpu/cl/kernels/ClElementwiseKernel.h"
 
 #include "arm_compute/core/CL/ICLTensor.h"
 #include "arm_compute/runtime/CL/CLScheduler.h"
@@ -37,14 +37,14 @@
 
 void CLPReluLayer::configure(const CLCompileContext &compile_context, ITensorInfo *input, ITensorInfo *alpha, ITensorInfo *output)
 {
-    auto k = std::make_unique<CLArithmeticOperationKernel>();
+    auto k = std::make_unique<arm_compute::opencl::kernels::ClArithmeticKernel>();
     k->configure(compile_context, ArithmeticOperation::PRELU, input, alpha, output);
     _kernel = std::move(k);
 }
 
 Status CLPReluLayer::validate(const ITensorInfo *input, const ITensorInfo *alpha, const ITensorInfo *output)
 {
-    return CLArithmeticOperationKernel::validate(ArithmeticOperation::PRELU, input, alpha, output);
+    return arm_compute::opencl::kernels::ClArithmeticKernel::validate(ArithmeticOperation::PRELU, input, alpha, output);
 }
 
 void CLPReluLayer::run(ITensorPack &tensors)