Rename functions/classes for elementwise unary operations

* Create CpuElementwiseUnary operator
* Rename kernel classes
* Make the kernels stateless

Partially implements: COMPMID-4003

Change-Id: Ie0440cd01d4924847d6991b4df7ccaf311439297
Signed-off-by: Sang-Hoon Park <sang-hoon.park@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4912
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/Android.bp b/Android.bp
index 6984bbe..8a596cd 100644
--- a/Android.bp
+++ b/Android.bp
@@ -245,7 +245,6 @@
         "src/core/NEON/kernels/NEDilateKernel.cpp",
         "src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp",
         "src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.cpp",
-        "src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp",
         "src/core/NEON/kernels/NEErodeKernel.cpp",
         "src/core/NEON/kernels/NEFFTDigitReverseKernel.cpp",
         "src/core/NEON/kernels/NEFFTRadixStageKernel.cpp",
@@ -409,6 +408,7 @@
         "src/core/cpu/kernels/CpuConcatenateWidthKernel.cpp",
         "src/core/cpu/kernels/CpuCopyKernel.cpp",
         "src/core/cpu/kernels/CpuElementwiseKernel.cpp",
+        "src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp",
         "src/core/cpu/kernels/CpuFillKernel.cpp",
         "src/core/cpu/kernels/CpuFloorKernel.cpp",
         "src/core/cpu/kernels/CpuPermuteKernel.cpp",
@@ -788,6 +788,7 @@
         "src/runtime/cpu/operators/CpuConcatenate.cpp",
         "src/runtime/cpu/operators/CpuCopy.cpp",
         "src/runtime/cpu/operators/CpuElementwise.cpp",
+        "src/runtime/cpu/operators/CpuElementwiseUnary.cpp",
         "src/runtime/cpu/operators/CpuFill.cpp",
         "src/runtime/cpu/operators/CpuFloor.cpp",
         "src/runtime/cpu/operators/CpuPermute.cpp",
diff --git a/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h b/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h
index 46a7316..e79e869 100644
--- a/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -25,151 +25,63 @@
 #define ARM_COMPUTE_NEELEMENTWISEUNARYLAYER_H
 
 #include "arm_compute/core/Error.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "src/core/common/Macros.h"
+
+#include <memory>
 
 namespace arm_compute
 {
 class ITensor;
 class ITensorInfo;
-
-/** Basic function to perform inverse square root on an input tensor. */
-class NERsqrtLayer : public INESimpleFunctionNoBorder
+/** Basic function to perform unary elementwise operations */
+template <ElementWiseUnary op>
+class NEElementwiseUnaryLayer : public IFunction
 {
 public:
+    /** Default Constructor */
+    NEElementwiseUnaryLayer();
+    /** Default Destructor */
+    ~NEElementwiseUnaryLayer();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEElementwiseUnaryLayer(const NEElementwiseUnaryLayer &) = delete;
+    /** Default move constructor */
+    NEElementwiseUnaryLayer(NEElementwiseUnaryLayer &&);
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEElementwiseUnaryLayer &operator=(const NEElementwiseUnaryLayer &) = delete;
+    /** Default move assignment operator */
+    NEElementwiseUnaryLayer &operator=(NEElementwiseUnaryLayer &&);
+
     /** Initialize the function
      *
-     * @param[in]  input  Input tensor. Data types supported: F16/F32.
-     * @param[out] output Output tensor. Data types supported: same as @p input.
+     * @param[in]  input  Input tensor. Data types supported: F16/F32, F16/F32/S32 for NEG/ABS operations.
+     * @param[out] output Output tensor. Data types supported: Same as @p input.
      */
     void configure(const ITensor *input, ITensor *output);
-    /** Static function to check if given info will lead to a valid configuration of @ref NERsqrtLayer
+    /** Static function to check if given info will lead to a valid configuration
      *
-     * @param[in] input  First tensor input info. Data types supported: F16/F32.
+     * @param[in] input  Input tensor info. Data types supported: F16/F32, F16/F32/S32 for NEG/ABS operations.
      * @param[in] output Output tensor info. Data types supported: Same as @p input.
      *
      * @return a status
      */
     static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    struct Impl;
+    std::unique_ptr<Impl> _impl;
 };
 
-/** Basic function to perform exponential on an input tensor. */
-class NEExpLayer : public INESimpleFunctionNoBorder
-{
-public:
-    /** Initialize the function
-     *
-     * @param[in]  input  Input tensor. Data types supported: F16/F32.
-     * @param[out] output Output tensor. Data types supported: same as @p input.
-     */
-    void configure(const ITensor *input, ITensor *output);
-    /** Static function to check if given info will lead to a valid configuration of @ref NEExpLayer
-     *
-     * @param[in] input  First tensor input info. Data types supported: F16/F32.
-     * @param[in] output Output tensor info. Data types supported: Same as @p input.
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-};
+using NERsqrtLayer = NEElementwiseUnaryLayer<ElementWiseUnary::RSQRT>;
+using NEExpLayer   = NEElementwiseUnaryLayer<ElementWiseUnary::EXP>;
+using NENegLayer   = NEElementwiseUnaryLayer<ElementWiseUnary::NEG>;
+using NELogLayer   = NEElementwiseUnaryLayer<ElementWiseUnary::LOG>;
+using NEAbsLayer   = NEElementwiseUnaryLayer<ElementWiseUnary::ABS>;
+using NERoundLayer = NEElementwiseUnaryLayer<ElementWiseUnary::ROUND>;
+using NESinLayer   = NEElementwiseUnaryLayer<ElementWiseUnary::SIN>;
 
-/** Basic function to negate an input tensor. */
-class NENegLayer : public INESimpleFunctionNoBorder
-{
-public:
-    /** Initialize the function
-     *
-     * @param[in]  input  Input tensor. Data types supported: F16/F32/S32.
-     * @param[out] output Output tensor. Data types supported: same as @p input.
-     */
-    void configure(const ITensor *input, ITensor *output);
-    /** Static function to check if given info will lead to a valid configuration of @ref NENegLayer
-     *
-     * @param[in] input  First tensor input info. Data types supported: F16/F32/S32.
-     * @param[in] output Output tensor info. Data types supported: Same as @p input.
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-};
-
-/** Basic function to compute the natural logarithm of an input tensor. */
-class NELogLayer : public INESimpleFunctionNoBorder
-{
-public:
-    /** Initialize the function
-     *
-     * @param[in]  input  Input tensor. Data types supported: F16/F32.
-     * @param[out] output Output tensor. Data types supported: same as @p input.
-     */
-    void configure(const ITensor *input, ITensor *output);
-    /** Static function to check if given info will lead to a valid configuration of @ref NELogLayer
-     *
-     * @param[in] input  First tensor input info. Data types supported: F16/F32.
-     * @param[in] output Output tensor info. Data types supported: Same as @p input.
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-};
-
-/** Basic function to compute the absolute value of an input tensor. */
-class NEAbsLayer : public INESimpleFunctionNoBorder
-{
-public:
-    /** Initialize the function
-     *
-     * @param[in]  input  Input tensor. Data types supported: F16/F32/S32.
-     * @param[out] output Output tensor. Data types supported: same as @p input.
-     */
-    void configure(const ITensor *input, ITensor *output);
-    /** Static function to check if given info will lead to a valid configuration of @ref NEAbsLayer
-     *
-     * @param[in] input  First tensor input info. Data types supported: F16/F32/S32.
-     * @param[in] output Output tensor info. Data types supported: Same as @p input.
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-};
-
-/** Basic function to compute the round value elementwise of an input tensor. */
-class NERoundLayer : public INESimpleFunctionNoBorder
-{
-public:
-    /** Initialize the function
-     *
-     * @param[in]  input  Input tensor. Data types supported: F16/F32.
-     * @param[out] output Output tensor. Data types supported: same as @p input.
-     */
-    void configure(const ITensor *input, ITensor *output);
-    /** Static function to check if given info will lead to a valid configuration of @ref NERoundLayer
-     *
-     * @param[in] input  First tensor input info. Data types supported: F16/F32.
-     * @param[in] output Output tensor info. Data types supported: Same as @p input.
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-};
-
-/** Basic function to compute the sine of an input tensor. */
-class NESinLayer : public INESimpleFunctionNoBorder
-{
-public:
-    /** Initialize the function
-     *
-     * @param[in]  input  Input tensor. Data types supported: F16/F32.
-     * @param[out] output Output tensor. Data types supported: same as @p input.
-     */
-    void configure(const ITensor *input, ITensor *output);
-    /** Static function to check if given info will lead to a valid configuration of @ref NESinLayer
-     *
-     * @param[in] input  First tensor input info. Data types supported: F16/F32.
-     * @param[in] output Output tensor info. Data types supported: Same as @p input.
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-};
 } // namespace arm_compute
 #endif /* ARM_COMPUTE_NEELEMENTWISEUNARYLAYER_H */
diff --git a/docs/00_introduction.dox b/docs/00_introduction.dox
index f2dddbe..3c89cfd 100644
--- a/docs/00_introduction.dox
+++ b/docs/00_introduction.dox
@@ -98,7 +98,7 @@
    - @ref NEBatchNormalizationLayerKernel
    - @ref NELogits1DSoftmaxKernel
    - @ref NELogits1DMaxKernel
-   - @ref NEElementwiseUnaryKernel
+   - NEElementwiseUnaryKernel
  - Remove functions:
    - NELocallyConnectedLayer / CLLocallyConnectedLayer
    - NEIm2Col
@@ -830,7 +830,7 @@
     - @ref NESlice
     - @ref NEUnstack
     - @ref NEStridedSliceKernel / @ref NEStridedSlice
-    - @ref NEElementwiseUnaryKernel
+    - NEElementwiseUnaryKernel
     - @ref NERsqrtLayer
     - @ref NEExpLayer
     - @ref NEReverseKernel / @ref NEReverse
diff --git a/src/core/NEON/NEKernels.h b/src/core/NEON/NEKernels.h
index c009a6d..87eec38 100644
--- a/src/core/NEON/NEKernels.h
+++ b/src/core/NEON/NEKernels.h
@@ -54,7 +54,6 @@
 #include "src/core/NEON/kernels/NEDilateKernel.h"
 #include "src/core/NEON/kernels/NEDirectConvolutionLayerKernel.h"
 #include "src/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h"
-#include "src/core/NEON/kernels/NEElementwiseUnaryKernel.h"
 #include "src/core/NEON/kernels/NEErodeKernel.h"
 #include "src/core/NEON/kernels/NEFFTDigitReverseKernel.h"
 #include "src/core/NEON/kernels/NEFFTRadixStageKernel.h"
diff --git a/src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp b/src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp
similarity index 70%
rename from src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp
rename to src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp
index fdd2aab..d2681bb 100644
--- a/src/core/NEON/kernels/NEElementwiseUnaryKernel.cpp
+++ b/src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp
@@ -21,31 +21,35 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#include "src/core/NEON/kernels/NEElementwiseUnaryKernel.h"
+#include "src/core/cpu/kernels/CpuElementwiseUnaryKernel.h"
 
 #include "arm_compute/core/Error.h"
 #include "arm_compute/core/Helpers.h"
 #include "arm_compute/core/ITensor.h"
 #include "arm_compute/core/Validate.h"
 #include "src/core/CPP/Validate.h"
-#include "src/core/NEON/kernels/elementwise/impl/elementwise_unary_list.h"
-#include "src/core/SVE/kernels/elementwise/impl/elementwise_unary_list.h"
 #include "src/core/common/Registrars.h"
+#include "src/core/cpu/kernels/elementwise/neon/elementwise_unary_list.h"
+#include "src/core/cpu/kernels/elementwise/sve/elementwise_unary_list.h"
 #include "src/core/helpers/AutoConfiguration.h"
 #include "src/core/helpers/WindowHelpers.h"
 #include "support/ToolchainSupport.h"
 
 namespace arm_compute
 {
+namespace cpu
+{
+namespace kernels
+{
 namespace
 {
 using ElementwiseUnarySelector = std::add_pointer<bool(DataType)>::type;
 
 struct ElementwiseUnaryKernel
 {
-    const char                                          *name;
-    const ElementwiseUnarySelector                       is_selected;
-    NEElementwiseUnaryKernel::ElementwiseUnaryUkernelPtr ukernel;
+    const char                                           *name;
+    const ElementwiseUnarySelector                        is_selected;
+    CpuElementwiseUnaryKernel::ElementwiseUnaryUkernelPtr ukernel;
 };
 
 static const ElementwiseUnaryKernel available_kernels[] =
@@ -99,41 +103,35 @@
 }
 } // namespace
 
-NEElementwiseUnaryKernel::NEElementwiseUnaryKernel()
-    : _func(nullptr), _input(nullptr), _output(nullptr), _op()
+CpuElementwiseUnaryKernel::CpuElementwiseUnaryKernel()
+    : _op()
 {
 }
 
-void NEElementwiseUnaryKernel::configure(ElementWiseUnary op, const ITensor *input, ITensor *output)
+void CpuElementwiseUnaryKernel::configure(ElementWiseUnary op, const ITensorInfo &input, ITensorInfo &output)
 {
-    ARM_COMPUTE_ERROR_THROW_ON(validate(op, input->info(), output->info()));
-    ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+    ARM_COMPUTE_ERROR_THROW_ON(validate(op, input, output));
 
     // Configure kernel window
-    const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(*input->info());
+    const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(input);
     const TensorShape &out_shape    = broadcast_pair.first;
     const ValidRegion &valid_region = broadcast_pair.second;
 
     // Auto initialize output if not initialized
-    auto_init_if_empty(*output->info(), out_shape, 1, input->info()->data_type());
+    auto_init_if_empty(output, out_shape, 1, input.data_type());
 
     Window win = calculate_max_window(valid_region);
 
-    _input  = input;
-    _output = output;
-    _op     = op;
+    _op = op;
 
-    INEKernel::configure(win);
-
-    _func = get_implementation(input->info()->data_type())->ukernel;
+    ICpuKernel::configure(win);
 }
 
-Status NEElementwiseUnaryKernel::validate(ElementWiseUnary op, const ITensorInfo *input, const ITensorInfo *output)
+Status CpuElementwiseUnaryKernel::validate(ElementWiseUnary op, const ITensorInfo &input, const ITensorInfo &output)
 {
-    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
-    ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
+    ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(&input);
 
-    const auto *uk = get_implementation(input->data_type());
+    const auto *uk = get_implementation(input.data_type());
     ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);
 
     switch(op)
@@ -143,30 +141,36 @@
         case ElementWiseUnary::LOG:
         case ElementWiseUnary::ROUND:
         case ElementWiseUnary::SIN:
-            ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
+            ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input, 1, DataType::F16, DataType::F32);
             break;
         case ElementWiseUnary::NEG:
         case ElementWiseUnary::ABS:
-            ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32, DataType::S32);
+            ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input, 1, DataType::F16, DataType::F32, DataType::S32);
             break;
         default:
             ARM_COMPUTE_ERROR("ElementWiseUnary operation not supported");
     }
     // Validate in case of configured output
-    if(output->total_size() > 0)
+    if(output.total_size() > 0)
     {
-        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input, &output);
     }
 
     return Status{};
 }
 
-void NEElementwiseUnaryKernel::run(const Window &window, const ThreadInfo &info)
+void CpuElementwiseUnaryKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info)
 {
     ARM_COMPUTE_UNUSED(info);
     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
-    ARM_COMPUTE_ERROR_ON(_func == nullptr);
-    (*_func)(_input, _output, window, _op);
+    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICpuKernel::window(), window);
+
+    auto src  = tensors.get_const_tensor(TensorType::ACL_SRC);
+    auto dst  = tensors.get_tensor(TensorType::ACL_DST);
+    auto func = get_implementation(src->info()->data_type())->ukernel;
+    ARM_COMPUTE_ERROR_ON(func == nullptr);
+    func(src, dst, window, _op);
 }
+} // namespace kernels
+} // namespace cpu
 } // namespace arm_compute
diff --git a/src/core/NEON/kernels/NEElementwiseUnaryKernel.h b/src/core/cpu/kernels/CpuElementwiseUnaryKernel.h
similarity index 64%
rename from src/core/NEON/kernels/NEElementwiseUnaryKernel.h
rename to src/core/cpu/kernels/CpuElementwiseUnaryKernel.h
index b248e82..193f6f1 100644
--- a/src/core/NEON/kernels/NEElementwiseUnaryKernel.h
+++ b/src/core/cpu/kernels/CpuElementwiseUnaryKernel.h
@@ -21,51 +21,48 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_NEELEMENTWISEUNARYKERNEL_H
-#define ARM_COMPUTE_NEELEMENTWISEUNARYKERNEL_H
+#ifndef ARM_COMPUTE_CPU_ELEMENTWISE_UNARY_KERNEL_H
+#define ARM_COMPUTE_CPU_ELEMENTWISE_UNARY_KERNEL_H
 
 #include "arm_compute/core/Types.h"
-#include "src/core/NEON/INEKernel.h"
+#include "src/core/common/Macros.h"
+#include "src/core/cpu/ICpuKernel.h"
 
 namespace arm_compute
 {
 class ITensor;
-
+namespace cpu
+{
+namespace kernels
+{
 /** Interface for an element-wise unary operation kernel
  *
  * Element-wise operation is computed by:
  * @f[ output(x) = OP(input(x))@f]
  *
  */
-class NEElementwiseUnaryKernel : public INEKernel
+class CpuElementwiseUnaryKernel : public ICpuKernel
 {
 public:
     const char *name() const override
     {
-        return "NEElementwiseUnaryKernel";
+        return "CpuElementwiseUnaryKernel";
     }
     /** Default constructor */
-    NEElementwiseUnaryKernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    NEElementwiseUnaryKernel(const NEElementwiseUnaryKernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    NEElementwiseUnaryKernel &operator=(const NEElementwiseUnaryKernel &) = delete;
-    /** Allow instances of this class to be moved */
-    NEElementwiseUnaryKernel(NEElementwiseUnaryKernel &&) = default;
-    /** Allow instances of this class to be moved */
-    NEElementwiseUnaryKernel &operator=(NEElementwiseUnaryKernel &&) = default;
+    CpuElementwiseUnaryKernel();
     /** Default destructor */
-    ~NEElementwiseUnaryKernel() = default;
+    ~CpuElementwiseUnaryKernel() = default;
+    ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuElementwiseUnaryKernel);
 
-    /** Function to configure the @ref NEElementwiseUnaryKernel
+    /** Function to configure the @ref CpuElementwiseUnaryKernel
      *
      * @param[in]  op     Arithmetic operation to be executed.
      * @param[in]  input  First tensor input. Data types supported: F16/F32, F16/F32/S32 for NEG/ABS operations.
      * @param[out] output Output tensor. Data types supported: Same as @p input.
      */
-    void configure(ElementWiseUnary op, const ITensor *input, ITensor *output);
+    void configure(ElementWiseUnary op, const ITensorInfo &input, ITensorInfo &output);
 
-    /** Static function to check if given info will lead to a valid configuration of @ref NEElementwiseUnaryKernel
+    /** Static function to check if given info will lead to a valid configuration of @ref CpuElementwiseUnaryKernel
      *
      * @param[in] op     Arithmetic operation to be executed.
      * @param[in] input  First tensor input info. Data types supported: F16/F32, F16/F32/S32 for NEG/ABS operations.
@@ -73,10 +70,10 @@
      *
      * @return a Status
      */
-    static Status validate(ElementWiseUnary op, const ITensorInfo *input, const ITensorInfo *output);
+    static Status validate(ElementWiseUnary op, const ITensorInfo &input, const ITensorInfo &output);
 
     // Inherited methods overridden:
-    void run(const Window &window, const ThreadInfo &info) override;
+    void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
 
     /** Common signature for all the specialised elementwise unary micro-kernels
      *
@@ -85,10 +82,9 @@
     using ElementwiseUnaryUkernelPtr = std::add_pointer<void(const ITensor *, ITensor *, const Window &, ElementWiseUnary)>::type;
 
 private:
-    ElementwiseUnaryUkernelPtr _func;
-    const ITensor             *_input;
-    ITensor                   *_output;
-    ElementWiseUnary           _op;
+    ElementWiseUnary _op;
 };
+} // namespace kernels
+} // namespace cpu
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_NEELEMENTWISEUNARYKERNEL_H */
+#endif /* ARM_COMPUTE_CPU_ELEMENTWISE_UNARY_KERNEL_H */
diff --git a/src/core/NEON/kernels/elementwise/impl/elementwise_unary_list.h b/src/core/cpu/kernels/elementwise/neon/elementwise_unary_list.h
similarity index 100%
rename from src/core/NEON/kernels/elementwise/impl/elementwise_unary_list.h
rename to src/core/cpu/kernels/elementwise/neon/elementwise_unary_list.h
diff --git a/src/core/SVE/kernels/elementwise/impl/elementwise_unary_list.h b/src/core/cpu/kernels/elementwise/sve/elementwise_unary_list.h
similarity index 100%
rename from src/core/SVE/kernels/elementwise/impl/elementwise_unary_list.h
rename to src/core/cpu/kernels/elementwise/sve/elementwise_unary_list.h
diff --git a/src/runtime/NEON/functions/NEElementwiseUnaryLayer.cpp b/src/runtime/NEON/functions/NEElementwiseUnaryLayer.cpp
index 5c779f1..1a9e883 100644
--- a/src/runtime/NEON/functions/NEElementwiseUnaryLayer.cpp
+++ b/src/runtime/NEON/functions/NEElementwiseUnaryLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -22,88 +22,63 @@
  * SOFTWARE.
  */
 #include "arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h"
-
-#include "src/core/NEON/kernels/NEElementwiseUnaryKernel.h"
-
+#include "src/runtime/cpu/operators/CpuElementwiseUnary.h"
 #include <utility>
 
 namespace arm_compute
 {
-void NERsqrtLayer::configure(const ITensor *input, ITensor *output)
+using OperatorType = cpu::CpuElementwiseUnary;
+
+template <ElementWiseUnary op>
+struct NEElementwiseUnaryLayer<op>::Impl
 {
-    auto k = std::make_unique<NEElementwiseUnaryKernel>();
-    k->configure(ElementWiseUnary::RSQRT, input, output);
-    _kernel = std::move(k);
+    const ITensor                *src{ nullptr };
+    ITensor                      *dst{ nullptr };
+    std::unique_ptr<OperatorType> cpu_op{ nullptr };
+};
+
+template <ElementWiseUnary op>
+NEElementwiseUnaryLayer<op>::NEElementwiseUnaryLayer()
+    : _impl(std::make_unique<Impl>())
+{
 }
-Status NERsqrtLayer::validate(const ITensorInfo *input, const ITensorInfo *output)
+template <ElementWiseUnary op>
+NEElementwiseUnaryLayer<op>::~NEElementwiseUnaryLayer() = default;
+template <ElementWiseUnary op>
+NEElementwiseUnaryLayer<op>::NEElementwiseUnaryLayer(NEElementwiseUnaryLayer &&) = default;
+template <ElementWiseUnary   op>
+NEElementwiseUnaryLayer<op> &NEElementwiseUnaryLayer<op>::operator=(NEElementwiseUnaryLayer &&) = default;
+
+template <ElementWiseUnary op>
+void NEElementwiseUnaryLayer<op>::configure(const ITensor *input, ITensor *output)
 {
-    return NEElementwiseUnaryKernel::validate(ElementWiseUnary::RSQRT, input, output);
+    _impl->src    = input;
+    _impl->dst    = output;
+    _impl->cpu_op = std::make_unique<OperatorType>();
+    _impl->cpu_op->configure(op, *_impl->src->info(), *_impl->dst->info());
 }
 
-void NEExpLayer::configure(const ITensor *input, ITensor *output)
+template <ElementWiseUnary op>
+Status NEElementwiseUnaryLayer<op>::validate(const ITensorInfo *input, const ITensorInfo *output)
 {
-    auto k = std::make_unique<NEElementwiseUnaryKernel>();
-    k->configure(ElementWiseUnary::EXP, input, output);
-    _kernel = std::move(k);
-}
-Status NEExpLayer::validate(const ITensorInfo *input, const ITensorInfo *output)
-{
-    return NEElementwiseUnaryKernel::validate(ElementWiseUnary::EXP, input, output);
+    return OperatorType::validate(op, *input, *output);
 }
 
-void NENegLayer::configure(const ITensor *input, ITensor *output)
+template <ElementWiseUnary op>
+void                       NEElementwiseUnaryLayer<op>::run()
 {
-    auto k = std::make_unique<NEElementwiseUnaryKernel>();
-    k->configure(ElementWiseUnary::NEG, input, output);
-    _kernel = std::move(k);
-}
-Status NENegLayer::validate(const ITensorInfo *input, const ITensorInfo *output)
-{
-    return NEElementwiseUnaryKernel::validate(ElementWiseUnary::NEG, input, output);
+    ITensorPack pack;
+    pack.add_tensor(TensorType::ACL_SRC, _impl->src);
+    pack.add_tensor(TensorType::ACL_DST, _impl->dst);
+    _impl->cpu_op->run(pack);
 }
 
-void NELogLayer::configure(const ITensor *input, ITensor *output)
-{
-    auto k = std::make_unique<NEElementwiseUnaryKernel>();
-    k->configure(ElementWiseUnary::LOG, input, output);
-    _kernel = std::move(k);
-}
-Status NELogLayer::validate(const ITensorInfo *input, const ITensorInfo *output)
-{
-    return NEElementwiseUnaryKernel::validate(ElementWiseUnary::LOG, input, output);
-}
-
-void NEAbsLayer::configure(const ITensor *input, ITensor *output)
-{
-    auto k = std::make_unique<NEElementwiseUnaryKernel>();
-    k->configure(ElementWiseUnary::ABS, input, output);
-    _kernel = std::move(k);
-}
-Status NEAbsLayer::validate(const ITensorInfo *input, const ITensorInfo *output)
-{
-    return NEElementwiseUnaryKernel::validate(ElementWiseUnary::ABS, input, output);
-}
-
-void NERoundLayer::configure(const ITensor *input, ITensor *output)
-{
-    auto k = std::make_unique<NEElementwiseUnaryKernel>();
-    k->configure(ElementWiseUnary::ROUND, input, output);
-    _kernel = std::move(k);
-}
-Status NERoundLayer::validate(const ITensorInfo *input, const ITensorInfo *output)
-{
-    return NEElementwiseUnaryKernel::validate(ElementWiseUnary::ROUND, input, output);
-}
-
-void NESinLayer::configure(const ITensor *input, ITensor *output)
-{
-    auto k = std::make_unique<NEElementwiseUnaryKernel>();
-    k->configure(ElementWiseUnary::SIN, input, output);
-    _kernel = std::move(k);
-}
-Status NESinLayer::validate(const ITensorInfo *input, const ITensorInfo *output)
-{
-    return NEElementwiseUnaryKernel::validate(ElementWiseUnary::SIN, input, output);
-}
+template class NEElementwiseUnaryLayer<ElementWiseUnary::RSQRT>;
+template class NEElementwiseUnaryLayer<ElementWiseUnary::EXP>;
+template class NEElementwiseUnaryLayer<ElementWiseUnary::NEG>;
+template class NEElementwiseUnaryLayer<ElementWiseUnary::LOG>;
+template class NEElementwiseUnaryLayer<ElementWiseUnary::ABS>;
+template class NEElementwiseUnaryLayer<ElementWiseUnary::ROUND>;
+template class NEElementwiseUnaryLayer<ElementWiseUnary::SIN>;
 
 } // namespace arm_compute
diff --git a/src/runtime/cpu/operators/CpuElementwiseUnary.cpp b/src/runtime/cpu/operators/CpuElementwiseUnary.cpp
new file mode 100644
index 0000000..d1b1700
--- /dev/null
+++ b/src/runtime/cpu/operators/CpuElementwiseUnary.cpp
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/runtime/cpu/operators/CpuElementwiseUnary.h"
+#include "src/core/cpu/kernels/CpuElementwiseUnaryKernel.h"
+
+namespace arm_compute
+{
+namespace cpu
+{
+using KernelType = kernels::CpuElementwiseUnaryKernel;
+
+void CpuElementwiseUnary::configure(ElementWiseUnary op, const ITensorInfo &src, ITensorInfo &dst)
+{
+    auto k = std::make_unique<KernelType>();
+    k->configure(op, src, dst);
+    _kernel = std::move(k);
+}
+
+Status CpuElementwiseUnary::validate(ElementWiseUnary op, const ITensorInfo &src, const ITensorInfo &dst)
+{
+    return KernelType::validate(op, src, dst);
+}
+} // namespace cpu
+} // namespace arm_compute
\ No newline at end of file
diff --git a/src/runtime/cpu/operators/CpuElementwiseUnary.h b/src/runtime/cpu/operators/CpuElementwiseUnary.h
new file mode 100644
index 0000000..0b2a9e7
--- /dev/null
+++ b/src/runtime/cpu/operators/CpuElementwiseUnary.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_CPU_ELEMENTWISE_UNARY_H
+#define ARM_COMPUTE_CPU_ELEMENTWISE_UNARY_H
+
+#include "arm_compute/core/Types.h"
+#include "src/runtime/cpu/ICpuOperator.h"
+
+namespace arm_compute
+{
+namespace cpu
+{
+class CpuElementwiseUnary : public ICpuOperator
+{
+public:
+    /** Initialize the function
+     *
+     * @param[in]  op  Unary operation to execute
+     * @param[in]  src Input tensor information. Data types supported: F16/F32, F16/F32/S32 for NEG/ABS operations.
+     * @param[out] dst Output tensor information. Data types supported: Same as @p src.
+     */
+    void configure(ElementWiseUnary op, const ITensorInfo &src, ITensorInfo &dst);
+    /** Static function to check if given info will lead to a valid configuration
+     *
+     * @param[in] op  Unary operation to execute
+     * @param[in] src Input tensor info. Data types supported: F16/F32, F16/F32/S32 for NEG/ABS operations.
+     * @param[in] dst Output tensor info. Data types supported: Same as @p src.
+     *
+     * @return a status
+     */
+    static Status validate(ElementWiseUnary op, const ITensorInfo &src, const ITensorInfo &dst);
+};
+
+} // namespace cpu
+} // namespace arm_compute
+
+#endif /* ARM_COMPUTE_CPU_ELEMENTWISE_UNARY_H */
\ No newline at end of file