COMPMID-3377: Async support to NEElementwiseOperationKernel kernels/functions

Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com>
Change-Id: I208287b44ece051e95f891d43a691cb0ac6e56c5
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3419
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/arm_compute/core/CPP/ICPPKernel.h b/arm_compute/core/CPP/ICPPKernel.h
index 3ec5475..45c7b52 100644
--- a/arm_compute/core/CPP/ICPPKernel.h
+++ b/arm_compute/core/CPP/ICPPKernel.h
@@ -84,7 +84,7 @@
      * @param[in] window  Region on which to execute the kernel. (Must be a region of the window returned by window())
      * @param[in] info    Info about executing thread and CPU.
      */
-    virtual void run_op(const std::vector<InputTensor> &inputs, const std::vector<OutputTensor> &outputs, const Window &window, const ThreadInfo &info)
+    virtual void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs, const Window &window, const ThreadInfo &info)
     {
         ARM_COMPUTE_UNUSED(inputs, outputs, window, info);
     }
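
With run_op, a kernel no longer caches tensor pointers at configure time; the caller hands the tensors in on every invocation, keyed by slot. A minimal sketch of a kernel adopting the new interface (illustrative only; MyCopyKernel is hypothetical, and ACL_SRC/ACL_DST are assumed TensorType keys from arm_compute/core/experimental/Types.h):

    #include "arm_compute/core/CPP/ICPPKernel.h"
    #include "arm_compute/core/Error.h"
    #include "arm_compute/core/ITensor.h"

    class MyCopyKernel : public arm_compute::ICPPKernel
    {
    public:
        const char *name() const override
        {
            return "MyCopyKernel";
        }
        void run_op(const arm_compute::InputTensorMap &inputs, const arm_compute::OutputTensorMap &outputs,
                    const arm_compute::Window &window, const arm_compute::ThreadInfo &info) override
        {
            ARM_COMPUTE_UNUSED(info);
            // Tensors are looked up by slot key on every call instead of being stored as members.
            const arm_compute::ITensor *src = inputs.at(arm_compute::TensorType::ACL_SRC);
            arm_compute::ITensor       *dst = outputs.at(arm_compute::TensorType::ACL_DST);
            // Iterate over 'window' here, reading from src and writing to dst.
            ARM_COMPUTE_UNUSED(src, dst, window);
        }
    };
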
diff --git a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h
index 399afa6..7064e3d 100644
--- a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h
@@ -76,7 +76,7 @@
     static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info);
 
     // Inherited methods overridden:
-    void run_op(const std::vector<InputTensor> &inputs, const std::vector<OutputTensor> &outputs,
+    void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs,
                 const Window &window, const ThreadInfo &info) override;
 
 private:
diff --git a/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h b/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h
index 61c25e1..b109ddd 100644
--- a/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h
+++ b/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h
@@ -57,18 +57,19 @@
     /** Default destructor */
     ~NEElementwiseOperationKernel() = default;
 
-    // Inherited methods overridden:
-    void run(const Window &window, const ThreadInfo &info) override;
-
     /** Common signature for all the specialised arithmetic functions
      *
     * @param[in] input1 First tensor input. Data types supported: QASYMM8/S16/F16/S32/F32.
     * @param[in] input2 Second tensor input. Data types supported: Same as @p input1.
     * @param[in] output Output tensor. Data types supported: Dependent on subclass.
      * @param[in] window Region on which to execute the kernel.
      */
     using ElementwiseFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window);
 
+    // Inherited methods overridden:
+    void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs,
+                const Window &window, const ThreadInfo &info) override;
+
 protected:
     /** Validate the arguments passed to the kernel
      *
@@ -81,7 +82,7 @@
     /** Common configure function for element-wise operators with no additional options (e.g. Min, Max, SquaredDiff)
      *
      */
-    void configure_common(const ITensor *input1, const ITensor *input2, ITensor *output);
+    void configure_common(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
 
     /** Function to use for the particular tensor types passed to configure() */
     std::function<void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window)> _function;
@@ -100,11 +101,11 @@
     /** Configure the kernel with the given arithmetic operation and tensor info.
      *
      * @param[in] op     Arithmetic operation to be executed.
-     * @param[in] input1 First tensor input. Data types supported: QASYMM8/S16/F16/S32/F32.
-     * @param[in] input2 Second tensor input. Data types supported: Same as @p input1.
-     * @param[in] output Output tensor. Data types supported: Same as @p input1.
+     * @param[in] input1 First tensor input info. Data types supported: QASYMM8/S16/F16/S32/F32.
+     * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
+     * @param[in] output Output tensor info. Data types supported: Same as @p input1.
      */
-    void configure(ArithmeticOperation op, const ITensor *input1, const ITensor *input2, ITensor *output);
+    void configure(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
 
     /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel
      *
@@ -130,11 +131,11 @@
 
     /** Configure the kernel with the given input and output tensor info.
      *
-     * @param[in] input1 First tensor input. Data types supported: F16/F32.
-     * @param[in] input2 Second tensor input. Data types supported: Same as @p input1.
-     * @param[in] output Output tensor. Data types supported: Same as @p input1.
+     * @param[in] input1 First tensor input info. Data types supported: F16/F32.
+     * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
+     * @param[in] output Output tensor info. Data types supported: Same as @p input1.
      */
-    void configure(const ITensor *input1, const ITensor *input2, ITensor *output);
+    void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
 
     /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel
      *
@@ -159,11 +160,11 @@
 
     /** Configure the kernel with the given input and output tensor info.
      *
-     * @param[in]  input1 First tensor input. Data types supported: F16/F32.
-     * @param[in]  input2 Second tensor input. Data types supported: Same as @p input1.
-     * @param[out] output Output tensor. Data types supported: Same as @p input1.
+     * @param[in]  input1 First tensor input info. Data types supported: F16/F32.
+     * @param[in]  input2 Second tensor input info. Data types supported: Same as @p input1.
+     * @param[out] output Output tensor info. Data types supported: Same as @p input1.
      */
-    void configure(const ITensor *input1, const ITensor *input2, ITensor *output);
+    void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
 
     /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel
      *
@@ -189,11 +190,11 @@
     /** Configure the kernel with the given comparison operation and tensor info.
      *
      * @param[in] op     Comparison operation to be executed.
-     * @param[in] input1 First tensor input. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
-     * @param[in] input2 Second tensor input. Data types supported: Same as @p input1.
-     * @param[in] output Output tensor. Data types supported: U16/U32.
+     * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+     * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
+     * @param[in] output Output tensor info. Data types supported: U16/U32.
      */
-    void configure(ComparisonOperation op, const ITensor *input1, const ITensor *input2, ITensor *output);
+    void configure(ComparisonOperation op, const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
 
     /** Static function to check if given info will lead to a valid configuration of @ref NEComparisonOperationKernel
      *
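
The effect of moving configure() onto ITensorInfo is that a kernel is shaped from tensor metadata alone and bound to concrete tensors only when executed. A sketch under assumed slot keys (src0/src1/dst stand for previously allocated tensors; a default ThreadInfo is used for brevity):

    #include "arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h"
    #include "arm_compute/core/TensorInfo.h"

    void configure_and_run_sketch(arm_compute::ITensor &src0, arm_compute::ITensor &src1, arm_compute::ITensor &dst)
    {
        arm_compute::TensorInfo info(arm_compute::TensorShape(16U, 16U), 1, arm_compute::DataType::F32);

        // Configuration touches only metadata; no tensor memory is needed yet.
        arm_compute::NEArithmeticOperationKernel kernel;
        kernel.configure(arm_compute::ArithmeticOperation::MAX, &info, &info, &info);

        // Concrete tensors are supplied per run, keyed by slot.
        const arm_compute::InputTensorMap  inputs  = { { arm_compute::TensorType::ACL_SRC_0, &src0 },
                                                       { arm_compute::TensorType::ACL_SRC_1, &src1 } };
        const arm_compute::OutputTensorMap outputs = { { arm_compute::TensorType::ACL_DST, &dst } };
        kernel.run_op(inputs, outputs, kernel.window(), arm_compute::ThreadInfo{});
    }
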
diff --git a/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h b/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h
index 7a4dce1..1ed3554 100644
--- a/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h
@@ -57,7 +57,7 @@
     static Status validate(const ITensorInfo *input, const ITensorInfo *output);
 
     // Inherited methods overridden:
-    void run_op(const std::vector<InputTensor> &inputs, const std::vector<OutputTensor> &outputs,
+    void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs,
                 const Window &window, const ThreadInfo &info) override;
 };
 } // namespace arm_compute
diff --git a/arm_compute/core/experimental/Types.h b/arm_compute/core/experimental/Types.h
index 2b55918..62dd6ff 100644
--- a/arm_compute/core/experimental/Types.h
+++ b/arm_compute/core/experimental/Types.h
@@ -50,29 +50,9 @@
     ACL_INT_2   = 52
 };
 
-/** Input tensor aggregate */
-struct InputTensor
-{
-    InputTensor(TensorType type, const ITensor *tensor)
-        : type(type), tensor(tensor)
-    {
-    }
-
-    TensorType     type{ TensorType::ACL_UNKNOWN };
-    const ITensor *tensor{ nullptr };
-};
-/** Output tensor aggregate */
-struct OutputTensor
-{
-    OutputTensor(TensorType type, ITensor *tensor)
-        : type(type), tensor(tensor)
-    {
-    }
-
-    TensorType type{ TensorType::ACL_UNKNOWN };
-    ITensor   *tensor{ nullptr };
-};
-using OperatorTensor = OutputTensor;
+using InputTensorMap    = std::map<TensorType, const ITensor *>;
+using OutputTensorMap   = std::map<TensorType, ITensor *>;
+using OperatorTensorMap = OutputTensorMap;
 
 namespace experimental
 {
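
The aggregates give way to plain std::map lookups keyed by TensorType, so a caller binds an arbitrary set of tensors to well-known slots and a kernel retrieves them by key rather than by position. A caller-side sketch (slot names assumed from the enum above):

    #include "arm_compute/core/Error.h"
    #include "arm_compute/core/experimental/Types.h"

    void bind_tensors_sketch(const arm_compute::ITensor &src0, const arm_compute::ITensor &src1, arm_compute::ITensor &dst)
    {
        arm_compute::InputTensorMap inputs =
        {
            { arm_compute::TensorType::ACL_SRC_0, &src0 },
            { arm_compute::TensorType::ACL_SRC_1, &src1 }
        };
        arm_compute::OutputTensorMap outputs = { { arm_compute::TensorType::ACL_DST, &dst } };

        // Keyed lookup replaces the positional access used with the old vectors.
        const arm_compute::ITensor *first = inputs.at(arm_compute::TensorType::ACL_SRC_0);
        ARM_COMPUTE_UNUSED(first, outputs);
    }
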
diff --git a/arm_compute/runtime/CPP/CPPScheduler.h b/arm_compute/runtime/CPP/CPPScheduler.h
index 2ccb094..2f7951e 100644
--- a/arm_compute/runtime/CPP/CPPScheduler.h
+++ b/arm_compute/runtime/CPP/CPPScheduler.h
@@ -77,7 +77,7 @@
-     * @param[in] inputs  Vector that contains the input tensors.
-     * @param[in] outputs Vector that contains the output tensors.
+     * @param[in] inputs  Map that contains the input tensors.
+     * @param[in] outputs Map that contains the output tensors.
      */
-    void schedule_op(ICPPKernel *kernel, const Hints &hints, const std::vector<InputTensor> &inputs, const std::vector<OutputTensor> &outputs) override;
+    void schedule_op(ICPPKernel *kernel, const Hints &hints, const InputTensorMap &inputs, const OutputTensorMap &outputs) override;
 
 protected:
     /** Will run the workloads in parallel using num_threads
@@ -87,7 +87,7 @@
     void run_workloads(std::vector<Workload> &workloads) override;
 
 private:
-    void schedule_common(ICPPKernel *kernel, const Hints &hints, const std::vector<InputTensor> &inputs, const std::vector<OutputTensor> &outputs);
+    void schedule_common(ICPPKernel *kernel, const Hints &hints, const InputTensorMap &inputs, const OutputTensorMap &outputs);
     struct Impl;
     std::unique_ptr<Impl> _impl;
 };
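
Dispatch mirrors the existing schedule() call, except the tensor maps now travel with the kernel. A sketch (kernel, src and dst are assumed to be configured and allocated elsewhere):

    #include "arm_compute/core/Window.h"
    #include "arm_compute/runtime/CPP/CPPScheduler.h"

    void schedule_sketch(arm_compute::ICPPKernel &kernel, const arm_compute::ITensor &src, arm_compute::ITensor &dst)
    {
        const arm_compute::InputTensorMap  inputs  = { { arm_compute::TensorType::ACL_SRC, &src } };
        const arm_compute::OutputTensorMap outputs = { { arm_compute::TensorType::ACL_DST, &dst } };

        // Split along dimension Y, as schedule() would, passing the maps through to run_op.
        arm_compute::CPPScheduler::get().schedule_op(&kernel, arm_compute::IScheduler::Hints(arm_compute::Window::DimY),
                                                     inputs, outputs);
    }
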
diff --git a/arm_compute/runtime/IOperator.h b/arm_compute/runtime/IOperator.h
index cf3c8b0..887bed4 100644
--- a/arm_compute/runtime/IOperator.h
+++ b/arm_compute/runtime/IOperator.h
@@ -46,7 +46,7 @@
-     * @param[in] workspace Vector that contains the workspace tensors.
+     * @param[in] workspace Map that contains the workspace tensors.
      *
      */
-    virtual void run(std::vector<InputTensor> inputs, std::vector<OutputTensor> outputs, std::vector<OperatorTensor> workspace) = 0;
+    virtual void run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) = 0;
     /** Prepare the function for executing
      *
      * Any one off pre-processing step required by the function is handled here
@@ -55,7 +55,7 @@
      *
      * @note Prepare stage might not need all the function's buffers' backing memory to be available in order to execute
      */
-    virtual void prepare(std::vector<OperatorTensor> constants) = 0;
+    virtual void prepare(OperatorTensorMap constants) = 0;
 
     /** Return the memory requirements required by the workspace
      */
diff --git a/arm_compute/runtime/IScheduler.h b/arm_compute/runtime/IScheduler.h
index 40da86f..29135f4 100644
--- a/arm_compute/runtime/IScheduler.h
+++ b/arm_compute/runtime/IScheduler.h
@@ -157,7 +157,7 @@
-     * @param[in] inputs  Vector containing the input tensors.
-     * @param[in] outputs Vector containing the output tensors.
+     * @param[in] inputs  Map containing the input tensors.
+     * @param[in] outputs Map containing the output tensors.
      */
-    virtual void schedule_op(ICPPKernel *kernel, const Hints &hints, const std::vector<InputTensor> &inputs, const std::vector<OutputTensor> &outputs) = 0;
+    virtual void schedule_op(ICPPKernel *kernel, const Hints &hints, const InputTensorMap &inputs, const OutputTensorMap &outputs) = 0;
 
     /** Execute all the passed workloads
      *
diff --git a/arm_compute/runtime/NEON/INEOperator.h b/arm_compute/runtime/NEON/INEOperator.h
index 2f6e180..2e8f8f3 100644
--- a/arm_compute/runtime/NEON/INEOperator.h
+++ b/arm_compute/runtime/NEON/INEOperator.h
@@ -54,8 +54,8 @@
     INEOperator &operator=(INEOperator &&) = default;
 
     // Inherited methods overridden:
-    void run(std::vector<InputTensor> inputs, std::vector<OutputTensor> outputs, std::vector<OperatorTensor> workspace) override final;
-    void prepare(std::vector<OperatorTensor> constants) override final;
+    void run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace) override final;
+    void prepare(OperatorTensorMap constants) override final;
 
 protected:
     std::unique_ptr<INEKernel> _kernel;
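
The final run() override gives every NEON operator the same forwarding behaviour. A plausible body, shown here only as an assumption about what the corresponding .cpp does (enclosing namespaces elided):

    #include "arm_compute/runtime/NEON/NEScheduler.h"

    void INEOperator::run(InputTensorMap inputs, OutputTensorMap outputs, OperatorTensorMap workspace)
    {
        ARM_COMPUTE_UNUSED(workspace);
        if(inputs.empty() || outputs.empty())
        {
            ARM_COMPUTE_ERROR("No inputs provided");
        }
        // Single dispatch point: the kernel and its tensor maps travel together.
        NEScheduler::get().schedule_op(_kernel.get(), Window::DimY, inputs, outputs);
    }
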
diff --git a/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h b/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h
index cac105c..08f798e 100644
--- a/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h
+++ b/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h
@@ -25,7 +25,8 @@
 #define ARM_COMPUTE_NEELEMENTWISEOPERATIONS_H
 
 #include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/NEON/INEOperator.h"
 
 namespace arm_compute
 {
@@ -36,9 +37,21 @@
  * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
  * @note The function performs a max operation between two tensors.
  */
-class NEElementwiseMax : public INESimpleFunction
+class NEElementwiseMax : public IFunction
 {
 public:
+    /** Default Constructor */
+    NEElementwiseMax();
+    /** Default Destructor */
+    ~NEElementwiseMax();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEElementwiseMax(const NEElementwiseMax &) = delete;
+    /** Default move constructor */
+    NEElementwiseMax(NEElementwiseMax &&);
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEElementwiseMax &operator=(const NEElementwiseMax &) = delete;
+    /** Default move assignment operator */
+    NEElementwiseMax &operator=(NEElementwiseMax &&);
     /** Initialise the kernel's inputs and output.
      *
      * @param[in, out] input1   First tensor input. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
@@ -57,6 +70,13 @@
      * @return a status
      */
     static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    struct Impl;
+    std::unique_ptr<Impl> _impl;
 };
 
 /** Basic function to run @ref NEArithmeticOperationKernel for min
@@ -64,9 +84,21 @@
  * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
  * @note The function performs a min operation between two tensors.
  */
-class NEElementwiseMin : public INESimpleFunction
+class NEElementwiseMin : public IFunction
 {
 public:
+    /** Default Constructor */
+    NEElementwiseMin();
+    /** Default Destructor */
+    ~NEElementwiseMin();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEElementwiseMin(const NEElementwiseMin &) = delete;
+    /** Default move constructor */
+    NEElementwiseMin(NEElementwiseMin &&);
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEElementwiseMin &operator=(const NEElementwiseMin &) = delete;
+    /** Default move assignment operator */
+    NEElementwiseMin &operator=(NEElementwiseMin &&);
     /** Initialise the kernel's inputs and output.
      *
      * @param[in, out] input1   First tensor input. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
@@ -85,6 +117,13 @@
      * @return a status
      */
     static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    struct Impl;
+    std::unique_ptr<Impl> _impl;
 };
 
 /** Basic function to run @ref NEArithmeticOperationKernel for squared difference
@@ -92,9 +131,21 @@
  * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
  * @note The function performs a squared difference operation between two tensors (i.e., out[i] = (in1[i] - in2[i])^2)
  */
-class NEElementwiseSquaredDiff : public INESimpleFunction
+class NEElementwiseSquaredDiff : public IFunction
 {
 public:
+    /** Default Constructor */
+    NEElementwiseSquaredDiff();
+    /** Default Destructor */
+    ~NEElementwiseSquaredDiff();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEElementwiseSquaredDiff(const NEElementwiseSquaredDiff &) = delete;
+    /** Default move constructor */
+    NEElementwiseSquaredDiff(NEElementwiseSquaredDiff &&);
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEElementwiseSquaredDiff &operator=(const NEElementwiseSquaredDiff &) = delete;
+    /** Default move assignment operator */
+    NEElementwiseSquaredDiff &operator=(NEElementwiseSquaredDiff &&);
     /** Initialise the kernel's inputs and output.
      *
      * @param[in, out] input1   First tensor input. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
@@ -113,6 +164,13 @@
      * @return a status
      */
     static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    struct Impl;
+    std::unique_ptr<Impl> _impl;
 };
 
 /** Basic function to run @ref NEArithmeticOperationKernel for division
@@ -120,9 +178,21 @@
  * @note The tensor data type for the inputs must be F16/F32.
  * @note The function performs an elementwise division between two tensors (i.e., out[i] = in1[i] / in2[i])
  */
-class NEElementwiseDivision : public INESimpleFunction
+class NEElementwiseDivision : public IFunction
 {
 public:
+    /** Default Constructor */
+    NEElementwiseDivision();
+    /** Default Destructor */
+    ~NEElementwiseDivision();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEElementwiseDivision(const NEElementwiseDivision &) = delete;
+    /** Default move constructor */
+    NEElementwiseDivision(NEElementwiseDivision &&);
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEElementwiseDivision &operator=(const NEElementwiseDivision &) = delete;
+    /** Default move assignment operator */
+    NEElementwiseDivision &operator=(NEElementwiseDivision &&);
     /** Initialise the kernel's inputs and output.
      *
      * @param[in, out] input1   First tensor input. Data types supported: F16/F32.
@@ -141,6 +211,13 @@
      * @return a status
      */
     static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    struct Impl;
+    std::unique_ptr<Impl> _impl;
 };
 
 /** Basic function to run @ref NEArithmeticOperationKernel for power
@@ -149,9 +226,21 @@
  * @note The function performs an elementwise power of in1 to in2 (i.e., out[i] = in1[i] ^ in2[i])
  * @note For an exponent that is a float, this function will only work with a positive base.
  */
-class NEElementwisePower : public INESimpleFunction
+class NEElementwisePower : public IFunction
 {
 public:
+    /** Default Constructor */
+    NEElementwisePower();
+    /** Default Destructor */
+    ~NEElementwisePower();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEElementwisePower(const NEElementwisePower &) = delete;
+    /** Default move constructor */
+    NEElementwisePower(NEElementwisePower &&);
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEElementwisePower &operator=(const NEElementwisePower &) = delete;
+    /** Default move assignment operator */
+    NEElementwisePower &operator=(NEElementwisePower &&);
     /** Initialise the kernel's inputs and output.
      *
      * @param[in, out] input1   First tensor input. Data types supported: F16/F32.
@@ -170,6 +259,13 @@
      * @return a status
      */
     static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    struct Impl;
+    std::unique_ptr<Impl> _impl;
 };
 
 /** Basic function to run @ref NEComparisonOperationKernel.
@@ -177,9 +273,21 @@
  * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
  * @note The function performs a comparison operation between two tensors.
  */
-class NEElementwiseComparison : public INESimpleFunction
+class NEElementwiseComparison : public IFunction
 {
 public:
+    /** Default Constructor */
+    NEElementwiseComparison();
+    /** Default Destructor */
+    ~NEElementwiseComparison();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEElementwiseComparison(const NEElementwiseComparison &) = delete;
+    /** Default move constructor */
+    NEElementwiseComparison(NEElementwiseComparison &&);
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEElementwiseComparison &operator=(const NEElementwiseComparison &) = delete;
+    /** Default move assignment operator */
+    NEElementwiseComparison &operator=(NEElementwiseComparison &&);
     /** Initialise the kernel's inputs and output.
      *
      * @param[in, out] input1 First tensor input. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
@@ -198,6 +306,13 @@
      * @return a status
      */
     static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ComparisonOperation op);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    struct Impl;
+    std::unique_ptr<Impl> _impl;
 };
 
 /** Basic function to run @ref NEComparisonOperationKernel
@@ -206,9 +321,21 @@
  * @note The function performs a comparison operation between two tensors.
  */
 template <ComparisonOperation op>
-class NEElementwiseComparisonStatic : public INESimpleFunction
+class NEElementwiseComparisonStatic : public IFunction
 {
 public:
+    /** Default Constructor */
+    NEElementwiseComparisonStatic();
+    /** Default Destructor */
+    ~NEElementwiseComparisonStatic();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEElementwiseComparisonStatic(const NEElementwiseComparisonStatic &) = delete;
+    /** Default move constructor */
+    NEElementwiseComparisonStatic(NEElementwiseComparisonStatic &&);
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEElementwiseComparisonStatic &operator=(const NEElementwiseComparisonStatic &) = delete;
+    /** Default move assignment operator */
+    NEElementwiseComparisonStatic &operator=(NEElementwiseComparisonStatic &&);
     /** Initialise the kernel's inputs and output.
      *
      * @param[in, out] input1 First tensor input. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
@@ -225,6 +352,13 @@
      * @return a status
      */
     static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    struct Impl;
+    std::unique_ptr<Impl> _impl;
 };
 
 /** Basic function to run equal comparison. */
@@ -239,5 +373,238 @@
 using NELess = NEElementwiseComparisonStatic<ComparisonOperation::Less>;
 /** Basic function to run less-equal comparison. */
 using NELessEqual = NEElementwiseComparisonStatic<ComparisonOperation::LessEqual>;
+
+namespace experimental
+{
+/** Basic function to run @ref NEArithmeticOperationKernel for max
+ *
+ * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+ * @note The function performs a max operation between two tensors.
+ */
+class NEElementwiseMax : public INEOperator
+{
+public:
+    /** Initialise the kernel's inputs and output.
+     *
+     * @param[in]  input1   First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+     * @param[in]  input2   Second tensor input info. Data types supported: Same as @p input1.
+     * @param[out] output   Output tensor info. Data types supported: Same as @p input1.
+     * @param[in]  act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
+     */
+    void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+    /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel for max
+     *
+     * @param[in] input1   First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+     * @param[in] input2   Second tensor input info. Data types supported: Same as @p input1.
+     * @param[in] output   Output tensor info. Data types supported: Same as @p input1.
+     * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+
+    // Inherited methods overridden:
+    MemoryRequirements workspace() const override;
+};
+
+/** Basic function to run @ref NEArithmeticOperationKernel for min
+ *
+ * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+ * @note The function performs a min operation between two tensors.
+ */
+class NEElementwiseMin : public INEOperator
+{
+public:
+    /** Initialise the kernel's inputs and output.
+     *
+     * @param[in]  input1   First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+     * @param[in]  input2   Second tensor input info. Data types supported: Same as @p input1.
+     * @param[out] output   Output tensor info. Data types supported: Same as @p input1.
+     * @param[in]  act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
+     */
+    void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+    /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel for min
+     *
+     * @param[in] input1   First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+     * @param[in] input2   Second tensor input info. Data types supported: Same as @p input1.
+     * @param[in] output   Output tensor info. Data types supported: Same as @p input1.
+     * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+
+    // Inherited methods overridden:
+    MemoryRequirements workspace() const override;
+};
+
+/** Basic function to run @ref NEArithmeticOperationKernel for squared difference
+ *
+ * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+ * @note The function performs a squared difference operation between two tensors (i.e., out[i] = (in1[i] - in2[i])^2)
+ */
+class NEElementwiseSquaredDiff : public INEOperator
+{
+public:
+    /** Initialise the kernel's inputs and output.
+     *
+     * @param[in]  input1   First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+     * @param[in]  input2   Second tensor input info. Data types supported: Same as @p input1.
+     * @param[out] output   Output tensor info. Data types supported: Same as @p input1.
+     * @param[in]  act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
+     */
+    void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+    /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel for squared difference
+     *
+     * @param[in] input1   First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+     * @param[in] input2   Second tensor input info. Data types supported: Same as @p input1.
+     * @param[in] output   Output tensor info. Data types supported: Same as @p input1.
+     * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+
+    // Inherited methods overridden:
+    MemoryRequirements workspace() const override;
+};
+
+/** Basic function to run @ref NEArithmeticOperationKernel for division
+ *
+ * @note The tensor data type for the inputs must be F16/F32.
+ * @note The function performs an elementwise division between two tensors (i.e., out[i] = in1[i] / in2[i])
+ */
+class NEElementwiseDivision : public INEOperator
+{
+public:
+    /** Initialise the kernel's inputs and output.
+     *
+     * @param[in]  input1   First tensor input info. Data types supported: F16/F32.
+     * @param[in]  input2   Second tensor input info. Data types supported: Same as @p input1.
+     * @param[out] output   Output tensor info. Data types supported: Same as @p input1.
+     * @param[in]  act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
+     */
+    void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+    /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel for division
+     *
+     * @param[in] input1   First tensor input info. Data types supported: F16/F32.
+     * @param[in] input2   Second tensor input info. Data types supported: Same as @p input1.
+     * @param[in] output   Output tensor info. Data types supported: Same as @p input1.
+     * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+
+    // Inherited methods overridden:
+    MemoryRequirements workspace() const override;
+};
+
+/** Basic function to run @ref NEArithmeticOperationKernel for power
+ *
+ * @note The tensor data type for the inputs must be F16/F32.
+ * @note The function performs an elementwise power of in1 to in2 (i.e., out[i] = in1[i] ^ in2[i])
+ * @note For an exponent that is a float, this function will only work with a positive base.
+ */
+class NEElementwisePower : public INEOperator
+{
+public:
+    /** Initialise the kernel's inputs and output.
+     *
+     * @param[in]  input1   First tensor input info. Data types supported: F16/F32.
+     * @param[in]  input2   Second tensor input info. Data types supported: Same as @p input1.
+     * @param[out] output   Output tensor info. Data types supported: Same as @p input1.
+     * @param[in]  act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
+     */
+    void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+    /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel for power
+     *
+     * @param[in] input1   First tensor input info. Data types supported: F16/F32.
+     * @param[in] input2   Second tensor input info. Data types supported: Same as @p input1.
+     * @param[in] output   Output tensor info. Data types supported: Same as @p input1.
+     * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const ActivationLayerInfo &act_info = ActivationLayerInfo());
+
+    // Inherited methods overridden:
+    MemoryRequirements workspace() const override;
+};
+
+/** Basic function to run @ref NEComparisonOperationKernel.
+ *
+ * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+ * @note The function performs a comparison operation between two tensors.
+ */
+class NEElementwiseComparison : public INEOperator
+{
+public:
+    /** Initialise the kernel's inputs and output.
+     *
+     * @param[in]  input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+     * @param[in]  input2 Second tensor input info. Data types supported: Same as @p input1.
+     * @param[out] output Output tensor info. Data types supported: U16/U32.
+     * @param[in]  op     Comparison Operation to be performed.
+     */
+    void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ComparisonOperation op);
+    /** Static function to check if given info will lead to a valid configuration of @ref NEComparisonOperationKernel
+     *
+     * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+     * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
+     * @param[in] output Output tensor info. Data types supported: U16/U32.
+     * @param[in] op     Comparison Operation to be performed.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ComparisonOperation op);
+
+    // Inherited methods overridden:
+    MemoryRequirements workspace() const override;
+};
+
+/** Basic function to run @ref NEComparisonOperationKernel
+ *
+ * @note The tensor data type for the inputs must be QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+ * @note The function performs a comparison operation between two tensors.
+ */
+template <ComparisonOperation op>
+class NEElementwiseComparisonStatic : public INEOperator
+{
+public:
+    /** Initialise the kernel's inputs and output.
+     *
+     * @param[in]  input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+     * @param[in]  input2 Second tensor input info. Data types supported: Same as @p input1.
+     * @param[out] output Output tensor info. Data types supported: U16/U32.
+     */
+    void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
+    /** Static function to check if given info will lead to a valid configuration of @ref NEComparisonOperationKernel
+     *
+     * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
+     * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
+     * @param[in] output Output tensor info. Data types supported: U16/U32.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
+
+    // Inherited methods overridden:
+    MemoryRequirements workspace() const override;
+};
+
+/** Basic function to run equal comparison. */
+using NEEqual = NEElementwiseComparisonStatic<ComparisonOperation::Equal>;
+/** Basic function to run not equal comparison. */
+using NENotEqual = NEElementwiseComparisonStatic<ComparisonOperation::NotEqual>;
+/** Basic function to run greater comparison. */
+using NEGreater = NEElementwiseComparisonStatic<ComparisonOperation::Greater>;
+/** Basic function to run greater-equal comparison. */
+using NEGreaterEqual = NEElementwiseComparisonStatic<ComparisonOperation::GreaterEqual>;
+/** Basic function to run less comparison. */
+using NELess = NEElementwiseComparisonStatic<ComparisonOperation::Less>;
+/** Basic function to run less-equal comparison. */
+using NELessEqual = NEElementwiseComparisonStatic<ComparisonOperation::LessEqual>;
+} // namespace experimental
 } // namespace arm_compute
 #endif /* ARM_COMPUTE_NEELEMENTWISEOPERATIONS_H */
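
Putting the pieces together: the experimental operators are configured from ITensorInfo and receive tensors only at run time, while the public IFunction classes above keep the old tensor-based API and forward to them internally. A usage sketch for the operator path (assumed slot keys; error handling abbreviated):

    #include "arm_compute/core/Error.h"
    #include "arm_compute/runtime/NEON/functions/NEElementwiseOperations.h"
    #include "arm_compute/runtime/Tensor.h"

    void elementwise_max_sketch()
    {
        const arm_compute::TensorInfo info(arm_compute::TensorShape(8U), 1, arm_compute::DataType::F32);

        arm_compute::Tensor a, b, out;
        a.allocator()->init(info);
        b.allocator()->init(info);
        out.allocator()->init(info);

        // Validate and configure on metadata only.
        ARM_COMPUTE_ERROR_THROW_ON(arm_compute::experimental::NEElementwiseMax::validate(a.info(), b.info(), out.info()));
        arm_compute::experimental::NEElementwiseMax max_op;
        max_op.configure(a.info(), b.info(), out.info());

        a.allocator()->allocate();
        b.allocator()->allocate();
        out.allocator()->allocate();

        // Bind tensors per call; this operator needs no workspace.
        const arm_compute::InputTensorMap  inputs  = { { arm_compute::TensorType::ACL_SRC_0, &a },
                                                       { arm_compute::TensorType::ACL_SRC_1, &b } };
        const arm_compute::OutputTensorMap outputs = { { arm_compute::TensorType::ACL_DST, &out } };
        max_op.run(inputs, outputs, {});
    }
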
diff --git a/arm_compute/runtime/NEON/functions/NEPReluLayer.h b/arm_compute/runtime/NEON/functions/NEPReluLayer.h
index 102a165..9229a84 100644
--- a/arm_compute/runtime/NEON/functions/NEPReluLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEPReluLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -25,21 +25,65 @@
 #define ARM_COMPUTE_NEPRELULAYER_H
 
 #include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/IFunction.h"
+#include "arm_compute/runtime/NEON/INEOperator.h"
 
 namespace arm_compute
 {
 class ITensor;
 
+namespace experimental
+{
 /** Basic function to run @ref NEArithmeticOperationKernel for PRELU
  *
  * @note The function implements an activation layer with the PRELU activation function.
  */
-class NEPReluLayer : public INESimpleFunction
+class NEPReluLayer : public INEOperator
 {
 public:
     /** Set the input and output tensor.
      *
+     * @param[in]  input  Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[in]  alpha  Source alpha tensor info. Data types supported: same as @p input.
+     * @param[out] output Destination tensor info. Data type supported: same as @p input
+     */
+    void configure(const ITensorInfo *input, const ITensorInfo *alpha, ITensorInfo *output);
+    /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel for PRELU
+     *
+     * @param[in] input  Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[in] alpha  Source alpha tensor info. Data types supported: same as @p input.
+     * @param[in] output Destination tensor info. Data type supported: same as @p input
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input, const ITensorInfo *alpha, const ITensorInfo *output);
+
+    // Inherited methods overridden:
+    MemoryRequirements workspace() const override;
+};
+} // namespace experimental
+
+/** Basic function to run @ref NEArithmeticOperationKernel for PRELU
+ *
+ * @note The function implements an activation layer with the PRELU activation function.
+ */
+class NEPReluLayer : public IFunction
+{
+public:
+    /** Default Constructor */
+    NEPReluLayer();
+    /** Default Destructor */
+    ~NEPReluLayer();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEPReluLayer(const NEPReluLayer &) = delete;
+    /** Default move constructor */
+    NEPReluLayer(NEPReluLayer &&);
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEPReluLayer &operator=(const NEPReluLayer &) = delete;
+    /** Default move assignment operator */
+    NEPReluLayer &operator=(NEPReluLayer &&);
+    /** Set the input and output tensor.
+     *
      * @param[in]  input  Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
     * @param[in]  alpha  Source alpha tensor. Data types supported: same as @p input.
      * @param[out] output Destination tensor. Data type supported: same as @p input
@@ -54,6 +98,13 @@
      * @return a status
      */
     static Status validate(const ITensorInfo *input, const ITensorInfo *alpha, const ITensorInfo *output);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    struct Impl;
+    std::unique_ptr<Impl> _impl;
 };
 } // namespace arm_compute
 #endif /* ARM_COMPUTE_NEPRELULAYER_H */
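
The pattern that keeps the public API stable is identical across these functions: the IFunction owns a private Impl that caches the tensors passed to configure() together with an instance of the experimental operator, and run() rebuilds the maps on every call. A plausible shape of that Impl, shown only as an assumption (the actual definition lives in the .cpp file, inside namespace arm_compute):

    // Assumed to sit in the corresponding .cpp, inside namespace arm_compute.
    struct NEPReluLayer::Impl
    {
        const ITensor                              *src_0{ nullptr };
        const ITensor                              *src_1{ nullptr };
        ITensor                                    *dst{ nullptr };
        std::unique_ptr<experimental::NEPReluLayer> op{ nullptr };
    };

    void NEPReluLayer::run()
    {
        // Rebuild the maps from the cached pointers and forward to the operator.
        const InputTensorMap  src = { { TensorType::ACL_SRC_0, _impl->src_0 },
                                      { TensorType::ACL_SRC_1, _impl->src_1 } };
        const OutputTensorMap dst = { { TensorType::ACL_DST, _impl->dst } };
        _impl->op->run(src, dst, {});
    }
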
diff --git a/arm_compute/runtime/OMP/OMPScheduler.h b/arm_compute/runtime/OMP/OMPScheduler.h
index b7c186a..1742e95 100644
--- a/arm_compute/runtime/OMP/OMPScheduler.h
+++ b/arm_compute/runtime/OMP/OMPScheduler.h
@@ -66,7 +66,7 @@
-     * @param[in] inputs  Vector containing the input tensors.
-     * @param[in] outputs Vector containing the output tensors.
+     * @param[in] inputs  Map containing the input tensors.
+     * @param[in] outputs Map containing the output tensors.
      */
-    void schedule_op(ICPPKernel *kernel, const Hints &hints, const std::vector<InputTensor> &inputs, const std::vector<OutputTensor> &outputs) override;
+    void schedule_op(ICPPKernel *kernel, const Hints &hints, const InputTensorMap &inputs, const OutputTensorMap &outputs) override;
 
 protected:
     /** Execute all the passed workloads
diff --git a/arm_compute/runtime/SingleThreadScheduler.h b/arm_compute/runtime/SingleThreadScheduler.h
index 8a69a5b..0d576b9 100644
--- a/arm_compute/runtime/SingleThreadScheduler.h
+++ b/arm_compute/runtime/SingleThreadScheduler.h
@@ -57,7 +57,7 @@
-     * @param[in] inputs  Vector containing the input tensors.
-     * @param[in] outputs Vector containing the output tensors.
+     * @param[in] inputs  Map containing the input tensors.
+     * @param[in] outputs Map containing the output tensors.
      */
-    void schedule_op(ICPPKernel *kernel, const Hints &hints, const std::vector<InputTensor> &inputs, const std::vector<OutputTensor> &outputs) override;
+    void schedule_op(ICPPKernel *kernel, const Hints &hints, const InputTensorMap &inputs, const OutputTensorMap &outputs) override;
 
 protected:
     /** Will run the workloads sequentially and in order.