Make memset/copy functions stateless

Port the following functions:
- NECopy
- NEFill
- NEPermute
- NEReshapeLayer

Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: I75f3f837012abab79c7dde9a20a34f64f75571d8
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4800
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/arm_compute/runtime/NEON/functions/NECopy.h b/arm_compute/runtime/NEON/functions/NECopy.h
index a58ac9e..d5f22d7 100644
--- a/arm_compute/runtime/NEON/functions/NECopy.h
+++ b/arm_compute/runtime/NEON/functions/NECopy.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -24,30 +24,33 @@
 #ifndef ARM_COMPUTE_NECOPY_H
 #define ARM_COMPUTE_NECOPY_H
 
+#include "arm_compute/runtime/IFunction.h"
+
 #include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+
+#include <memory>
 
 namespace arm_compute
 {
 class ITensor;
 class ITensorInfo;
 
-/** Basic function to run @ref NECopyKernel */
-class NECopy : public INESimpleFunctionNoBorder
+/** Basic function to run @ref CpuCopyKernel */
+class NECopy : public IFunction
 {
 public:
-    /** Constructor */
-    NECopy() = default;
+    /** Default Constructor */
+    NECopy();
+    /** Default Destructor */
+    ~NECopy();
     /** Prevent instances of this class from being copied (As this class contains pointers) */
     NECopy(const NECopy &) = delete;
+    /** Default move constructor */
+    NECopy(NECopy &&);
     /** Prevent instances of this class from being copied (As this class contains pointers) */
     NECopy &operator=(const NECopy &) = delete;
-    /** Prevent instances of this class from being moved (As this class contains non movable objects) */
-    NECopy(NECopy &&) = delete;
-    /** Prevent instances of this class from being moved (As this class contains non movable objects) */
-    NECopy &operator=(NECopy &&) = delete;
-    /** Default destructor */
-    ~NECopy();
+    /** Default move assignment operator */
+    NECopy &operator=(NECopy &&);
     /** Initialise the function's source and destination.
      *
      * @param[in]  input  Source tensor. Data types supported: All
@@ -63,6 +66,13 @@
      * @return a status
      */
     static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+
+    // Inherited methods overridden
+    void run() override;
+
+private:
+    struct Impl;
+    std::unique_ptr<Impl> _impl;
 };
 } // namespace arm_compute
 #endif /*ARM_COMPUTE_NECOPY_H */
diff --git a/arm_compute/runtime/NEON/functions/NEFill.h b/arm_compute/runtime/NEON/functions/NEFill.h
index 14d690f..3162e26 100644
--- a/arm_compute/runtime/NEON/functions/NEFill.h
+++ b/arm_compute/runtime/NEON/functions/NEFill.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -24,24 +24,46 @@
 #ifndef ARM_COMPUTE_NEFILL_H
 #define ARM_COMPUTE_NEFILL_H
 
+#include "arm_compute/runtime/IFunction.h"
+
 #include "arm_compute/core/PixelValue.h"
 #include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+
+#include <memory>
 
 namespace arm_compute
 {
 class ITensor;
 
-/** Basic function to run @ref NEMemsetKernel */
-class NEFill : public INESimpleFunctionNoBorder
+/** Basic function to run @ref CpuFillKernel */
+class NEFill : public IFunction
 {
 public:
+    /** Default Constructor */
+    NEFill();
+    /** Default Destructor */
+    ~NEFill();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEFill(const NEFill &) = delete;
+    /** Default move constructor */
+    NEFill(NEFill &&);
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEFill &operator=(const NEFill &) = delete;
+    /** Default move assignment operator */
+    NEFill &operator=(NEFill &&);
     /** Initialize the function
      *
      * @param[in,out] tensor         Source tensor. Data types supported: All
      * @param[in]     constant_value Constant value to use to fill tensor.
      */
     void configure(ITensor *tensor, PixelValue constant_value);
+
+    // Inherited methods overridden
+    void run() override;
+
+private:
+    struct Impl;
+    std::unique_ptr<Impl> _impl;
 };
 } // namespace arm_compute
 #endif /*ARM_COMPUTE_FILL_H */
diff --git a/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h b/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h
index 5b5bb5c..7973a6e 100644
--- a/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020 Arm Limited.
+ * Copyright (c) 2020-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -32,12 +32,12 @@
 {
 class ITensor;
 class ITensorInfo;
-class NEMemsetKernel;
+class NEFill;
 class NEMaxUnpoolingLayerKernel;
 
 /** Function to perform MaxUnpooling. This function calls the following NEON kernels:
  *
- * -# @ref NEMemsetKernel
+ * -# @ref NEFill
  * -# @ref NEMaxUnpoolingLayerKernel
  */
 class NEMaxUnpoolingLayer : public IFunction
@@ -82,7 +82,7 @@
     void run() override;
 
 private:
-    std::unique_ptr<NEMemsetKernel>            _memset_kernel;
+    std::unique_ptr<NEFill>                    _fill_func;
     std::unique_ptr<NEMaxUnpoolingLayerKernel> _unpooling_layer_kernel;
 };
 }
diff --git a/arm_compute/runtime/NEON/functions/NEPadLayer.h b/arm_compute/runtime/NEON/functions/NEPadLayer.h
index 3fdbb0d..ede9758 100644
--- a/arm_compute/runtime/NEON/functions/NEPadLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEPadLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -26,6 +26,7 @@
 
 #include "arm_compute/runtime/IFunction.h"
 #include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h"
+#include "arm_compute/runtime/NEON/functions/NECopy.h"
 #include "arm_compute/runtime/NEON/functions/NEStridedSlice.h"
 #include "arm_compute/runtime/SubTensor.h"
 
@@ -35,7 +36,6 @@
 
 namespace arm_compute
 {
-class NECopyKernel;
 class NEPadLayerKernel;
 
 /** Basic function to pad a tensor. This function calls the following NEON functions/kernels:
@@ -43,7 +43,7 @@
  *  - For padding mode = PaddingMode::CONSTANT:
  *      -# @ref NEPadLayerKernel
  *  - Otherwise:
- *      -# @ref NECopyKernel
+ *      -# @ref NECopy
  *      -# @ref NEStridedSlice
  *      -# @ref NEConcatenateLayer
  *
@@ -109,7 +109,7 @@
     void configure_reflect_symmetric_mode(ITensor *input, ITensor *output);
 
 private:
-    std::unique_ptr<NECopyKernel>     _copy_kernel;
+    NECopy                            _copy_function;
     std::unique_ptr<NEPadLayerKernel> _pad_kernel;
     PaddingMode                       _mode;
     PaddingList                       _padding;
diff --git a/arm_compute/runtime/NEON/functions/NEPermute.h b/arm_compute/runtime/NEON/functions/NEPermute.h
index ef8854b..998a1d6 100644
--- a/arm_compute/runtime/NEON/functions/NEPermute.h
+++ b/arm_compute/runtime/NEON/functions/NEPermute.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -24,20 +24,34 @@
 #ifndef ARM_COMPUTE_NEPERMUTE_H
 #define ARM_COMPUTE_NEPERMUTE_H
 
-#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
+#include "arm_compute/runtime/IFunction.h"
 
 #include "arm_compute/core/Types.h"
 
+#include <memory>
+
 namespace arm_compute
 {
 // Forward declarations
 class ITensor;
 class ITensorInfo;
 
-/** Basic function to run @ref NEPermuteKernel */
-class NEPermute : public INESimpleFunctionNoBorder
+/** Basic function to run @ref CpuPermuteKernel */
+class NEPermute : public IFunction
 {
 public:
+    /** Default Constructor */
+    NEPermute();
+    /** Default Destructor */
+    ~NEPermute();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEPermute(const NEPermute &) = delete;
+    /** Default move constructor */
+    NEPermute(NEPermute &&);
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEPermute &operator=(const NEPermute &) = delete;
+    /** Default move assignment operator */
+    NEPermute &operator=(NEPermute &&);
     /** Configure the permute NEON kernel
      *
      * @note Arbitrary permutation vectors are supported with rank not greater than 4
@@ -58,6 +72,13 @@
      * @return a status
      */
     static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm);
+
+    // Inherited methods overridden
+    void run() override;
+
+private:
+    struct Impl;
+    std::unique_ptr<Impl> _impl;
 };
 } // namespace arm_compute
 #endif /* ARM_COMPUTE_NEPERMUTE_H */
diff --git a/arm_compute/runtime/NEON/functions/NERNNLayer.h b/arm_compute/runtime/NEON/functions/NERNNLayer.h
index c42b303..66f7f2e 100644
--- a/arm_compute/runtime/NEON/functions/NERNNLayer.h
+++ b/arm_compute/runtime/NEON/functions/NERNNLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -27,6 +27,7 @@
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
 #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
+#include "arm_compute/runtime/NEON/functions/NECopy.h"
 #include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
 #include "arm_compute/runtime/NEON/functions/NEGEMM.h"
 
@@ -34,7 +35,6 @@
 {
 // Forward declarations
 class ITensor;
-class NECopyKernel;
 
 /** Basic function to run @ref NERNNLayer */
 class NERNNLayer : public IFunction
@@ -83,16 +83,16 @@
     void prepare() override;
 
 private:
-    MemoryGroup                   _memory_group;
-    NEGEMM                        _gemm_state_f;
-    NEArithmeticAddition          _add_f;
-    NEActivationLayer             _activation;
-    NEFullyConnectedLayer         _fully_connected;
-    std::unique_ptr<NECopyKernel> _copy_kernel;
-    Tensor                        _fully_connected_out;
-    Tensor                        _gemm_output;
-    Tensor                        _add_output;
-    bool                          _is_prepared;
+    MemoryGroup           _memory_group;
+    NEGEMM                _gemm_state_f;
+    NEArithmeticAddition  _add_f;
+    NEActivationLayer     _activation;
+    NEFullyConnectedLayer _fully_connected;
+    NECopy                _copy_f;
+    Tensor                _fully_connected_out;
+    Tensor                _gemm_output;
+    Tensor                _add_output;
+    bool                  _is_prepared;
 };
 } // namespace arm_compute
 #endif /* ARM_COMPUTE_NERNNLAYER_H */
diff --git a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h
index 641a96e..b4c3af1 100644
--- a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -73,41 +73,5 @@
     struct Impl;
     std::unique_ptr<Impl> _impl;
 };
-
-namespace experimental
-{
-/** Basic function to run @ref NEReshapeLayerKernel */
-class NEReshape : public INEOperator
-{
-public:
-    /** Default Constructor */
-    NEReshape() = default;
-    /** Default Destructor */
-    ~NEReshape();
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    NEReshape(const NEReshape &) = delete;
-    /** Default move constructor */
-    NEReshape(NEReshapeLayer &&);
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    NEReshape &operator=(const NEReshape &) = delete;
-    /** Default move assignment operator */
-    NEReshape &operator=(NEReshape &&);
-    /** Initialise the kernel's inputs and outputs
-     *
-     * @param[in]  input  Input tensor info. Data type supported: All
-     * @param[out] output Output info. Data type supported: Same as @p input
-     */
-    void configure(const ITensorInfo *input, ITensorInfo *output);
-
-    /** Static function to check if given info will lead to a valid configuration of @ref NEReshapeLayer
-     *
-     * @param[in] input  Input tensor info. Data type supported: All
-     * @param[in] output Output tensor info. Data type supported: Same as @p input
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-};
-} // namespace experimental
 } // namespace arm_compute
 #endif /*ARM_COMPUTE_NERESHAPELAYER_H */
diff --git a/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h b/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h
index 62af092..3a6f8d7 100644
--- a/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h
+++ b/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -34,7 +34,7 @@
 class ITensor;
 class ITensorInfo;
 class NESpaceToBatchLayerKernel;
-class NEMemsetKernel;
+class NEFill;
 
 /** Basic function to spatial divide a tensor. This function calls the following NEON kernels/functions:
  *
@@ -102,7 +102,7 @@
 
 private:
     std::unique_ptr<NESpaceToBatchLayerKernel> _space_to_batch_kernel; /**< SpaceToBatch kernel to run */
-    std::unique_ptr<NEMemsetKernel>            _memset_kernel;         /**< Memset kernel to run */
+    std::unique_ptr<NEFill>                    _fill_f;                /**< Fill function to run */
     bool                                       _has_padding;           /**< Flag to check if the output has padding */
 };
 } // namespace arm_compute