Make memset/copy functions state-less

Port following functions:
- NECopy
- NEFill
- NEPermute
- NEReshapeLayer

Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: I75f3f837012abab79c7dde9a20a34f64f75571d8
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4800
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/runtime/NEON/functions/NEPadLayer.cpp b/src/runtime/NEON/functions/NEPadLayer.cpp
index 88a73b8..531b06d 100644
--- a/src/runtime/NEON/functions/NEPadLayer.cpp
+++ b/src/runtime/NEON/functions/NEPadLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -27,7 +27,6 @@
 
 #include "arm_compute/core/Types.h"
 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "src/core/NEON/kernels/NECopyKernel.h"
 #include "src/core/NEON/kernels/NEPadLayerKernel.h"
 #include "src/core/helpers/AutoConfiguration.h"
 
@@ -52,7 +51,7 @@
 NEPadLayer::~NEPadLayer() = default;
 
 NEPadLayer::NEPadLayer()
-    : _copy_kernel(), _pad_kernel(), _mode(), _padding(), _num_dimensions(0), _slice_functions(), _concat_functions(), _slice_results(), _concat_results()
+    : _copy_function(), _pad_kernel(), _mode(), _padding(), _num_dimensions(0), _slice_functions(), _concat_functions(), _slice_results(), _concat_results()
 {
 }
 
@@ -200,8 +199,7 @@
     else
     {
         // Copy the input to the whole output if no padding is applied
-        _copy_kernel = std::make_unique<NECopyKernel>();
-        _copy_kernel->configure(input, output);
+        _copy_function.configure(input, output);
     }
 }
 
@@ -286,7 +284,7 @@
     }
     else
     {
-        NEScheduler::get().schedule(_copy_kernel.get(), Window::DimY);
+        _copy_function.run();
     }
 }
 } // namespace arm_compute