Make memset/copy functions state-less

Port following functions:
- CLCopy
- CLFill
- CLPermute
- CLReshapeLayer
- CLCropResize

Resolves: COMPMID-4002

Signed-off-by: Sheri Zhang <sheri.zhang@arm.com>
Change-Id: I8392aa515aaeb5b44dab6122be6a795d08376d5f
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5003
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
diff --git a/arm_compute/runtime/CL/functions/CLSpaceToBatchLayer.h b/arm_compute/runtime/CL/functions/CLSpaceToBatchLayer.h
index 5c5e5be..dc02fa1 100644
--- a/arm_compute/runtime/CL/functions/CLSpaceToBatchLayer.h
+++ b/arm_compute/runtime/CL/functions/CLSpaceToBatchLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -26,6 +26,7 @@
 
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/functions/CLFill.h"
 #include "arm_compute/runtime/IFunction.h"
 
 #include <memory>
@@ -33,14 +34,13 @@
 namespace arm_compute
 {
 class CLCompileContext;
-class CLMemsetKernel;
 class CLSpaceToBatchLayerKernel;
 class ICLTensor;
 class ITensorInfo;
 
 /** Basic function to spatial divide a tensor. This function calls the following OpenCL kernels/functions:
  *
- *  -# @ref CLMemsetKernel
+ *  -# @ref CLFill
  *  -# @ref CLSpaceToBatchLayerKernel
  */
 class CLSpaceToBatchLayer : public IFunction
@@ -125,7 +125,7 @@
 
 private:
     std::unique_ptr<CLSpaceToBatchLayerKernel> _space_to_batch_kernel; /**< SpaceToBatch kernel to run */
-    std::unique_ptr<CLMemsetKernel>            _memset_kernel;         /**< Memset kernel to run */
+    CLFill                                     _fill;                  /**< Fill function to run */
     bool                                       _has_padding;           /**< Flag to check if the output has padding */
 };
 } // namespace arm_compute