Make memset/copy functions stateless

Port the following functions:
- CLCopy
- CLFill
- CLPermute
- CLReshapeLayer
- CLCropResize

Resolves: COMPMID-4002

Signed-off-by: Sheri Zhang <sheri.zhang@arm.com>
Change-Id: I8392aa515aaeb5b44dab6122be6a795d08376d5f
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5003
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/runtime/CL/functions/CLCopy.cpp b/src/runtime/CL/functions/CLCopy.cpp
index c3e30ad..98916bf 100644
--- a/src/runtime/CL/functions/CLCopy.cpp
+++ b/src/runtime/CL/functions/CLCopy.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -23,30 +23,58 @@
  */
 #include "arm_compute/runtime/CL/functions/CLCopy.h"
 
+#include "arm_compute/core/CL/CLKernelLibrary.h"
 #include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/PixelValue.h"
-#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
 #include "arm_compute/core/Validate.h"
-#include "src/core/CL/kernels/CLCopyKernel.h"
+#include "src/core/CL/ICLKernel.h"
+#include "src/runtime/gpu/cl/operators/ClCopy.h"
 
 #include <utility>
 
-using namespace arm_compute;
-
-void CLCopy::configure(ICLTensor *input, ICLTensor *output)
+namespace arm_compute
 {
-    configure(CLKernelLibrary::get().get_compile_context(), input, output);
+struct CLCopy::Impl // Private state of CLCopy, reached through _impl
+{
+    const ICLTensor                *src{ nullptr }; // Source tensor recorded by configure(); packed as ACL_SRC in run()
+    ICLTensor                      *dst{ nullptr }; // Destination tensor recorded by configure(); packed as ACL_DST in run()
+    std::unique_ptr<opencl::ClCopy> op{ nullptr }; // Operator created and configured in configure(); executed by run()
+};
+
+CLCopy::CLCopy() // Default constructor: allocates an Impl with null tensors and no operator yet
+    : _impl(std::make_unique<Impl>())
+{
+}
+CLCopy::CLCopy(CLCopy &&) = default; // Move constructor, compiler-generated
+CLCopy &CLCopy::operator=(CLCopy &&) = default; // Move assignment, compiler-generated
+CLCopy::~CLCopy()                    = default; // Defaulted here, where Impl is a complete type
+
+void CLCopy::configure(ICLTensor *input, ICLTensor *output, Window *dst_window) // Convenience overload without an explicit compile context
+{
+    configure(CLKernelLibrary::get().get_compile_context(), input, output, dst_window); // Forwards to the overload below with the CLKernelLibrary compile context
 }
 
-void CLCopy::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output)
+void CLCopy::configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, Window *dst_window)
 {
-    auto k = std::make_unique<CLCopyKernel>();
-    k->configure(compile_context, input, output);
-    _kernel = std::move(k);
+    ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); // output->info() is dereferenced below as well, so both tensors are checked
+
+    _impl->src = input; // Remembered for run(), which packs it as ACL_SRC
+    _impl->dst = output; // Remembered for run(), which packs it as ACL_DST
+
+    _impl->op = std::make_unique<opencl::ClCopy>(); // A fresh operator is created on every configure() call
+    _impl->op->configure(compile_context, _impl->src->info(), _impl->dst->info(), dst_window); // The operator is configured from the tensor infos, not the tensors
 }
 
-Status CLCopy::validate(const arm_compute::ITensorInfo *input, const arm_compute::ITensorInfo *output)
+Status CLCopy::validate(const ITensorInfo *input, const ITensorInfo *output, Window *dst_window)
 {
-    return CLCopyKernel::validate(input, output);
+    return opencl::ClCopy::validate(input, output, dst_window); // Pure delegation: the operator's Status is returned unchanged
 }
+
+void CLCopy::run() // Executes the operator that configure() set up
+{
+    ITensorPack pack; // Tensors are handed to the operator through a pack at run time
+    pack.add_tensor(TensorType::ACL_SRC, _impl->src); // Source recorded by configure()
+    pack.add_tensor(TensorType::ACL_DST, _impl->dst); // Destination recorded by configure()
+    _impl->op->run(pack); // NOTE(review): op is null until configure() has been called; presumably callers must configure first
+}
+} // namespace arm_compute