Make memset/copy functions stateless

Port the following functions:
- CLCopy
- CLFill
- CLPermute
- CLReshapeLayer
- CLCropResize

Resolves: COMPMID-4002

Signed-off-by: Sheri Zhang <sheri.zhang@arm.com>
Change-Id: I8392aa515aaeb5b44dab6122be6a795d08376d5f
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5003
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
diff --git a/Android.bp b/Android.bp
index 5653fc8..94a4e6c 100644
--- a/Android.bp
+++ b/Android.bp
@@ -95,8 +95,6 @@
         "src/core/CL/kernels/CLComparisonKernel.cpp",
         "src/core/CL/kernels/CLConvertFullyConnectedWeightsKernel.cpp",
         "src/core/CL/kernels/CLConvolutionKernel.cpp",
-        "src/core/CL/kernels/CLCopyKernel.cpp",
-        "src/core/CL/kernels/CLCropKernel.cpp",
         "src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp",
         "src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp",
         "src/core/CL/kernels/CLDepthConvertLayerKernel.cpp",
@@ -150,7 +148,6 @@
         "src/core/CL/kernels/CLMeanStdDevKernel.cpp",
         "src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp",
         "src/core/CL/kernels/CLMedian3x3Kernel.cpp",
-        "src/core/CL/kernels/CLMemsetKernel.cpp",
         "src/core/CL/kernels/CLMinMaxLayerKernel.cpp",
         "src/core/CL/kernels/CLMinMaxLocationKernel.cpp",
         "src/core/CL/kernels/CLNonLinearFilterKernel.cpp",
@@ -158,7 +155,6 @@
         "src/core/CL/kernels/CLNormalizationLayerKernel.cpp",
         "src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp",
         "src/core/CL/kernels/CLPadLayerKernel.cpp",
-        "src/core/CL/kernels/CLPermuteKernel.cpp",
         "src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp",
         "src/core/CL/kernels/CLPriorBoxLayerKernel.cpp",
         "src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.cpp",
@@ -169,7 +165,6 @@
         "src/core/CL/kernels/CLReductionOperationKernel.cpp",
         "src/core/CL/kernels/CLRemapKernel.cpp",
         "src/core/CL/kernels/CLReorgLayerKernel.cpp",
-        "src/core/CL/kernels/CLReshapeLayerKernel.cpp",
         "src/core/CL/kernels/CLReverseKernel.cpp",
         "src/core/CL/kernels/CLScaleKernel.cpp",
         "src/core/CL/kernels/CLScharr3x3Kernel.cpp",
@@ -437,12 +432,17 @@
         "src/core/cpu/kernels/sub/neon/qsymm16.cpp",
         "src/core/gpu/cl/kernels/ClActivationKernel.cpp",
         "src/core/gpu/cl/kernels/ClBatchConcatenateKernel.cpp",
+        "src/core/gpu/cl/kernels/ClCopyKernel.cpp",
+        "src/core/gpu/cl/kernels/ClCropKernel.cpp",
         "src/core/gpu/cl/kernels/ClDepthConcatenateKernel.cpp",
         "src/core/gpu/cl/kernels/ClElementwiseKernel.cpp",
         "src/core/gpu/cl/kernels/ClElementwiseUnaryKernel.cpp",
+        "src/core/gpu/cl/kernels/ClFillKernel.cpp",
         "src/core/gpu/cl/kernels/ClFloorKernel.cpp",
         "src/core/gpu/cl/kernels/ClHeightConcatenateKernel.cpp",
+        "src/core/gpu/cl/kernels/ClPermuteKernel.cpp",
         "src/core/gpu/cl/kernels/ClPoolingKernel.cpp",
+        "src/core/gpu/cl/kernels/ClReshapeKernel.cpp",
         "src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.cpp",
         "src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp",
         "src/core/gpu/cl/kernels/ClWidthConcatenateKernel.cpp",
@@ -505,6 +505,7 @@
         "src/runtime/CL/functions/CLConvolution.cpp",
         "src/runtime/CL/functions/CLConvolutionLayer.cpp",
         "src/runtime/CL/functions/CLCopy.cpp",
+        "src/runtime/CL/functions/CLCrop.cpp",
         "src/runtime/CL/functions/CLCropResize.cpp",
         "src/runtime/CL/functions/CLDeconvolutionLayer.cpp",
         "src/runtime/CL/functions/CLDeconvolutionLayerUpsample.cpp",
@@ -802,11 +803,16 @@
         "src/runtime/gpu/cl/operators/ClActivation.cpp",
         "src/runtime/gpu/cl/operators/ClAdd.cpp",
         "src/runtime/gpu/cl/operators/ClConcatenate.cpp",
+        "src/runtime/gpu/cl/operators/ClCopy.cpp",
+        "src/runtime/gpu/cl/operators/ClCrop.cpp",
         "src/runtime/gpu/cl/operators/ClElementwiseOperations.cpp",
         "src/runtime/gpu/cl/operators/ClElementwiseUnary.cpp",
+        "src/runtime/gpu/cl/operators/ClFill.cpp",
         "src/runtime/gpu/cl/operators/ClFloor.cpp",
         "src/runtime/gpu/cl/operators/ClLogicalNot.cpp",
+        "src/runtime/gpu/cl/operators/ClPermute.cpp",
         "src/runtime/gpu/cl/operators/ClPooling.cpp",
+        "src/runtime/gpu/cl/operators/ClReshape.cpp",
         "src/runtime/gpu/cl/operators/ClSub.cpp",
         "utils/CommonGraphOptions.cpp",
         "utils/GraphUtils.cpp",