Make CLArithmeticSubtraction kernel and function state-less

Resolves COMPMID-4008

Change-Id: Ic5f40610e771f31e6d301dfae976c81e9c79fa8b
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4917
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
diff --git a/src/core/gpu/cl/kernels/ClActivationKernel.h b/src/core/gpu/cl/kernels/ClActivationKernel.h
index 30adc55..68c309e 100644
--- a/src/core/gpu/cl/kernels/ClActivationKernel.h
+++ b/src/core/gpu/cl/kernels/ClActivationKernel.h
@@ -45,9 +45,9 @@
      * @note If the output tensor is a nullptr, the activation function will be performed in-place
      *
      * @param[in]      compile_context The compile context to be used.
-     * @param[in, out] src             Source tensor. In case of @p dst tensor = nullptr, this tensor will store the result
+     * @param[in, out] src             Source tensor info. If @p dst is nullptr, this tensor will store the result
      *                                 of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32.
-     * @param[out]     dst             Destination tensor. Data type supported: same as @p src
+     * @param[out]     dst             Destination tensor info. Data type supported: same as @p src
      * @param[in]      act_info        Activation layer information.
      */
     void configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, ActivationLayerInfo act_info);
diff --git a/src/core/gpu/cl/kernels/ClBatchConcatenateKernel.h b/src/core/gpu/cl/kernels/ClBatchConcatenateKernel.h
index 378a08a..d9fa905 100644
--- a/src/core/gpu/cl/kernels/ClBatchConcatenateKernel.h
+++ b/src/core/gpu/cl/kernels/ClBatchConcatenateKernel.h
@@ -46,9 +46,9 @@
     /** Initialise the kernel's source and destination
      *
      * @param[in]     compile_context The compile context to be used.
-     * @param[in]     src             Source tensor. Data types supported: All.
+     * @param[in]     src             Source tensor info. Data types supported: All.
      * @param[in]     batch_offset    The offset on axis # 3.
-     * @param[in,out] dst             Destination tensor. Data types supported: Same as @p src.
+     * @param[in,out] dst             Destination tensor info. Data types supported: Same as @p src.
      *
      * @note: The dst tensor's low two dimensions can't be smaller than the src one's.
      * @note: The gaps between the two lowest dimensions of src and dst need to be divisible by 2.
diff --git a/src/core/gpu/cl/kernels/ClDepthConcatenateKernel.h b/src/core/gpu/cl/kernels/ClDepthConcatenateKernel.h
index 144d7d4..5acfb33 100644
--- a/src/core/gpu/cl/kernels/ClDepthConcatenateKernel.h
+++ b/src/core/gpu/cl/kernels/ClDepthConcatenateKernel.h
@@ -46,9 +46,9 @@
     /** Initialise the kernel's source and destination
      *
      * @param[in]     compile_context The compile context to be used.
-     * @param[in]     src             Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
+     * @param[in]     src             Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
      * @param[in]     depth_offset    The offset on the Z axis.
-     * @param[in,out] dst             Destination tensor. Data types supported: Same as @p src.
+     * @param[in,out] dst             Destination tensor info. Data types supported: Same as @p src.
      *
      * @note: The dst tensor's low two dimensions can't be smaller than the src one's.
      * @note: The gaps between the two lowest dimensions of src and dst need to be divisible by 2.
diff --git a/src/core/gpu/cl/kernels/ClFloorKernel.h b/src/core/gpu/cl/kernels/ClFloorKernel.h
index 09ab801..646dfb3 100644
--- a/src/core/gpu/cl/kernels/ClFloorKernel.h
+++ b/src/core/gpu/cl/kernels/ClFloorKernel.h
@@ -43,8 +43,8 @@
     /** Configure kernel for a given list of arguments
      *
      * @param[in]  compile_context The compile context to be used.
-     * @param[in]  src             Source tensor. Data type supported: F16/F32.
-     * @param[out] dst             Destination tensor. Same as @p src
+     * @param[in]  src             Source tensor info. Data type supported: F16/F32.
+     * @param[out] dst             Destination tensor info. Data type supported: same as @p src
      */
     void configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst);
 
diff --git a/src/core/gpu/cl/kernels/ClHeightConcatenateKernel.h b/src/core/gpu/cl/kernels/ClHeightConcatenateKernel.h
index 88cd4c4..9a4380a 100644
--- a/src/core/gpu/cl/kernels/ClHeightConcatenateKernel.h
+++ b/src/core/gpu/cl/kernels/ClHeightConcatenateKernel.h
@@ -46,9 +46,9 @@
     /** Initialise the kernel's source and destination
      *
      * @param[in]  compile_context The compile context to be used.
-     * @param[in]  src             Source tensor. Data types supported: All.
+     * @param[in]  src             Source tensor info. Data types supported: All.
      * @param[in]  height_offset   The starting offset on the Y axis for the dst tensor.
-     * @param[out] dst             Destination tensor. Data types supported: same as @p src.
+     * @param[out] dst             Destination tensor info. Data types supported: same as @p src.
      *
      */
     void configure(const CLCompileContext &compile_context, ITensorInfo *src, unsigned int height_offset, ITensorInfo *dst);
diff --git a/src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.h b/src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.h
index 9271500..ddade29 100644
--- a/src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.h
+++ b/src/core/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.h
@@ -46,9 +46,9 @@
     /** Initialise the kernel's sources and destination
      *
      * @param[in]  compile_context The compile context to be used.
-     * @param[in]  src1            First source tensor. Data types supported: All.
-     * @param[in]  src2            Second source tensor. Data types supported: same as @p src1
-     * @param[out] dst             Destination tensor. Data types supported: Same as @p src1.
+     * @param[in]  src1            First source tensor info. Data types supported: All.
+     * @param[in]  src2            Second source tensor info. Data types supported: same as @p src1.
+     * @param[out] dst             Destination tensor info. Data types supported: Same as @p src1.
      */
     void configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst);
     /**  Static function to check if given info will lead to a valid configuration of @ref ClWidthConcatenate2TensorsKernel
diff --git a/src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.h b/src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.h
index 06d6c03..19bda65 100644
--- a/src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.h
+++ b/src/core/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.h
@@ -47,11 +47,11 @@
     /** Initialise the kernel's sources and destination
      *
      * @param[in]  compile_context The compile context to be used.
-     * @param[in]  src1            First source tensor. Data types supported: All.
-     * @param[in]  src2            Second source tensor. Data types supported: same as @p src1
-     * @param[in]  src3            Third source tensor. Data types supported: same as @p src1
-     * @param[in]  src4            Fourth source tensor. Data types supported: same as @p src1
-     * @param[out] dst             Destination tensor. Data types supported: same as @p src1.
+     * @param[in]  src1            First source tensor info. Data types supported: All.
+     * @param[in]  src2            Second source tensor info. Data types supported: same as @p src1.
+     * @param[in]  src3            Third source tensor info. Data types supported: same as @p src1.
+     * @param[in]  src4            Fourth source tensor info. Data types supported: same as @p src1.
+     * @param[out] dst             Destination tensor info. Data types supported: same as @p src1.
      */
     void configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *src3, ITensorInfo *src4, ITensorInfo *dst);
     /**  Static function to check if given info will lead to a valid configuration of @ref ClWidthConcatenate4TensorsKernel
diff --git a/src/core/gpu/cl/kernels/ClWidthConcatenateKernel.h b/src/core/gpu/cl/kernels/ClWidthConcatenateKernel.h
index 3bffe52..6bc8e57 100644
--- a/src/core/gpu/cl/kernels/ClWidthConcatenateKernel.h
+++ b/src/core/gpu/cl/kernels/ClWidthConcatenateKernel.h
@@ -46,9 +46,9 @@
     /** Initialise the kernel's source and destination
      *
      * @param[in]     compile_context The compile context to be used.
-     * @param[in]     src             Source tensor. Data types supported: All.
+     * @param[in]     src             Source tensor info. Data types supported: All.
      * @param[in]     width_offset    The offset on the X axis.
-     * @param[in,out] dst             Destination tensor. Data types supported: same as @p src.
+     * @param[in,out] dst             Destination tensor info. Data types supported: same as @p src.
      *
      */
     void configure(const CLCompileContext &compile_context, ITensorInfo *src, unsigned int width_offset, ITensorInfo *dst);