COMPMID-2754: Add support for QASYMM8_SIGNED in NE kernels/functions.

Kernels/Functions with extended support (a usage sketch follows the list):
- NEBatchToSpaceLayerKernel/NEBatchToSpaceLayer
- NEChannelShuffleLayerKernel/NEChannelShuffleLayer
- NECol2ImKernel/NECol2Im
- NEConvertFullyConnectedWeightsKernel/NEConvertFullyConnectedWeights
- NECopyKernel/NECopy
- NEConvolutionLayerReshapeWeights
- NEDepthToSpaceLayerKernel/NEDepthToSpaceLayer
- NEFlattenLayerKernel/NEFlattenLayer
- NEFillBorderKernel
- NEFullyConnectedLayerReshapeWeights
- NEGatherKernel/NEGather
- NEGEMMInterleave4x4Kernel
- NEGEMMTranspose1xWKernel
- NEIm2ColKernel/NEIm2Col
- NEMemsetKernel
- NEPadLayerKernel/NEPadLayer
- NEPermuteKernel/NEPermute
- NEReverseKernel/NEReverse
- NEReorgLayerKernel/NEReorgLayer
- NEReshapeLayerKernel/NEReshapeLayer
- NESplit
- NESlice
- NEStridedSliceKernel/NEStridedSlice
- NESpaceToBatchLayerKernel/NESpaceToBatchLayer
- NESpaceToDepthLayerKernel/NESpaceToDepthLayer
- NEStackLayerKernel/NEStackLayer
- NETileKernel/NETile
- NETransposeKernel/NETranspose
- NEWidthConcatenateLayerKernel/NEWidthConcatenateLayer
- NEHeightConcatenateLayerKernel/NEHeightConcatenateLayer
- NEDepthConcatenateLayerKernel/NEDepthConcatenateLayer
- NEBatchConcatenateLayerKernel/NEBatchConcatenateLayer
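
As a rough illustration of the extended support, below is a minimal sketch (not part of this
change) that drives one of the listed functions, NEReshapeLayer, on a QASYMM8_SIGNED tensor.
The shapes and quantization parameters are placeholders chosen for the example only:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // 8x8 QASYMM8_SIGNED input reshaped to a flat 64-element output;
        // the scale/offset values here are illustrative only.
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(8U, 8U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.5f, 10)));
        dst.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.5f, 10)));

        NEReshapeLayer reshape;
        reshape.configure(&src, &dst);

        src.allocator()->allocate();
        dst.allocator()->allocate();
        // src would normally be filled with quantized data before running.

        reshape.run();
        return 0;
    }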

Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: Ia070332ad4c4dbced2541dc46f7f2f3a86833b65
Reviewed-on: https://review.mlplatform.org/c/2442
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
diff --git a/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h b/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h
index f0a33a7..2a62530 100644
--- a/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h
@@ -40,14 +40,14 @@
 public:
     /** Set the input and output tensors.
      *
-     * @param[in]  input       Tensor input. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+     * @param[in]  input       Tensor input. Supported tensor rank: 4. Data types supported: All.
      * @param[in]  block_shape 1-D tensor with shape [M]. Data types supported: S32
      * @param[out] output      Tensor output. Data types supported: same as @p input
      */
     void configure(const ITensor *input, const ITensor *block_shape, ITensor *output);
     /** Set the input and output tensors. (Static block shape).
      *
-     * @param[in]  input         Tensor input. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+     * @param[in]  input         Tensor input. Supported tensor rank: 4. Data types supported: All.
      * @param[in]  block_shape_x Block shape x value.
      * @param[in]  block_shape_y Block shape y value.
      * @param[out] output        Tensor output. Data types supported: same as @p input
@@ -55,7 +55,7 @@
     void configure(const ITensor *input, int32_t block_shape_x, int32_t block_shape_y, ITensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLBatchToSpaceLayer
      *
-     * @param[in]  input       Tensor input info. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+     * @param[in]  input       Tensor input info. Supported tensor rank: 4. Data types supported: All.
      * @param[in]  block_shape block shape tensor info with shape [M]. Data types supported: S32
      * @param[out] output      Tensor output info. Data types supported: same as @p input
      *
@@ -64,7 +64,7 @@
     static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *output);
     /** Static function to check if given info will lead to a valid configuration of @ref CLBatchToSpaceLayer (Static block shape).
      *
-     * @param[in]  input         Tensor input info. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+     * @param[in]  input         Tensor input info. Supported tensor rank: 4. Data types supported: All.
      * @param[in]  block_shape_x Block shape x value.
      * @param[in]  block_shape_y Block shape y value.
      * @param[out] output        Tensor output info. Data types supported: same as @p input
diff --git a/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h b/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h
index ecc1822..716518a 100644
--- a/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h
@@ -42,14 +42,14 @@
 public:
     /** Initialize the function
      *
-     * @param[in]  input      Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+     * @param[in]  input      Input tensor. Data types supported: All
      * @param[out] output     Output tensor. Data type supported: Same as @p input
      * @param[in]  num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups.
      */
     void configure(const ITensor *input, ITensor *output, unsigned int num_groups);
     /** Static function to check if given info will lead to a valid configuration of @ref NEChannelShuffleLayer
      *
-     * @param[in]  input      Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+     * @param[in]  input      Input tensor. Data types supported: All
      * @param[out] output     Output tensor. Data type supported: Same as @p input
      * @param[in]  num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups.
      *
diff --git a/arm_compute/runtime/NEON/functions/NECol2Im.h b/arm_compute/runtime/NEON/functions/NECol2Im.h
index 8987c82..5da0b91 100644
--- a/arm_compute/runtime/NEON/functions/NECol2Im.h
+++ b/arm_compute/runtime/NEON/functions/NECol2Im.h
@@ -39,7 +39,7 @@
 public:
     /** Configure the col2im NEON kernel
      *
-     * @param[in]  input          The input tensor to convert. Data types supported: Any
+     * @param[in]  input          The input tensor to convert. Data types supported: All
      * @param[out] output         The output tensor. 3 lower dimensions represent a single output [width, height, OFM],
      *                            while the rest represent batch of outputs. Data types supported: Same as @p input
      * @param[in]  convolved_dims Output convolved dimensions.
@@ -47,7 +47,7 @@
     void configure(const ITensor *input, ITensor *output, const Size2D &convolved_dims);
     /** Static function to check if given info will lead to a valid configuration of @ref NECol2Im
      *
-     * @param[in] input          The input tensor to convert. Data types supported: Any
+     * @param[in] input          The input tensor to convert. Data types supported: All
      * @param[in] output         The output tensor. 3 lower dimensions represent a single output [width, height, OFM],
      *                           while the rest represent batch of outputs. Data types supported: Same as @p input
      * @param[in] convolved_dims Output convolved dimensions.
diff --git a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h
index 3591cfd..8207589 100644
--- a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h
@@ -57,7 +57,7 @@
      * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis.
      * @note Preconditions can be found respectively at @ref NEWidthConcatenateLayerKernel, @ref NEHeightConcatenateLayerKernel and @ref NEDepthConcatenateLayerKernel.
      *
-     * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/F16/F32.
+     * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
      * @param[out]    output        Output tensor. Data types supported: Same as @p input.
      * @param[in]     axis          Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3.
      */
@@ -68,7 +68,7 @@
      * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis.
      * @note Preconditions can be found respectively at @ref NEWidthConcatenateLayerKernel, @ref NEHeightConcatenateLayerKernel and @ref NEDepthConcatenateLayerKernel.
      *
-     * @param[in] inputs_vector The vectors containing all the tensors info to concatenate. Data types supported: QASYMM8/F16/F32.
+     * @param[in] inputs_vector The vectors containing all the tensors info to concatenate. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
      * @param[in] output        Output tensor info. Data types supported: Same as @p input.
      * @param[in] axis          Concatenation axis. Supported underlying concatenation axis are 0, 1, 2 and 3.
      *
diff --git a/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h b/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h
index 37b6b6c..42f7870 100644
--- a/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h
+++ b/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h
@@ -32,6 +32,7 @@
 
 namespace arm_compute
 {
+// Forward declarations
 class ITensor;
 
 /** Basic function to run @ref NEConvertFullyConnectedWeightsKernel. */
@@ -42,7 +43,7 @@
     NEConvertFullyConnectedWeights();
     /** Initialize the function.
      *
-     * @param[in]  input                Source weights tensor to convert. Must be 2 dimensional. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
+     * @param[in]  input                Source weights tensor to convert. Must be 2 dimensional. Data types supported: All.
      * @param[out] output               The converted weights tensor. Shape and Data Type: Same as @p input.
      * @param[in]  original_input_shape Shape of the original input tensor (the one entering fully connected layer).
      * @param[in]  data_layout          The data layout the weights have been trained in.
@@ -50,7 +51,7 @@
     void configure(const ITensor *input, ITensor *output, const TensorShape &original_input_shape, DataLayout data_layout);
     /** Static function to check if given info will lead to a valid configuration of @ref NEConvertFullyConnectedWeights
      *
-     * @param[in] input                Source weights tensor info to convert. Must be 2 dimensional. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
+     * @param[in] input                Source weights tensor info to convert. Must be 2 dimensional. Data types supported: All.
      * @param[in] output               The converted weights tensor info. Shape and Data Type: Same as @p input.
      * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer).
      * @param[in] data_layout          The data layout the weights have been trained in.
diff --git a/arm_compute/runtime/NEON/functions/NECopy.h b/arm_compute/runtime/NEON/functions/NECopy.h
index 7ad89e2..b03f408 100644
--- a/arm_compute/runtime/NEON/functions/NECopy.h
+++ b/arm_compute/runtime/NEON/functions/NECopy.h
@@ -37,14 +37,14 @@
 public:
     /** Initialise the function's source and destination.
      *
-     * @param[in]  input  Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+     * @param[in]  input  Source tensor. Data types supported: All
      * @param[out] output Output tensor. Data types supported: Same as @p input.
      *
      */
     void configure(ITensor *input, ITensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref NECopy
      *
-     * @param[in] input  Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+     * @param[in] input  Source tensor. Data types supported: All
      * @param[in] output Output tensor. Data types supported: Same as @p input.
      *
      * @return a status
diff --git a/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h b/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h
index 56c2374..3c21d1a 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h
@@ -32,6 +32,7 @@
 
 namespace arm_compute
 {
+// Forward declarations
 class ITensor;
 
 /** Basic function to run @ref NEDepthToSpaceLayerKernel. */
@@ -40,14 +41,14 @@
 public:
     /** Set the input and output tensors.
      *
-     * @param[in]  input       Tensor input. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+     * @param[in]  input       Tensor input. Supported tensor rank: 4. Data types supported: All
      * @param[out] output      Tensor output. Data types supported: same as @p input
      * @param[in]  block_shape Block shape value.
      */
     void configure(const ITensor *input, ITensor *output, int32_t block_shape);
     /** Static function to check if given info will lead to a valid configuration of @ref NEDepthToSpaceLayer.
      *
-     * @param[in] input       Tensor input info. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+     * @param[in] input       Tensor input info. Supported tensor rank: 4. Data types supported: All
      * @param[in] output      Tensor output info. Data types supported: same as @p input
      * @param[in] block_shape Block shape x value.
      *
diff --git a/arm_compute/runtime/NEON/functions/NEFill.h b/arm_compute/runtime/NEON/functions/NEFill.h
index e5badcb..f8a1507 100644
--- a/arm_compute/runtime/NEON/functions/NEFill.h
+++ b/arm_compute/runtime/NEON/functions/NEFill.h
@@ -39,7 +39,7 @@
 public:
     /** Initialize the function
      *
-     * @param[in,out] tensor         Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+     * @param[in,out] tensor         Source tensor. Data types supported: All
      * @param[in]     constant_value Constant value to use to fill tensor.
      */
     void configure(ITensor *tensor, PixelValue constant_value);
diff --git a/arm_compute/runtime/NEON/functions/NEFillBorder.h b/arm_compute/runtime/NEON/functions/NEFillBorder.h
index b8266d9..0ae04cb 100644
--- a/arm_compute/runtime/NEON/functions/NEFillBorder.h
+++ b/arm_compute/runtime/NEON/functions/NEFillBorder.h
@@ -31,6 +31,7 @@
 
 namespace arm_compute
 {
+// Forward declaration
 class ITensor;
 
 /** Basic function to run @ref NEFillBorderKernel */
@@ -41,7 +42,7 @@
      *
      * @note This function fills the borders within the XY-planes.
      *
-     * @param[in, out] input                 Source tensor. Data type supported: U8/S16/S32/F32
+     * @param[in, out] input                 Source tensor. Data type supported: All
      * @param[in]      border_width          Width of the tensor border in pixels.
      * @param[in]      border_mode           Strategy to use for borders.
      * @param[in]      constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
@@ -54,5 +55,5 @@
 private:
     NEFillBorderKernel _border_handler; /**< Kernel to handle image borders */
 };
-}
+} // namespace arm_compute
 #endif /*ARM_COMPUTE_NEFILLBORDER_H */
diff --git a/arm_compute/runtime/NEON/functions/NEFlattenLayer.h b/arm_compute/runtime/NEON/functions/NEFlattenLayer.h
index db73f52..7b4801c 100644
--- a/arm_compute/runtime/NEON/functions/NEFlattenLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEFlattenLayer.h
@@ -37,7 +37,7 @@
 public:
     /** Initialise the kernel's input and output.
      *
-     * @param[in]  input  First input tensor to flatten with at least 3 dimensions. The dimensions over the third will be interpreted as batches. Data types supported: F16/F32
+     * @param[in]  input  First input tensor to flatten with at least 3 dimensions. The dimensions over the third will be interpreted as batches. Data types supported: All
      * @param[out] output Output tensor with shape [w*h*d, input_batches] where:
      *             w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input
      */
@@ -46,7 +46,7 @@
     /** Static function to check if given info will lead to a valid configuration of @ref NEFlattenLayer
      *
      * @param[in]  input  First input tensor to flatten with at least 3 dimensions.
-     *                    The dimensions above the third will be interpreted as batches. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+     *                    The dimensions above the third will be interpreted as batches. Data types supported: All
      * @param[out] output Output tensor with shape [w*h*d, input_batches] where:
      *                    w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input
      *
diff --git a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
index 5eabdcd..8150737 100644
--- a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
@@ -49,13 +49,13 @@
 public:
     /** Set the input and output tensors.
      *
-     * @param[in]  input  Weights tensor. The weights must be 2 dimensional. Data types supported: QASYMM8/F16/F32.
+     * @param[in]  input  Weights tensor. The weights must be 2 dimensional. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
      * @param[out] output Destination tensor. Data type supported: Same as @p input.
      */
     void configure(const ITensor *input, ITensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref NEFullyConnectedLayerReshapeWeights
      *
-     * @param[in] input  Weights tensor info. The weights must be 2 dimensional. Data types supported: QASYMM8/F16/F32.
+     * @param[in] input  Weights tensor info. The weights must be 2 dimensional. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
      * @param[in] output Destination tensor info. Data type supported: Same as @p input.
      *
      * @return a status
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h b/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h
index 8b66eb2..6a38490 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h
@@ -28,6 +28,7 @@
 
 namespace arm_compute
 {
+// Forward declarations
 class ITensor;
 
 /** Basic function to execute NEGEMMTranspose1xWKernel. This function calls the following NEON kernels:
@@ -40,13 +41,13 @@
 public:
     /** Initialise the kernel's inputs, output
      *
-     * @param[in]  input  First input tensor. Data type supported: U8/S8/QASYMM8/QSYMM8_PER_CHANNEL/U16/S16/F16/U32/S32/F32
+     * @param[in]  input  First input tensor. Data type supported: All
      * @param[out] output Output tensor. Data type supported: same as @p input
      */
     void configure(const ITensor *input, ITensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMTranspose1xW
      *
-     * @param[in] input  First input tensor. Data type supported: U8/S8/QASYMM8/QSYMM8_PER_CHANNEL/U16/S16/F16/U32/S32/F32
+     * @param[in] input  First input tensor. Data type supported: All
      * @param[in] output Output tensor. Data type supported: same as @p input
      *
      * @return a status
diff --git a/arm_compute/runtime/NEON/functions/NEGather.h b/arm_compute/runtime/NEON/functions/NEGather.h
index 6b6072c..7ed45c0 100644
--- a/arm_compute/runtime/NEON/functions/NEGather.h
+++ b/arm_compute/runtime/NEON/functions/NEGather.h
@@ -30,6 +30,7 @@
 
 namespace arm_compute
 {
+// Forward declarations
 class ITensor;
 
 /** Basic function to run @ref NEGatherKernel */
@@ -38,7 +39,7 @@
 public:
     /** Initialise the kernel's inputs and outputs
      *
-     * @param[in]  input   Source tensor. Supported tensor rank: up to 4. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+     * @param[in]  input   Source tensor. Supported tensor rank: up to 4. Data type supported: All
      * @param[in]  indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following type: U32/S32. Each value Must be in range [0, input.shape[@p axis])
      * @param[out] output  Destination tensor. Data type supported: Same as @p input
      * @param[in]  axis    (Optional) The axis in @p input to gather @p indices from. Defaults to 0
@@ -47,7 +48,7 @@
 
     /** Static function to check if given info will lead to a valid configuration of @ref NEGatherKernel
      *
-     * @param[in] input   Source tensor info. Supported tensor rank: up to 4. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+     * @param[in] input   Source tensor info. Supported tensor rank: up to 4. Data type supported: All
      * @param[in] indices Indices tensor info. Supported tensor rank: up to 1. Must be one of the following types: U32/S32. Each value Must be in range [0, input.shape[@p axis])
      * @param[in] output  Destination tensor info. Data type supported: Same as @p input
      * @param[in] axis    (Optional) The axis in @p input to gather @p indices from. Defaults to 0
@@ -56,7 +57,6 @@
      */
     static Status validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, int axis);
 };
-
 } // namespace arm_compute
 
 #endif /* ARM_COMPUTE_NEGATHER_H */
diff --git a/arm_compute/runtime/NEON/functions/NEIm2Col.h b/arm_compute/runtime/NEON/functions/NEIm2Col.h
index 475e1d6..ac1b267 100644
--- a/arm_compute/runtime/NEON/functions/NEIm2Col.h
+++ b/arm_compute/runtime/NEON/functions/NEIm2Col.h
@@ -32,6 +32,7 @@
 
 namespace arm_compute
 {
+// Forward declarations
 class ITensor;
 
 /** Basic function to run @ref NEIm2ColKernel */
@@ -43,7 +44,7 @@
     /** Configure the im2col NEON kernel
      *
      * @param[in]  input       The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
-     *                         while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/F16/F32
+     *                         while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
      *                         Note: QASYMM8 works only for has_bias = false
      * @param[out] output      The output tensor. Data types supported: Same as @p input
      * @param[in]  kernel_dims The kernel dimensions (width and height).
@@ -57,7 +58,7 @@
     /** Static function to check if given info will lead to a valid configuration of @ref NEIm2Col
      *
      * @param[in] input       The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
-     *                        while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/F16/F32
+     *                        while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32
      *                        Note: QASYMM8 works only for has_bias = false
      * @param[in] output      The output tensor. Data types supported: Same as @p input
      * @param[in] kernel_dims The kernel dimensions (width and height).
diff --git a/arm_compute/runtime/NEON/functions/NEPadLayer.h b/arm_compute/runtime/NEON/functions/NEPadLayer.h
index a177393..d3074e7 100644
--- a/arm_compute/runtime/NEON/functions/NEPadLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEPadLayer.h
@@ -53,7 +53,7 @@
     NEPadLayer();
     /** Initialize the function
      *
-     * @param[in]  input          Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+     * @param[in]  input          Source tensor. Data types supported: All.
      * @param[out] output         Output tensor. Data type supported: same as @p input
      * @param[in]  padding        The padding for each spatial dimension of the input tensor. The pair padding[i]
      *                            specifies the front and the end padding in the i-th dimension.
@@ -64,7 +64,7 @@
     void configure(ITensor *input, ITensor *output, const PaddingList &padding, const PixelValue constant_value = PixelValue(), const PaddingMode mode = PaddingMode::CONSTANT);
     /**  Static function to check if given info will lead to a valid configuration of @ref NEPadLayer.
      *
-     * @param[in] input          Source tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+     * @param[in] input          Source tensor info. Data types supported: All.
      * @param[in] output         Output tensor info. Data type supported: same as @p input
      * @param[in] padding        The padding for each spatial dimension of the input tensor. The pair padding[i]
      *                           specifies the front and the end padding in the i-th dimension.
@@ -82,7 +82,7 @@
 private:
     /** Configure kernels for when constant padding is used.
      *
-     * @param[in]  input          Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+     * @param[in]  input          Source tensor. Data types supported: All.
      * @param[out] output         Output tensor. Data type supported: same as @p input
      * @param[in]  padding        The padding for each spatial dimension of the input tensor. The pair padding[i]
      *                            specifies the front and the end padding in the i-th dimension.
@@ -91,7 +91,7 @@
     void configure_constant_mode(ITensor *input, ITensor *output, const PaddingList &padding, const PixelValue constant_value);
     /** Configure functions for when reflect or symmetric padding is used.
      *
-     * @param[in]  input  Source tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+     * @param[in]  input  Source tensor. Data types supported: All.
      * @param[out] output Output tensor. Data type supported: same as @p input
      */
     void configure_reflect_symmetric_mode(ITensor *input, ITensor *output);
diff --git a/arm_compute/runtime/NEON/functions/NEPermute.h b/arm_compute/runtime/NEON/functions/NEPermute.h
index 72e8642..4651b30 100644
--- a/arm_compute/runtime/NEON/functions/NEPermute.h
+++ b/arm_compute/runtime/NEON/functions/NEPermute.h
@@ -30,6 +30,7 @@
 
 namespace arm_compute
 {
+// Forward declarations
 class ITensor;
 
 /** Basic function to run @ref NEPermuteKernel */
@@ -40,7 +41,7 @@
      *
      * @note Arbitrary permutation vectors are supported with rank not greater than 4
      *
-     * @param[in]  input  The input tensor to permute. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+     * @param[in]  input  The input tensor to permute. Data types supported: All
      * @param[out] output The output tensor. Data types supported: Same as @p input
      * @param[in]  perm   Permutation vector
      */
@@ -49,7 +50,7 @@
      *
      * @note Arbitrary permutation vectors are supported with rank not greater than 4
      *
-     * @param[in] input  The input tensor to permute. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+     * @param[in] input  The input tensor to permute. Data types supported: All
      * @param[in] output The output tensor. Data types supported: Same as @p input
      * @param[in] perm   Permutation vector
      *
diff --git a/arm_compute/runtime/NEON/functions/NEReorgLayer.h b/arm_compute/runtime/NEON/functions/NEReorgLayer.h
index 5f28821..8ef7f8a 100644
--- a/arm_compute/runtime/NEON/functions/NEReorgLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEReorgLayer.h
@@ -38,7 +38,7 @@
 public:
     /** Initialise the kernel's inputs and outputs
      *
-     * @param[in]  input  First tensor input. Data type supported: U8/S8/QASYMM8//U16/S16/U32/S32/F16/F32
+     * @param[in]  input  First tensor input. Data type supported: All
      * @param[out] output Output tensor. Data type supported: Same as @p input
      * @param[in]  stride Stride to be used during data re-organization
      *                    It defines the spatial distance between 2 consecutive pixels in the x and y direction
@@ -47,7 +47,7 @@
 
     /** Static function to check if given info will lead to a valid configuration of @ref NEReorgLayer
      *
-     * @param[in] input  First tensor info. Data type supported: U8/S8/QASYMM8//U16/S16/U32/S32/F16/F32
+     * @param[in] input  First tensor info. Data type supported: All
      * @param[in] output Output tensor info. Data type supported: Same as @p input
      * @param[in] stride Stride to be used during data re-organization
      *                   It defines the spatial distance between 2 consecutive pixels in the x and y direction
diff --git a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h
index c57387a..d664384 100644
--- a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h
@@ -29,6 +29,7 @@
 
 namespace arm_compute
 {
+// Forward declarations
 class ITensor;
 
 /** Basic function to run @ref NEReshapeLayerKernel */
@@ -37,14 +38,14 @@
 public:
     /** Initialise the kernel's inputs and outputs
      *
-     * @param[in]  input  First tensor input. Data type supported: U8/S8/QASYMM8//U16/S16/U32/S32/F16/F32
+     * @param[in]  input  First tensor input. Data type supported: All
      * @param[out] output Output tensor. Data type supported: Same as @p input
      */
     void configure(const ITensor *input, ITensor *output);
 
     /** Static function to check if given info will lead to a valid configuration of @ref NEReshapeLayer
      *
-     * @param[in] input  First tensor info. Data type supported: U8/S8/QASYMM8//U16/S16/U32/S32/F16/F32
+     * @param[in] input  First tensor info. Data type supported: All
      * @param[in] output Output tensor info. Data type supported: Same as @p input
      *
      * @return a status
diff --git a/arm_compute/runtime/NEON/functions/NEReverse.h b/arm_compute/runtime/NEON/functions/NEReverse.h
index 6414cbd..ab5a5d0 100644
--- a/arm_compute/runtime/NEON/functions/NEReverse.h
+++ b/arm_compute/runtime/NEON/functions/NEReverse.h
@@ -38,14 +38,14 @@
 public:
     /** Initialize the function
      *
-     * @param[in]  input  Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+     * @param[in]  input  Input tensor. Data types supported: All
      * @param[out] output Output tensor. Data type supported: Same as @p input
      * @param[in]  axis   Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32
      */
     void configure(const ITensor *input, ITensor *output, const ITensor *axis);
     /** Static function to check if given info will lead to a valid configuration of @ref NEReverseKernel
      *
-     * @param[in] input  Input tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+     * @param[in] input  Input tensor info. Data types supported: All
      * @param[in] output Output tensor info. Data type supported: Same as @p input
      * @param[in] axis   Axis tensor info. Contains the indices of the dimensions to reverse. Data type supported: U32
      *
diff --git a/arm_compute/runtime/NEON/functions/NESelect.h b/arm_compute/runtime/NEON/functions/NESelect.h
index 049267a..6ac3280 100644
--- a/arm_compute/runtime/NEON/functions/NESelect.h
+++ b/arm_compute/runtime/NEON/functions/NESelect.h
@@ -39,7 +39,7 @@
     /** Initialise the kernel's inputs and output.
      *
      * @param[in]  c      Condition input tensor. Data types supported: U8.
-     * @param[in]  x      First input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
+     * @param[in]  x      First input tensor. Data types supported: All.
      * @param[in]  y      Second input tensor. Data types supported: Same as @p x
      * @param[out] output Output tensor. Data types supported: Same as @p x.
      */
@@ -47,7 +47,7 @@
     /** Static function to check if given info will lead to a valid configuration of @ref NESelect
      *
      * @param[in] c      Condition input tensor. Data types supported: U8.
-     * @param[in] x      First input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
+     * @param[in] x      First input tensor. Data types supported: All.
      * @param[in] y      Second input tensor. Data types supported: Same as @p x
      * @param[in] output Output tensor. Data types supported: Same as @p x.
      *
diff --git a/arm_compute/runtime/NEON/functions/NESlice.h b/arm_compute/runtime/NEON/functions/NESlice.h
index 179c64a..834ec27 100644
--- a/arm_compute/runtime/NEON/functions/NESlice.h
+++ b/arm_compute/runtime/NEON/functions/NESlice.h
@@ -42,7 +42,7 @@
      * @note End coordinates can be negative, which represents the number of elements before the end of that dimension.
      * @note End indices are not inclusive unless negative.
      *
-     * @param[in]  input  Source tensor. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+     * @param[in]  input  Source tensor. Data type supported: All
      * @param[out] output Destination tensor. Data type supported: Same as @p input
      * @param[in]  starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input).
      * @param[in]  ends   The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input).
@@ -56,7 +56,7 @@
      * @note End coordinates can be negative, which represents the number of elements before the end of that dimension.
      * @note End indices are not inclusive unless negative.
      *
-     * @param[in] input  Source tensor info. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+     * @param[in] input  Source tensor info. Data type supported: All
      * @param[in] output Destination tensor info. Data type supported: Same as @p input
      * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input).
      * @param[in] ends   The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input).
diff --git a/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h b/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h
index c433f18..75fa50c 100644
--- a/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h
+++ b/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h
@@ -56,7 +56,7 @@
     virtual ~NESpaceToBatchLayer() = default;
     /** Set the input and output tensors.
      *
-     * @param[in]  input       Tensor input. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+     * @param[in]  input       Tensor input. Supported tensor rank: 4. Data types supported: All.
      * @param[in]  block_shape 1-D tensor with shape [M]. Data types supported: S32
      * @param[in]  paddings    2-D tensor with shape [2, M]. Data types supported: S32
      * @param[out] output      Tensor output. Data types supported: same as @p input
@@ -64,7 +64,7 @@
     void configure(const ITensor *input, const ITensor *block_shape, const ITensor *paddings, ITensor *output);
     /** Set the input and output tensors. (Static block shape and paddings)
      *
-     * @param[in]  input         Tensor input. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+     * @param[in]  input         Tensor input. Supported tensor rank: 4. Data types supported: All.
      * @param[in]  block_shape_x Block shape x value.
      * @param[in]  block_shape_y Block shape y value.
      * @param[in]  padding_left  The left padding of the output tensor.
@@ -74,7 +74,7 @@
     void configure(const ITensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ITensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref NESpaceToBatchLayer
      *
-     * @param[in] input       Tensor input info. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+     * @param[in] input       Tensor input info. Supported tensor rank: 4. Data types supported: All.
      * @param[in] block_shape block shape tensor info with shape [M]. Data types supported: S32
      * @param[in] paddings    paddings tensor info with shape [2, M]. Data types supported: S32
      * @param[in] output      Tensor output info. Data types supported: same as @p input
@@ -84,7 +84,7 @@
     static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *paddings, const ITensorInfo *output);
     /** Static function to check if given info will lead to a valid configuration of @ref NESpaceToBatchLayer (Static block shape and paddings)
      *
-     * @param[in] input         Tensor input info. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+     * @param[in] input         Tensor input info. Supported tensor rank: 4. Data types supported: All.
      * @param[in] block_shape_x Block shape x value.
      * @param[in] block_shape_y Block shape y value.
      * @param[in] padding_left  The left padding of the output tensor.
diff --git a/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h b/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h
index ac93b48..6a7a9c8 100644
--- a/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h
+++ b/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h
@@ -55,14 +55,14 @@
     virtual ~NESpaceToDepthLayer() = default;
     /** Set the input and output tensors.
      *
-     * @param[in]  input       Tensor input. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+     * @param[in]  input       Tensor input. Supported tensor rank: 4. Data types supported: All.
      * @param[out] output      Tensor output. Data types supported: same as @p input
      * @param[in]  block_shape Block shape value
      */
     void configure(const ITensor *input, ITensor *output, int32_t block_shape);
     /** Static function to check if given info will lead to a valid configuration of @ref NESpaceToDepthLayer (Static block shape and paddings)
      *
-     * @param[in] input       Tensor input info. Supported tensor rank: 4. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+     * @param[in] input       Tensor input info. Supported tensor rank: 4. Data types supported: All.
      * @param[in] output      Tensor output info. Data types supported: same as @p input
      * @param[in] block_shape Block shape value
      *
diff --git a/arm_compute/runtime/NEON/functions/NESplit.h b/arm_compute/runtime/NEON/functions/NESplit.h
index 26ff405..e4d6204 100644
--- a/arm_compute/runtime/NEON/functions/NESplit.h
+++ b/arm_compute/runtime/NEON/functions/NESplit.h
@@ -45,7 +45,7 @@
     NESplit();
     /** Initialise the kernel's input and outputs.
      *
-     * @param[in]  input   The input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
+     * @param[in]  input   The input tensor. Data types supported: All
      * @param[out] outputs A vector containing the output tensors. Data types supported: Same as @p input.
      *                     The output tensors should match the input tensor dimensions for all shape dimensions apart
      *                     from the split dimension.
@@ -54,7 +54,7 @@
     void configure(const ITensor *input, const std::vector<ITensor *> &outputs, unsigned int axis);
     /** Static function to check if given info will lead to a valid configuration of @ref NESplit
      *
-     * @param[in] input   The input tensor info. Data types supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32.
+     * @param[in] input   The input tensor info. Data types supported: All
      * @param[in] outputs A vector containing the output tensors' info. Data types supported: Same as @p input.
      *                    The output tensors should match the input tensor dimensions for all shape dimensions apart
      *                    from the split dimension
diff --git a/arm_compute/runtime/NEON/functions/NEStackLayer.h b/arm_compute/runtime/NEON/functions/NEStackLayer.h
index e839956..9288035 100644
--- a/arm_compute/runtime/NEON/functions/NEStackLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEStackLayer.h
@@ -50,7 +50,7 @@
      *
      * @note Supported input tensor rank: up to 4
      *
-     * @param[in]  input  The vectors containing all the tensors with the same shape to stack. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+     * @param[in]  input  The vectors containing all the tensors with the same shape to stack. Data types supported: All
      * @param[in]  axis   The dimension to stack the tensors along. It must be smaller than the number of input dimensions.
      *                    Negative values wrap around
      * @param[out] output Output tensor. Data types supported: Same as @p input.
@@ -60,7 +60,7 @@
      *
      * @note Supported input tensor rank: up to 4
      *
-     * @param[in] input  The vectors containing all the tensors info with the same shape to stack. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+     * @param[in] input  The vectors containing all the tensors info with the same shape to stack. Data types supported: All
      * @param[in] axis   The dimension to stack the tensors along. It must be smaller than the number of input dimensions.
      *                   Negative values wrap around
      * @param[in] output Output tensor info. Data types supported: Same as @p input.
diff --git a/arm_compute/runtime/NEON/functions/NEStridedSlice.h b/arm_compute/runtime/NEON/functions/NEStridedSlice.h
index d8c3918..6d5e639 100644
--- a/arm_compute/runtime/NEON/functions/NEStridedSlice.h
+++ b/arm_compute/runtime/NEON/functions/NEStridedSlice.h
@@ -39,7 +39,7 @@
      *
      * @note Supported tensor rank: up to 4
      *
-     * @param[in]  input            Source tensor. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+     * @param[in]  input            Source tensor. Data type supported: All
      * @param[out] output           Destination tensor. Data type supported: Same as @p input
      * @param[in]  starts           The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input).
      * @param[in]  ends             The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input).
@@ -57,7 +57,7 @@
      *
      * @note Supported tensor rank: up to 4
      *
-     * @param[in] input            Source tensor info. Data type supported: U8/S8/QASYMM8/U16/S16/U32/S32/F16/F32
+     * @param[in] input            Source tensor info. Data type supported: All
      * @param[in] output           Destination tensor info. Data type supported: Same as @p input
      * @param[in] starts           The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input).
      * @param[in] ends             The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input).
diff --git a/arm_compute/runtime/NEON/functions/NETile.h b/arm_compute/runtime/NEON/functions/NETile.h
index c7a0896..14d4f22 100644
--- a/arm_compute/runtime/NEON/functions/NETile.h
+++ b/arm_compute/runtime/NEON/functions/NETile.h
@@ -38,14 +38,14 @@
 public:
     /** Set the source, destination of the kernel
      *
-     * @param[in]  input     Source tensor. Data type supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+     * @param[in]  input     Source tensor. Data type supported: All.
      * @param[out] output    Destination tensor. Same as @p input
      * @param[in]  multiples Contains the number of times the input tensor should be replicated on the given dimension.
      */
     void configure(const ITensor *input, ITensor *output, const Multiples &multiples);
     /** Static function to check if given info will lead to a valid configuration of @ref NETile
      *
-     * @param[in] input     Source tensor info. Data type supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32.
+     * @param[in] input     Source tensor info. Data type supported: All.
      * @param[in] output    Destination tensor info. Same as @p input
      * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension.
      *
diff --git a/arm_compute/runtime/NEON/functions/NETranspose.h b/arm_compute/runtime/NEON/functions/NETranspose.h
index 6148a67..03c90e5 100644
--- a/arm_compute/runtime/NEON/functions/NETranspose.h
+++ b/arm_compute/runtime/NEON/functions/NETranspose.h
@@ -41,13 +41,13 @@
 public:
     /** Initialise the kernel's inputs and output
      *
-     * @param[in]  input  Input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+     * @param[in]  input  Input tensor. Data types supported: All
      * @param[out] output Output tensor. Data type supported: Same as @p input
      */
     void configure(const ITensor *input, ITensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref NETranspose
      *
-     * @param[in] input  The input tensor. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32
+     * @param[in] input  The input tensor. Data types supported: All
      * @param[in] output The output tensor. Data types supported: Same as @p input
      *
      * @return a status