Fix trademarks throughout the codebase

Resolves: COMPMID-4299

Change-Id: Ie6a52c1371b9a2a7b5bb4f019ecd5e70a2008567
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5338
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
diff --git a/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h b/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h
index 8235185..cbf1d5b 100644
--- a/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h
@@ -37,7 +37,7 @@
 /** Function to calculate the index of the minimum or maximum values in a
  *  tensor based on an axis.
  *
- *  This function calls the following Neon kernels:
+ *  This function calls the following kernels:
  *
  * -# @ref NEReductionOperationKernel
  * -# @ref NEFillBorderKernel
diff --git a/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h b/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h
index de8dfef..c377520 100644
--- a/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h
+++ b/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -32,11 +32,7 @@
 class ITensor;
 class ITensorInfo;
 
-/** Basic function to run @ref NEBoundingBoxTransformKernel.
- *
- * This function calls the following Neon kernels:
- * -# @ref NEBoundingBoxTransformKernel
- */
+/** Basic function to run @ref NEBoundingBoxTransformKernel. */
 class NEBoundingBoxTransform : public INESimpleFunctionNoBorder
 {
 public:
diff --git a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
index a387255..d2d41c1 100644
--- a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
@@ -37,7 +37,7 @@
 // Forward declarations
 class ITensor;
 
-/** Basic function to simulate a convolution layer. This function calls one of the following Neon functions:
+/** Basic function to simulate a convolution layer. This function calls one of the following functions:
  * -# @ref NEGEMMConvolutionLayer     (executed only in case GEMM is required for the operation)
  * -# @ref NEWinogradConvolutionLayer (executed only in case Winograd is required for the operation)
  * -# @ref NEDirectConvolutionLayer   (executed only in case Direct Convolution is required for the operation)
diff --git a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
index 02a0f78..3864a66 100644
--- a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
@@ -64,7 +64,7 @@
  * The weights used by Deconvolution are supposed to be the same as the ones used for Convolution. Therefore, it will be necessary to use the weights in the
  * reverse order to perform an actual convolution. This is achieved by using @ref NEReverse.
  *
- * This function calls the following Neon kernels/functions:
+ * This function calls the following kernels/functions:
  *
  * -# @ref CPPUpsample
  * -# @ref NEConvolutionLayer
diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
index 98fffe0..9aa8f04 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
@@ -110,7 +110,7 @@
                                                                           const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
                                                                           ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
 
-    /** Basic function to execute optimized depthwise convolution routines. This function calls the following Neon kernels:
+    /** Basic function to execute optimized depthwise convolution routines. This function calls the following kernels:
     *
     * @note At the moment 3x3 and 5x5 convolution of stride 1, 2 are supported
     *
@@ -192,7 +192,7 @@
         bool                                   _is_prepared;
     };
 
-    /** Basic function to execute a generic depthwise convolution. This function calls the following Neon kernel:
+    /** Basic function to execute a generic depthwise convolution. This function calls the following kernel:
      *
      * -# @ref NEDepthwiseConvolutionLayerNativeKernel
      *
diff --git a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h
index ff0c305..86914fa 100644
--- a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h
@@ -41,7 +41,7 @@
 
 /** Function to run the direct convolution.
  *
- *  This function calls the following Neon kernels:
+ *  This function calls the following kernels:
  *
  * -# @ref NEFillBorderKernel for the input
  * -# @ref NEDirectConvolutionLayerOutputStageKernel
diff --git a/arm_compute/runtime/NEON/functions/NEFFT1D.h b/arm_compute/runtime/NEON/functions/NEFFT1D.h
index 04e8f81..a533aa7 100644
--- a/arm_compute/runtime/NEON/functions/NEFFT1D.h
+++ b/arm_compute/runtime/NEON/functions/NEFFT1D.h
@@ -40,7 +40,7 @@
 class NEFFTRadixStageKernel;
 class NEFFTScaleKernel;
 
-/** Basic function to execute one dimensional FFT. This function calls the following Neon kernels:
+/** Basic function to execute one dimensional FFT. This function calls the following kernels:
  *
  * -# @ref NEFFTDigitReverseKernel Performs digit reverse
  * -# @ref NEFFTRadixStageKernel   A list of FFT kernels depending on the radix decomposition
diff --git a/arm_compute/runtime/NEON/functions/NEFFT2D.h b/arm_compute/runtime/NEON/functions/NEFFT2D.h
index 218401b..ce84a85 100644
--- a/arm_compute/runtime/NEON/functions/NEFFT2D.h
+++ b/arm_compute/runtime/NEON/functions/NEFFT2D.h
@@ -36,7 +36,7 @@
 // Forward declaration
 class ITensor;
 
-/** Basic function to execute two dimensional FFT. This function calls the following Neon kernels:
+/** Basic function to execute two dimensional FFT. This function calls the following kernels:
  *
  * -# @ref NEFFT1D 1D FFT is performed on the first given axis
  * -# @ref NEFFT1D 1D FFT is performed on the second given axis
diff --git a/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h
index 8967363..213fa60 100644
--- a/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h
@@ -43,7 +43,7 @@
 // Forward declarations
 class ITensor;
 
-/** Basic function to execute FFT-based convolution on Neon. This function calls the following Neon functions/kernels:
+/** Basic function to execute FFT-based convolution on CPU. This function calls the following functions/kernels:
  *
  *  -# @ref NEPermute                        Permute input if NHWC(only NCHW is supported).
  *  -# @ref NEPadLayer                       Pad input.
@@ -84,7 +84,7 @@
      *                              Data types supported: Same as @p input.
      * @param[in]  conv_info        Contains padding and stride information described in @ref PadStrideInfo.
      * @param[in]  act_info         (Optional) Activation layer information in case of a fused activation.
-     * @param[in]  enable_fast_math (Optional) Enable fast math computation. Unused for Neon backend.
+     * @param[in]  enable_fast_math (Optional) Enable fast math computation. Unused for CPU backend.
      */
     void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info,
                    const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false);
@@ -101,7 +101,7 @@
      *                             Data types supported: Same as @p input.
      * @param[in] conv_info        Contains padding and stride information described in @ref PadStrideInfo.
      * @param[in] act_info         (Optional) Activation layer information in case of a fused activation.
-     * @param[in] enable_fast_math (Optional) Enable fast math computation. Unused for Neon backend.
+     * @param[in] enable_fast_math (Optional) Enable fast math computation. Unused for CPU backend.
      *
      * @return a status
      */
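
An illustrative configure/run sketch for NEFFTConvolutionLayer matching the signature documented above; it is not part of this patch. The tensor shapes, padding and RELU activation are made-up example values, and enable_fast_math is left at its default because it is unused on the CPU backend.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/TensorShape.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // 64x64 RGB input and 16 filters of size 9x9; FFT-based convolution requires stride 1.
        Tensor src, weights, biases, dst;
        src.allocator()->init(TensorInfo(TensorShape(64U, 64U, 3U), 1, DataType::F32));
        weights.allocator()->init(TensorInfo(TensorShape(9U, 9U, 3U, 16U), 1, DataType::F32));
        biases.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(64U, 64U, 16U), 1, DataType::F32));

        // Padding of 4 on each side keeps the 64x64 spatial size with a 9x9 kernel.
        NEFFTConvolutionLayer conv;
        conv.configure(&src, &weights, &biases, &dst, PadStrideInfo(1, 1, 4, 4),
                       ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));

        src.allocator()->allocate();
        weights.allocator()->allocate();
        biases.allocator()->allocate();
        dst.allocator()->allocate();

        conv.run();
        return 0;
    }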
diff --git a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
index 1b3f36d..58b1174 100644
--- a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
@@ -35,7 +35,7 @@
 
 namespace arm_compute
 {
-/** Basic function to reshape the weights of Fully Connected layer with Neon. This function calls the following kernels:
+/** Basic function to reshape the weights of a Fully Connected layer. This function calls the following kernels:
  *
  * @note  The fully connected layer accepts "weights" tensors only with 2 dimensions.
  */
@@ -117,7 +117,7 @@
 };
 } // namespace weights_transformations
 
-/** Basic function to compute a Fully Connected layer on Neon. This function calls the following Neon kernels:
+/** Basic function to compute a Fully Connected layer. This function calls the following kernels:
  *  -# @ref NEIm2ColKernel (called when the input comes from a convolutional layer)
  *  -# @ref NEFullyConnectedLayerReshapeWeights (if @p are_weights_reshaped is set to false and transpose_weights is set to true ) (called once)
  *  -# @ref NEGEMMMatrixMultiplyKernel or @ref NEGEMMLowpMatrixMultiplyCore (if quantized asymmetric)
diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h
index 6f7951e..a6c3436 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMM.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMM.h
@@ -43,7 +43,7 @@
 class NEGEMMTranspose1xWKernel;
 class NEGEMMAssemblyDispatch;
 
-/** Basic function to execute GEMM on Neon. This function calls the following Neon kernels:
+/** Basic function to execute GEMM. This function calls the following kernels:
  *
  * If optimized assembly is available:
  *  -# @ref NEGEMMAssemblyDispatch
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h b/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h
index 2bd233f..8c3ba4f 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMConv2d.h
@@ -38,7 +38,7 @@
 class ITensor;
 class NEGEMMAssemblyDispatch;
 
-/** Basic function to compute the convolution layer. This function calls the following Neon kernels/functions:
+/** Basic function to compute the convolution layer. This function calls the following kernels/functions:
  *
  * Supports only NHWC data layout
  *
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
index 33f00c0..9897bf1 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
@@ -150,7 +150,7 @@
 };
 } // namespace weights_transformations
 
-/** Basic function to compute the convolution layer. This function calls the following Neon kernels/functions:
+/** Basic function to compute the convolution layer. This function calls the following kernels/functions:
  *
  * -# @ref NEIm2ColKernel
  * -# @ref NEGEMM (if the data type is BFLOAT16/FP16/FP32)
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
index 821b498..b2b77bd 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
@@ -37,7 +37,6 @@
 {
 class ITensor;
 class NEConvertQuantizedSignednessKernel;
-class NEConvertQuantizedSignednessKernel;
 class NEGEMMInterleave4x4Kernel;
 class NEGEMMLowpMatrixMultiplyKernel;
 class NEGEMMLowpOffsetContributionKernel;
@@ -47,7 +46,7 @@
 class NEGEMMTranspose1xWKernel;
 class NEGEMMAssemblyDispatch;
 
-/** Basic function to execute GEMMLowpMatrixMultiplyCore on Neon. This function calls the following Neon kernels if the DOT product instruction is not available:
+/** Basic function to execute GEMMLowpMatrixMultiplyCore. This function calls the following kernels if the DOT product instruction is not available:
  *
  *  -# @ref NEGEMMInterleave4x4Kernel
  *  -# @ref NEGEMMTranspose1xWKernel
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h
index 79b427e..c22ed1b 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h
@@ -27,7 +27,7 @@
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
 
-/** This file contains all available output stages for GEMMLowp on Neon.
+/** This file contains all available output stages for GEMMLowp.
  *
  *  In gemmlowp, the "output stage" is the process that takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyCore),
  *  and processes it to obtain the final ASYMM8 value.
@@ -40,7 +40,7 @@
 class ITensor;
 class ITensorInfo;
 
-/** Basic function to execute NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint on Neon.
+/** Basic function to execute NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint.
  *
  *  NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint depends on 3 parameters:
  *
@@ -61,7 +61,7 @@
  *
  * ((FixedPointMul(input[i][k] + bias[k], result_fixedpoint_multiplier)) >> result_shift) + result_offset_after_shift
  *
- *  This function calls the following Neon kernels:
+ *  This function calls the following kernels:
  *
  * -# @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel
  *
@@ -112,7 +112,7 @@
      */
     static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
 };
-/** Basic function to execute NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint on Neon.
+/** Basic function to execute NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint.
  *
  *  NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint depends on 3 parameters:
  *
@@ -133,7 +133,7 @@
  *
  * ((FixedPointMul(input[i][k] + bias[k], result_fixedpoint_multiplier)) >> result_shift) + result_offset_after_shift
  *
- *  This function calls the following Neon kernels:
+ *  This function calls the following kernels:
  *
  * -# @ref NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel
  *
@@ -184,7 +184,7 @@
      */
     static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
 };
-/** Basic function to execute NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint on Neon.
+/** Basic function to execute NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint.
  *
  *  NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint depends on 2 parameters:
  *
@@ -205,7 +205,7 @@
  *
  * ((FixedPointMul(input[i][k] + bias[k], result_fixedpoint_multiplier)) >> result_shift) + result_offset_after_shift
  *
- *  This function calls the following Neon kernels:
+ *  This function calls the following kernels:
  *
  * -# @ref NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel
  *
@@ -256,9 +256,9 @@
     static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = std::numeric_limits<int32_t>::lowest(), int max = std::numeric_limits<int32_t>::max());
 };
 
-/** Basic function to execute GEMMLowpQuantizeDown kernels on Neon.
+/** Basic function to execute GEMMLowpQuantizeDown kernels.
  *
- *  This function calls the following Neon kernels:
+ *  This function calls the following kernels:
  *
  * -# @ref NEGEMMLowpQuantizeDownInt32ScaleKernel
  * -# @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel
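
The output stages documented above reduce to ((FixedPointMul(acc + bias, result_fixedpoint_multiplier)) >> result_shift) + result_offset_after_shift, followed by clamping and narrowing. The sketch below is a self-contained C++ rendering of that arithmetic for the uint8 case; the helper names (rounding_doubling_high_mul, rounding_divide_by_pow2, quantize_down) are invented here and mirror gemmlowp-style fixed-point math rather than the library's kernel code.

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    // FixedPointMul: rounding doubling high multiply of two Q0.31 fixed-point values.
    int32_t rounding_doubling_high_mul(int32_t a, int32_t b)
    {
        const int64_t ab       = static_cast<int64_t>(a) * static_cast<int64_t>(b);
        const int64_t rounding = (ab >= 0) ? (1ll << 30) : (1 - (1ll << 30));
        return static_cast<int32_t>((ab + rounding) >> 31);
    }

    // Arithmetic right shift with round-to-nearest.
    int32_t rounding_divide_by_pow2(int32_t x, int exponent)
    {
        const int32_t mask      = (1 << exponent) - 1;
        const int32_t remainder = x & mask;
        const int32_t threshold = (mask >> 1) + ((x < 0) ? 1 : 0);
        return (x >> exponent) + ((remainder > threshold) ? 1 : 0);
    }

    // ((FixedPointMul(acc + bias, multiplier)) >> shift) + offset, clamped to the uint8 range.
    uint8_t quantize_down(int32_t acc, int32_t bias, int32_t multiplier, int shift, int32_t offset)
    {
        int32_t v = rounding_doubling_high_mul(acc + bias, multiplier);
        v         = rounding_divide_by_pow2(v, shift);
        v += offset;
        return static_cast<uint8_t>(std::min(255, std::max(0, v)));
    }

    int main()
    {
        // Made-up values: multiplier 2^30 represents 0.5 in Q0.31, shift 3, output offset 10.
        printf("%d\n", quantize_down(2345, 100, 1 << 30, 3, 10));
        return 0;
    }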
diff --git a/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h b/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h
index a59dcf8..53a024a 100644
--- a/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h
+++ b/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h
@@ -47,7 +47,7 @@
 
 /** Basic function to run @ref NELSTMLayerQuantized
  *
- * This function calls the following Neon functions/kernels:
+ * This function calls the following functions/kernels:
  *
  * -# @ref NEGEMMLowpMatrixMultiplyCore                          Quantized matrix multiplication core. Accumulators are 32-bit integers
  * -# @ref NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint   Convert 32-bit integers into QSYMM16
diff --git a/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h b/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h
index 7b1f7e9..fae26b3 100644
--- a/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h
@@ -35,7 +35,7 @@
 class NEFill;
 class NEMaxUnpoolingLayerKernel;
 
-/** Function to perform MaxUnpooling. This function calls the following Neon kernels:
+/** Function to perform MaxUnpooling. This function calls the following kernels:
  *
  * -# @ref NEFill
  * -# @ref NEMaxUnpoolingLayerKernel
diff --git a/arm_compute/runtime/NEON/functions/NENormalizationLayer.h b/arm_compute/runtime/NEON/functions/NENormalizationLayer.h
index fbc2f6f..8c4ad15 100644
--- a/arm_compute/runtime/NEON/functions/NENormalizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NENormalizationLayer.h
@@ -39,7 +39,7 @@
 class ITensor;
 class NENormalizationLayerKernel;
 
-/** Basic function to compute a normalization layer. This function calls the following Neon kernels:
+/** Basic function to compute a normalization layer. This function calls the following kernels:
  *
  * -# @ref NEPixelWiseMultiplication
  * -# @ref NEFillBorderKernel
diff --git a/arm_compute/runtime/NEON/functions/NEPadLayer.h b/arm_compute/runtime/NEON/functions/NEPadLayer.h
index 2426256..76ff064 100644
--- a/arm_compute/runtime/NEON/functions/NEPadLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEPadLayer.h
@@ -38,7 +38,7 @@
 {
 class NEPadLayerKernel;
 
-/** Basic function to pad a tensor. This function calls the following Neon functions/kernels:
+/** Basic function to pad a tensor. This function calls the following functions/kernels:
  *
  *  - For padding mode = PaddingMode::CONSTANT:
  *      -# @ref NEPadLayerKernel
diff --git a/arm_compute/runtime/NEON/functions/NEPermute.h b/arm_compute/runtime/NEON/functions/NEPermute.h
index fb95e45..2508458 100644
--- a/arm_compute/runtime/NEON/functions/NEPermute.h
+++ b/arm_compute/runtime/NEON/functions/NEPermute.h
@@ -52,7 +52,7 @@
     NEPermute &operator=(const NEPermute &) = delete;
     /** Default move assignment operator */
     NEPermute &operator=(NEPermute &&) = default;
-    /** Configure the permute Neon kernel
+    /** Configure the permute function
      *
      * @note Arbitrary permutation vectors are supported with rank not greater than 4
      *
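
A hypothetical usage sketch for the configure() call documented above; it is not part of this patch. The shapes are invented, and the example assumes the library's usual NCHW-to-NHWC permutation vector of (2, 0, 1).

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/TensorShape.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEPermute.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // Source shape (W=8, H=4, C=3); destination holds the permuted shape (C=3, W=8, H=4).
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(8U, 4U, 3U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(3U, 8U, 4U), 1, DataType::F32));

        // Rank-3 permutation: destination dimension i takes source dimension perm[i].
        NEPermute permute;
        permute.configure(&src, &dst, PermutationVector(2U, 0U, 1U));

        src.allocator()->allocate();
        dst.allocator()->allocate();
        permute.run();
        return 0;
    }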
diff --git a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h
index e374348..cb136eb 100644
--- a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h
@@ -36,7 +36,7 @@
 class ITensor;
 class ITensorInfo;
 
-/** Basic function to simulate a pooling layer with the specified pooling operation. This function calls the following Neon kernels:
+/** Basic function to simulate a pooling layer with the specified pooling operation. This function calls the following kernels:
  *
  * -# @ref NEFillBorderKernel (executed if padding size is different from zero)
  * -# @ref cpu::kernels::CpuPoolingKernel
diff --git a/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h b/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h
index 954aceb..e706179 100644
--- a/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h
@@ -47,7 +47,7 @@
 
 /** Basic function to run @ref NEQLSTMLayer
  *
- * This function calls the following Neon functions/kernels:
+ * This function calls the following functions/kernels:
  *
  * -# @ref NEActivationLayer                                     Activation functions (tanh and logistic)
  * -# @ref NEArithmeticAddition                                  Elementwise addition
diff --git a/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h b/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h
index 54ec76b..9e2d9ec 100644
--- a/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h
@@ -35,12 +35,7 @@
 class ITensor;
 class ITensorInfo;
 
-/** Basic function to simulate a quantization layer. This function calls the following Arm(R) Neon(TM) implementation layers:
- *
- *
- * -# @ref cpu::CpuQuantization
- *
- */
+/** Basic function to run a quantization layer using @ref cpu::CpuQuantization */
 class NEQuantizationLayer : public IFunction
 {
 public:
diff --git a/arm_compute/runtime/NEON/functions/NEROIAlignLayer.h b/arm_compute/runtime/NEON/functions/NEROIAlignLayer.h
index 9d93458..c72cd49 100644
--- a/arm_compute/runtime/NEON/functions/NEROIAlignLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEROIAlignLayer.h
@@ -32,12 +32,7 @@
 class ITensor;
 class ITensorInfo;
 
-/** Basic function to run @ref NEROIAlignLayerKernel.
- *
- * This function calls the following Neon kernels:
- * -# @ref NEROIAlignLayerKernel
- *
- */
+/** Basic function to run @ref NEROIAlignLayerKernel. */
 class NEROIAlignLayer : public INESimpleFunctionNoBorder
 {
 public:
diff --git a/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h b/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h
index 510c89c..214dd43 100644
--- a/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h
@@ -35,12 +35,7 @@
 class NEROIPoolingLayerKernel;
 class ROIPoolingLayerInfo;
 
-/** Basic function to run @ref NEROIPoolingLayerKernel.
- *
- * This function calls the following Neon kernels:
- * -# @ref NEROIPoolingLayerKernel
- *
- */
+/** Basic function to run @ref NEROIPoolingLayerKernel. */
 class NEROIPoolingLayer : public IFunction
 {
 public:
diff --git a/arm_compute/runtime/NEON/functions/NEReductionOperation.h b/arm_compute/runtime/NEON/functions/NEReductionOperation.h
index f30cc81..b96b709 100644
--- a/arm_compute/runtime/NEON/functions/NEReductionOperation.h
+++ b/arm_compute/runtime/NEON/functions/NEReductionOperation.h
@@ -35,7 +35,7 @@
 class ITensor;
 class NEReductionOperationKernel;
 
-/** Basic function to simulate a reduction operation. This function calls the following Neon kernels:
+/** Basic function to simulate a reduction operation. This function calls the following kernels:
  *
  * -# @ref NEReshapeLayer
  * -# @ref NEReductionOperationKernel
diff --git a/arm_compute/runtime/NEON/functions/NERemap.h b/arm_compute/runtime/NEON/functions/NERemap.h
index 84d0f2e..835ebfa 100644
--- a/arm_compute/runtime/NEON/functions/NERemap.h
+++ b/arm_compute/runtime/NEON/functions/NERemap.h
@@ -34,13 +34,10 @@
 {
 class ITensor;
 
-/** Basic function to execute remap. This function calls the following Neon kernels:
+/** Basic function to execute remap. This function calls the following kernels:
  *
  * -# @ref NEFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE)
  * -# @ref NERemapKernel
- *
- * @deprecated This function is deprecated and is intended to be removed in 21.05 release
- *
  */
 class NERemap : public INESimpleFunction
 {
diff --git a/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h b/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h
index aeeaefc..27c1ddf 100644
--- a/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h
+++ b/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h
@@ -36,7 +36,7 @@
 class NESpaceToBatchLayerKernel;
 class NEFill;
 
-/** Basic function to spatial divide a tensor. This function calls the following Neon kernels/functions:
+/** Basic function to spatially divide a tensor. This function calls the following kernels/functions:
  *
  *  -# @ref NEFill
  *  -# @ref NESpaceToBatchLayerKernel
diff --git a/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h b/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h
index d76fc48..73c228d 100644
--- a/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h
+++ b/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h
@@ -35,10 +35,7 @@
 class ITensorInfo;
 class NESpaceToDepthLayerKernel;
 
-/** This function calls the following Neon kernels/functions:
- *
- *  -# @ref NESpaceToDepthLayerKernel
- */
+/** Basic function to run @ref NESpaceToDepthLayerKernel. */
 class NESpaceToDepthLayer : public IFunction
 {
 public:
diff --git a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
index e41cdbd..befc373 100644
--- a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
@@ -42,7 +42,8 @@
 class ITensor;
 class ICPPKernel;
 
-/** Basic function to simulate a convolution layer. This function calls the following Neon kernels:
+/** Basic function to simulate a convolution layer. This function calls the following kernels:
+ *
  * -# @ref NEWinogradLayerTransformWeightsKernel (executed only once in the first call to the run() method )
  * -# @ref NEWinogradLayerTransformInputKernel
  * -# @ref NEWinogradLayerTransformOutputKernel