Fix doxygen references to new kernels

Resolves COMPMID-4117
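
The NEON kernels referenced by these Doxygen comments have either been
ported to the cpu::kernels namespace or superseded by their runtime
function counterparts, so the stale @ref tags are updated to point at the
new names. A representative before/after, taken from the
NEActivationLayer.h hunk below:

    -/** Basic function to run @ref NEActivationLayerKernel
    +/** Basic function to run @ref cpu::kernels::CpuActivationKernel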

Change-Id: I9945a92402e34b9cfe0ba9ef2a961b168bf62721
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4883
Reviewed-by: Pablo Marquez Tello <pablo.tello@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
diff --git a/arm_compute/runtime/NEON/functions/NEActivationLayer.h b/arm_compute/runtime/NEON/functions/NEActivationLayer.h
index 3d89355..ffda840 100644
--- a/arm_compute/runtime/NEON/functions/NEActivationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEActivationLayer.h
@@ -37,7 +37,7 @@
 class ITensor;
 class ITensorInfo;
 
-/** Basic function to run @ref NEActivationLayerKernel
+/** Basic function to run @ref cpu::kernels::CpuActivationKernel
  *
  * @note The function simulates an activation layer with the specified activation function.
  */
diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h
index 6648e46..8f9fd27 100644
--- a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h
+++ b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h
@@ -33,7 +33,7 @@
 class ITensor;
 class ITensorInfo;
 
-/** Basic function to run @ref CpuAddKernel */
+/** Basic function to run @ref cpu::kernels::CpuAddKernel */
 class NEArithmeticAddition : public IFunction
 {
 public:
diff --git a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h
index d16ab13..6aa724a 100644
--- a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h
@@ -56,7 +56,8 @@
     /** Initialise the kernel's inputs vector and output.
      *
      * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis.
-     * @note Preconditions can be found respectively at @ref NEWidthConcatenateLayerKernel, @ref NEHeightConcatenateLayerKernel and @ref NEDepthConcatenateLayerKernel.
+     * @note Preconditions can be found respectively at @ref cpu::kernels::CpuConcatenateWidthKernel, @ref cpu::kernels::CpuConcatenateHeightKernel,
+     *       @ref cpu::kernels::CpuConcatenateDepthKernel and @ref cpu::kernels::CpuConcatenateBatchKernel.
      *
      * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
      * @param[out]    output        Output tensor. Data types supported: Same as @p input.
@@ -66,7 +67,8 @@
     /** Static function to check if given info will lead to a valid configuration of @ref NEConcatenateLayer
      *
      * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis.
-     * @note Preconditions can be found respectively at @ref NEWidthConcatenateLayerKernel, @ref NEHeightConcatenateLayerKernel and @ref NEDepthConcatenateLayerKernel.
+     * @note Preconditions can be found respectively at @ref cpu::kernels::CpuConcatenateWidthKernel, @ref cpu::kernels::CpuConcatenateHeightKernel,
+     *       @ref cpu::kernels::CpuConcatenateDepthKernel and @ref cpu::kernels::CpuConcatenateBatchKernel.
      *
      * @param[in] inputs_vector The vectors containing all the tensors info to concatenate. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
      * @param[in] output        Output tensor info. Data types supported: Same as @p input.
diff --git a/arm_compute/runtime/NEON/functions/NECopy.h b/arm_compute/runtime/NEON/functions/NECopy.h
index d5f22d7..56f8bd9 100644
--- a/arm_compute/runtime/NEON/functions/NECopy.h
+++ b/arm_compute/runtime/NEON/functions/NECopy.h
@@ -35,7 +35,7 @@
 class ITensor;
 class ITensorInfo;
 
-/** Basic function to run @ref CpuCopyKernel */
+/** Basic function to run @ref cpu::kernels::CpuCopyKernel */
 class NECopy : public IFunction
 {
 public:
diff --git a/arm_compute/runtime/NEON/functions/NEFill.h b/arm_compute/runtime/NEON/functions/NEFill.h
index 3162e26..ba5d020 100644
--- a/arm_compute/runtime/NEON/functions/NEFill.h
+++ b/arm_compute/runtime/NEON/functions/NEFill.h
@@ -35,7 +35,7 @@
 {
 class ITensor;
 
-/** Basic function to run @ref CpuFillKernel */
+/** Basic function to run @ref cpu::kernels::CpuFillKernel */
 class NEFill : public IFunction
 {
 public:
diff --git a/arm_compute/runtime/NEON/functions/NEFloor.h b/arm_compute/runtime/NEON/functions/NEFloor.h
index 739225c..9560eb9 100644
--- a/arm_compute/runtime/NEON/functions/NEFloor.h
+++ b/arm_compute/runtime/NEON/functions/NEFloor.h
@@ -36,7 +36,7 @@
 class ITensor;
 class ITensorInfo;
 
-/** Basic function to run @ref NEFloorKernel */
+/** Basic function to run @ref cpu::kernels::CpuFloorKernel */
 class NEFloor : public IFunction
 {
 public:
diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h
index 124f027..c67154c 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMM.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMM.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -55,7 +55,7 @@
  * In both cases:
  *  -# @ref NEGEMMMatrixAdditionKernel (if c != nullptr and beta != 0.0 and is not reshaped once)
  * Else:
- *  -# @ref NEArithmeticAdditionKernel (if c != nullptr and is reshaped once and not optimized assembly in place)
+ *  -# @ref NEArithmeticAddition (if c != nullptr and is reshaped once and not optimized assembly in place)
  *
  *  -# @ref NEActivationLayer (if activation is specified in GEMMInfo)
  */
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
index 59d83ed..aadc429 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -156,7 +156,7 @@
  * -# @ref NEGEMM (if the data type is BFLOAT16/FP16/FP32)
  * -# @ref NEGEMMLowpMatrixMultiplyCore (if the data type is QASYMM8/QASYMM8_SIGNED)
  * -# @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if the data type is QASYMM8/QASYMM8_SIGNED)
- * -# @ref NEArithmeticAdditionKernel (if biases != nullptr and we have a 1x1 convolution with the NHWC data layout)
+ * -# @ref NEArithmeticAddition (if biases != nullptr and we have a 1x1 convolution with the NHWC data layout)
  * -# @ref NECol2ImKernel (if NCHW data layout)
  *
  */
diff --git a/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h b/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h
index 7973a6e..9626ca6 100644
--- a/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h
@@ -37,7 +37,7 @@
 
 /** Function to perform MaxUnpooling. This function calls the following NEON kernels:
  *
- * -# @ref NEFillKernel
+ * -# @ref NEFill
  * -# @ref NEMaxUnpoolingLayerKernel
  */
 class NEMaxUnpoolingLayer : public IFunction
diff --git a/arm_compute/runtime/NEON/functions/NEPermute.h b/arm_compute/runtime/NEON/functions/NEPermute.h
index 998a1d6..e2e50f4 100644
--- a/arm_compute/runtime/NEON/functions/NEPermute.h
+++ b/arm_compute/runtime/NEON/functions/NEPermute.h
@@ -36,7 +36,7 @@
 class ITensor;
 class ITensorInfo;
 
-/** Basic function to run @ref CpuPermuteKernel */
+/** Basic function to run @ref cpu::kernels::CpuPermuteKernel */
 class NEPermute : public IFunction
 {
 public:
diff --git a/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h b/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h
index e0054bc..34f51d3 100644
--- a/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020 Arm Limited.
+ * Copyright (c) 2020-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -50,13 +50,13 @@
  * This function calls the following NEON functions/kernels:
  *
  * -# @ref NEActivationLayer                                     Activation functions (tanh and logistic)
- * -# @ref NEArithmeticAddition                            Elementwise addition
+ * -# @ref NEArithmeticAddition                                  Elementwise addition
  * -# @ref NEArithmeticSubtractionKernel                         Elementwise subtraction
- * -# @ref NECopyKernel                                          Copy kernel for copying output_state_out to output
+ * -# @ref NECopy                                                Copy function for copying output_state_out to output
  * -# @ref NEGEMMLowpMatrixMultiplyCore                          Quantized matrix multiplication core. Accumulators are 32-bit integers
  * -# @ref NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint   Convert 32-bit integers into QSYMM16
  * -# @ref NEGEMMLowpMatrixAReductionKernel                      For precomputing effective biases to use
- * -# @ref NEPixelWiseMultiplication                       Elementwise multiplication
+ * -# @ref NEPixelWiseMultiplication                             Elementwise multiplication
  * -# @ref NETranspose                                           Transpose function for reshaping the weights
  * */
 class NEQLSTMLayer : public IFunction
diff --git a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h
index b4c3af1..b8c0a84 100644
--- a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h
@@ -34,7 +34,7 @@
 // Forward declarations
 class ITensor;
 
-/** Basic function to run @ref NEReshapeLayerKernel */
+/** Basic function to run @ref cpu::kernels::CpuReshapeKernel */
 class NEReshapeLayer : public IFunction
 {
 public:
diff --git a/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h b/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h
index 3a6f8d7..6a38dfb 100644
--- a/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h
+++ b/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h
@@ -38,7 +38,7 @@
 
 /** Basic function to spatial divide a tensor. This function calls the following NEON kernels/functions:
  *
- *  -# @ref NEMemsetKernel
+ *  -# @ref NEFill
  *  -# @ref NESpaceToBatchLayerKernel
  */
 class NESpaceToBatchLayer : public IFunction
diff --git a/docs/00_introduction.dox b/docs/00_introduction.dox
index 5e8769c..effeb7b 100644
--- a/docs/00_introduction.dox
+++ b/docs/00_introduction.dox
@@ -110,8 +110,8 @@
    This is planned to be resolved in 21.02 release.
  - Added new data type QASYMM8_SIGNED support for @ref NEROIAlignLayer.
  - Added new data type S32 support for:
-   - @ref NEArithmeticSubtraction
-   - @ref NEArithmeticSubtractionKernel
+   - NEArithmeticSubtraction
+   - NEArithmeticSubtractionKernel
    - @ref NEPixelWiseMultiplication
    - @ref NEPixelWiseMultiplicationKernel
    - @ref NEElementwiseDivision
@@ -430,12 +430,12 @@
    - More robust script for running benchmarks
  - Removed padding from:
    - @ref NEPixelWiseMultiplicationKernel
-   - @ref NEHeightConcatenateLayerKernel
+   - NEHeightConcatenateLayerKernel
    - @ref NEThresholdKernel
-   - @ref NEBatchConcatenateLayerKernel
+   - NEBatchConcatenateLayerKernel
    - @ref NETransposeKernel
    - @ref NEBatchNormalizationLayerKernel
-   - @ref NEArithmeticSubtractionKernel
+   - NEArithmeticSubtractionKernel
    - @ref NEBoundingBoxTransformKernel
    - @ref NELogits1DMaxKernel
    - @ref NELogits1DSoftmaxKernel
@@ -444,8 +444,8 @@
    - NEYOLOLayerKernel
    - NEUpsampleLayerKernel
    - NEFloorKernel
-   - @ref NEWidthConcatenateLayerKernel
-   - @ref NEDepthConcatenateLayerKernel
+   - NEWidthConcatenateLayerKernel
+   - NEDepthConcatenateLayerKernel
    - @ref NENormalizationLayerKernel
    - @ref NEL2NormalizeLayerKernel
    - @ref NEFillArrayKernel
@@ -526,7 +526,7 @@
      - @ref NEQLSTMLayerNormalizationKernel
  - Added HARD_SWISH support in:
      - @ref CLActivationLayerKernel
-     - @ref NEActivationLayerKernel
+     - NEActivationLayerKernel
  - Deprecated OpenCL kernels / functions:
      - CLGEMMLowpQuantizeDownInt32ToUint8Scale
      - CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFloat
@@ -697,7 +697,7 @@
     - @ref NENegLayer
     - @ref NEPReluLayer
     - @ref NESinLayer
-    - @ref NEBatchConcatenateLayerKernel
+    - NEBatchConcatenateLayerKernel
     - @ref NEDepthToSpaceLayerKernel / @ref NEDepthToSpaceLayer
     - @ref NEDepthwiseConvolutionLayerNativeKernel
     - @ref NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel
@@ -749,7 +749,7 @@
     - @ref NEFFTRadixStageKernel
     - @ref NEFFTScaleKernel
     - @ref NEGEMMLowpOffsetContributionOutputStageKernel
-    - @ref NEHeightConcatenateLayerKernel
+    - NEHeightConcatenateLayerKernel
     - @ref NESpaceToBatchLayerKernel / @ref NESpaceToBatchLayer
     - @ref NEFFT1D
     - @ref NEFFT2D
@@ -882,7 +882,7 @@
     - @ref CLROIAlignLayer
     - @ref CLGenerateProposalsLayer
  - Added QASYMM8 support to the following kernels:
-    - @ref NEArithmeticAdditionKernel
+    - NEArithmeticAdditionKernel
     - @ref NEScale
  - Added new tests and improved validation and benchmarking suites.
  - Deprecated functions/interfaces
@@ -1062,7 +1062,7 @@
  - Added FP16 support to:
     - CLDepthwiseConvolutionLayer3x3
     - @ref CLDepthwiseConvolutionLayer
- - Added broadcasting support to @ref NEArithmeticAddition / @ref CLArithmeticAddition / @ref CLPixelWiseMultiplication
+ - Added broadcasting support to NEArithmeticAddition / @ref CLArithmeticAddition / @ref CLPixelWiseMultiplication
  - Added fused batched normalization and activation to @ref CLBatchNormalizationLayer and @ref NEBatchNormalizationLayer
  - Added support for non-square pooling to @ref NEPoolingLayer and @ref CLPoolingLayer
  - New OpenCL kernels / functions:
@@ -1218,7 +1218,7 @@
     - @ref CPPDetectionWindowNonMaximaSuppressionKernel
  - New NEON kernels / functions:
     - @ref NEBatchNormalizationLayerKernel / @ref NEBatchNormalizationLayer
-    - @ref NEDepthConcatenateLayerKernel / NEDepthConcatenateLayer
+    - NEDepthConcatenateLayerKernel / NEDepthConcatenateLayer
     - @ref NEDirectConvolutionLayerKernel / @ref NEDirectConvolutionLayer
     - NELocallyConnectedMatrixMultiplyKernel / NELocallyConnectedLayer
     - @ref NEWeightsReshapeKernel / @ref NEConvolutionLayerReshapeWeights
@@ -1276,7 +1276,7 @@
    - @ref CLNormalizationLayerKernel / @ref CLNormalizationLayer
    - @ref CLLaplacianPyramid, @ref CLLaplacianReconstruct
  - New NEON kernels / functions:
-   - @ref NEActivationLayerKernel / @ref NEActivationLayer
+   - NEActivationLayerKernel / @ref NEActivationLayer
    - GEMM refactoring + FP16 support (Requires armv8.2 CPU): @ref NEGEMMInterleave4x4Kernel, @ref NEGEMMTranspose1xWKernel, @ref NEGEMMMatrixMultiplyKernel, @ref NEGEMMMatrixAdditionKernel / @ref NEGEMM
    - @ref NEPoolingLayerKernel / @ref NEPoolingLayer
 
diff --git a/docs/04_adding_operator.dox b/docs/04_adding_operator.dox
index 13be712..9e6f375 100644
--- a/docs/04_adding_operator.dox
+++ b/docs/04_adding_operator.dox
@@ -121,7 +121,7 @@
 The run will call the function defined in the .cl file.
 
 For the NEON backend case:
-@snippet src/core/NEON/kernels/NEReshapeLayerKernel.cpp NEReshapeLayerKernel Kernel
+@snippet src/core/cpu/kernels/CpuReshapeKernel.cpp NEReshapeLayerKernel Kernel
 
 In the NEON case, there is no need to add an extra file and we implement the kernel in the same NEReshapeLayerKernel.cpp file.
 If the tests are already in place, the new kernel can be tested using the existing tests by adding the configure and run of the kernel to the compute_target() in the fixture.
diff --git a/src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h b/src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h
index 2f80361..39e5300 100644
--- a/src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h
+++ b/src/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -58,7 +58,7 @@
      * @param[out] output Destination tensor. Data types supported: opposite of @p input.
      */
     void configure(const ITensor *input, ITensor *output);
-    /** Static function to check if given info will lead to a valid configuration of @ref NECopyKernel
+    /** Static function to check if given info will lead to a valid configuration of @ref NEConvertQuantizedSignednessKernel
      *
      * @param[in] input  Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED.
      * @param[in] output Destination tensor. Data types supported: opposite of @p input.
diff --git a/src/core/NEON/kernels/NEReorgLayerKernel.h b/src/core/NEON/kernels/NEReorgLayerKernel.h
index eac9115..38a7d9f 100644
--- a/src/core/NEON/kernels/NEReorgLayerKernel.h
+++ b/src/core/NEON/kernels/NEReorgLayerKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -60,7 +60,7 @@
      */
     void configure(const ITensor *input, ITensor *output, int32_t stride);
 
-    /** Static function to check if given info will lead to a valid configuration of @ref NEReshapeLayerKernel
+    /** Static function to check if given info will lead to a valid configuration of @ref cpu::kernels::CpuReshapeKernel
      *
      * @param[in] input  Source tensor info. Data type supported: All
      * @param[in] output Destination tensor info. Data type supported: Same as @p input
diff --git a/src/core/SubTensorInfo.cpp b/src/core/SubTensorInfo.cpp
index 6279992..fd3ebf2 100644
--- a/src/core/SubTensorInfo.cpp
+++ b/src/core/SubTensorInfo.cpp
@@ -27,8 +27,8 @@
 #include "arm_compute/core/Helpers.h"
 #include "arm_compute/core/Validate.h"
 
-using namespace arm_compute;
-
+namespace arm_compute
+{
 namespace
 {
 /** Extends parent shape depending on subtensor's coordinates and shape
@@ -149,3 +149,4 @@
 
     return offset;
 }
+} // namespace arm_compute
diff --git a/src/core/cpu/kernels/CpuActivationKernel.cpp b/src/core/cpu/kernels/CpuActivationKernel.cpp
index abdba3a..efdb42b 100644
--- a/src/core/cpu/kernels/CpuActivationKernel.cpp
+++ b/src/core/cpu/kernels/CpuActivationKernel.cpp
@@ -150,16 +150,16 @@
     ActivationLayerInfo::ActivationFunction::HARD_SWISH
 };
 
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &activation_info)
+Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst, const ActivationLayerInfo &activation_info)
 {
-    ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::QSYMM16, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(src);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::QSYMM16, DataType::F16, DataType::F32);
 
-    const auto *uk = get_implementation(ActivationSelectorData{ input->data_type() });
+    const auto *uk = get_implementation(ActivationSelectorData{ src->data_type() });
     ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);
 
-    const DataType                                data_type = input->data_type();
-    const QuantizationInfo                       &oq_info   = (output != nullptr) ? output->quantization_info() : input->quantization_info();
+    const DataType                                data_type = src->data_type();
+    const QuantizationInfo                       &oq_info   = (dst != nullptr) ? dst->quantization_info() : src->quantization_info();
     const ActivationLayerInfo::ActivationFunction f_act     = activation_info.activation();
 
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized_asymmetric(data_type) && (std::find(std::begin(qasymm8_activations), std::end(qasymm8_activations), f_act) == std::end(qasymm8_activations)),
@@ -178,54 +178,54 @@
     ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_symmetric(data_type) && (f_act == ActivationLayerInfo::ActivationFunction::TANH) && (oq_info != QuantizationInfo(1.f / 32768.f, 0)));
     ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_quantized_symmetric(data_type) && (f_act == ActivationLayerInfo::ActivationFunction::LOGISTIC) && (oq_info != QuantizationInfo(1.f / 32768.f, 0)));
 
-    // Checks performed when output is configured
-    if((output != nullptr) && (output->total_size() != 0))
+    // Checks performed when dst is configured
+    if((dst != nullptr) && (dst->total_size() != 0))
     {
-        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
-        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(src, dst);
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
     }
 
     return Status{};
 }
 
-std::pair<Status, Window> validate_and_configure_window(const ITensorInfo *input, ITensorInfo *output)
+std::pair<Status, Window> validate_and_configure_window(const ITensorInfo *src, ITensorInfo *dst)
 {
     // Configure kernel window
-    Window win = calculate_max_window(*input, Steps());
+    Window win = calculate_max_window(*src, Steps());
 
-    if(output != nullptr)
+    if(dst != nullptr)
     {
-        // Output auto inizialitation if not yet initialized
-        auto_init_if_empty(*output, *input->clone());
+        // dst auto initialization if not yet initialized
+        auto_init_if_empty(*dst, *src->clone());
 
         Coordinates coord;
-        coord.set_num_dimensions(output->num_dimensions());
-        output->set_valid_region(ValidRegion(coord, output->tensor_shape()));
+        coord.set_num_dimensions(dst->num_dimensions());
+        dst->set_valid_region(ValidRegion(coord, dst->tensor_shape()));
     }
 
     return std::make_pair(Status{}, win);
 }
 } // namespace
 
-void CpuActivationKernel::configure(const ITensorInfo *input, ITensorInfo *output, ActivationLayerInfo activation_info)
+void CpuActivationKernel::configure(const ITensorInfo *src, ITensorInfo *dst, ActivationLayerInfo activation_info)
 {
-    ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+    ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
 
     _act_info = activation_info;
 
-    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input, output, activation_info));
+    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, dst, activation_info));
 
     // Configure kernel window
-    auto win_config = validate_and_configure_window(input, output);
+    auto win_config = validate_and_configure_window(src, dst);
     ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
     ICPPKernel::configure(win_config.second);
 }
 
-Status CpuActivationKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info)
+Status CpuActivationKernel::validate(const ITensorInfo *src, const ITensorInfo *dst, const ActivationLayerInfo &act_info)
 {
     ARM_COMPUTE_UNUSED(act_info);
-    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, act_info));
-    ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), (output != nullptr) ? output->clone().get() : nullptr).first);
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(src, dst, act_info));
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(src->clone().get(), (dst != nullptr) ? dst->clone().get() : nullptr).first);
 
     return Status{};
 }
diff --git a/src/core/cpu/kernels/CpuActivationKernel.h b/src/core/cpu/kernels/CpuActivationKernel.h
index e49171b..de71014 100644
--- a/src/core/cpu/kernels/CpuActivationKernel.h
+++ b/src/core/cpu/kernels/CpuActivationKernel.h
@@ -49,7 +49,7 @@
      * @param[in]      activation_info Activation layer information.
      */
     void configure(const ITensorInfo *src, ITensorInfo *dst, ActivationLayerInfo activation_info);
-    /** Static function to check if given info will lead to a valid configuration of @ref NEActivationLayerKernel
+    /** Static function to check if given info will lead to a valid configuration of @ref CpuActivationKernel
      *
      * @param[in] src      Source tensor info. In case of @p dst tensor info = nullptr, this tensor will store the result
      *                     of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32.
@@ -58,7 +58,7 @@
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info);
+    static Status validate(const ITensorInfo *src, const ITensorInfo *dst, const ActivationLayerInfo &act_info);
 
     // Inherited methods overridden:
     void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
diff --git a/src/core/cpu/kernels/CpuCopyKernel.h b/src/core/cpu/kernels/CpuCopyKernel.h
index 7e33bf4..98b79a9 100644
--- a/src/core/cpu/kernels/CpuCopyKernel.h
+++ b/src/core/cpu/kernels/CpuCopyKernel.h
@@ -48,7 +48,7 @@
     void configure(const ITensorInfo *src, ITensorInfo *dst, const PaddingList &padding = PaddingList());
     /** Static function to check if given info will lead to a valid configuration of @ref CpuCopyKernel
      *
-     * @param[in] srd     Source tensor. Data types supported: All
+     * @param[in] src     Source tensor. Data types supported: All
      * @param[in] dst     Destination tensor. Data types supported: same as @p src.
      * @param[in] padding (Optional) Padding to be applied to the input tensor
      *
diff --git a/src/core/helpers/ScaleHelpers.h b/src/core/helpers/ScaleHelpers.h
index f19a8b8..c500f64 100644
--- a/src/core/helpers/ScaleHelpers.h
+++ b/src/core/helpers/ScaleHelpers.h
@@ -329,12 +329,12 @@
 /** Computes bilinear interpolation using the top-left, top-right, bottom-left, bottom-right pixels and the pixel's distance between
  * the real coordinates and the smallest following integer coordinates.
  *
- * @param[in] a00 The top-left pixel value.
- * @param[in] a01 The top-right pixel value.
- * @param[in] a10 The bottom-left pixel value.
- * @param[in] a11 The bottom-right pixel value.
- * @param[in] dx  Pixel's distance between the X real coordinate and the smallest X following integer
- * @param[in] dy  Pixel's distance between the Y real coordinate and the smallest Y following integer
+ * @param[in] a00    The top-left pixel value.
+ * @param[in] a01    The top-right pixel value.
+ * @param[in] a10    The bottom-left pixel value.
+ * @param[in] a11    The bottom-right pixel value.
+ * @param[in] dx_val Pixel's distance between the X real coordinate and the smallest X following integer
+ * @param[in] dy_val Pixel's distance between the Y real coordinate and the smallest Y following integer
  *
  * @note dx and dy must be in the range [0, 1.0]
  *
diff --git a/src/runtime/cpu/operators/CpuActivation.h b/src/runtime/cpu/operators/CpuActivation.h
index a357b32..0ae16bf 100644
--- a/src/runtime/cpu/operators/CpuActivation.h
+++ b/src/runtime/cpu/operators/CpuActivation.h
@@ -30,7 +30,7 @@
 {
 namespace cpu
 {
-/** Basic function to run @ref CpuActivationKernel */
+/** Basic function to run @ref kernels::CpuActivationKernel */
 class CpuActivation : public ICpuOperator
 {
 public:
@@ -43,7 +43,7 @@
      * @param[in]  activation_info Activation layer parameters.
      */
     void configure(const ITensorInfo *input, ITensorInfo *output, const ActivationLayerInfo &activation_info);
-    /** Static function to check if given info will lead to a valid configuration of @ref NEActivationLayer
+    /** Static function to check if given info will lead to a valid configuration of @ref CpuActivation
      *
      * @param[in] input    Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32.
      * @param[in] output   Destination tensor info. Data type supported: same as @p src
diff --git a/src/runtime/cpu/operators/CpuAdd.h b/src/runtime/cpu/operators/CpuAdd.h
index 7ddc69b..8ae7833 100644
--- a/src/runtime/cpu/operators/CpuAdd.h
+++ b/src/runtime/cpu/operators/CpuAdd.h
@@ -30,7 +30,7 @@
 {
 namespace cpu
 {
-/** Basic function to run @ref CpuAddKernel */
+/** Basic function to run @ref kernels::CpuAddKernel */
 class CpuAdd : public ICpuOperator
 {
 public:
@@ -60,7 +60,7 @@
      *
      */
     void configure(const ITensorInfo *src0, const ITensorInfo *src1, ITensorInfo *dst, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
-    /** Static function to check if given info will lead to a valid configuration of @ref CpuAddKernel
+    /** Static function to check if given info will lead to a valid configuration of @ref CpuAdd
      *
      * @param[in] src0     First input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
      * @param[in] src1     Second input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
diff --git a/src/runtime/cpu/operators/CpuConcatenate.h b/src/runtime/cpu/operators/CpuConcatenate.h
index 3765342..d2af3e2 100644
--- a/src/runtime/cpu/operators/CpuConcatenate.h
+++ b/src/runtime/cpu/operators/CpuConcatenate.h
@@ -35,10 +35,10 @@
 {
 /** Basic function to execute concatenate tensors along a given axis. This function calls the following kernels:
  *
- * -# @ref CpuConcatenateWidthKernel (if underlying concatenation axis is 0).
- * -# @ref CpuConcatenateHeightKernel (if underlying concatenation axis is 1).
- * -# @ref CpuConcatenateDepthKernel (if underlying concatenation axis is 2).
- * -# @ref CpuConcatenateBatchKernel (if underlying concatenation axis is 3).
+ * -# @ref kernels::CpuConcatenateWidthKernel (if underlying concatenation axis is 0).
+ * -# @ref kernels::CpuConcatenateHeightKernel (if underlying concatenation axis is 1).
+ * -# @ref kernels::CpuConcatenateDepthKernel (if underlying concatenation axis is 2).
+ * -# @ref kernels::CpuConcatenateBatchKernel (if underlying concatenation axis is 3).
  */
 class CpuConcatenate : public ICpuOperator
 {
@@ -48,7 +48,8 @@
     /** Configure operator for a given list of arguments
      *
      * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis.
-     * @note Preconditions can be found respectively at @ref CpuConcatenateWidthKernel, @ref CpuConcatenateHeightKernel, @ref CpuConcatenateDepthKernel and @ref CpuConcatenateBatchKernel.
+     * @note Preconditions can be found respectively at @ref kernels::CpuConcatenateWidthKernel, @ref kernels::CpuConcatenateHeightKernel,
+     *       @ref kernels::CpuConcatenateDepthKernel and @ref kernels::CpuConcatenateBatchKernel.
      *
      * @param[in,out] srcs_vector The vectors containing all the tensors to concatenate. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
      * @param[out]    dst         Output tensor. Data types supported: Same as @p srcs_vector.
@@ -58,7 +59,8 @@
     /** Static function to check if given info will lead to a valid configuration of @ref NEConcatenateLayer
      *
      * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis.
-     * @note Preconditions can be found respectively at @ref CpuConcatenateWidthKernel, @ref CpuConcatenateHeightKernel, @ref CpuConcatenateDepthKernel and @ref CpuConcatenateBatchKernel.
+     * @note Preconditions can be found respectively at @ref kernels::CpuConcatenateWidthKernel, @ref kernels::CpuConcatenateHeightKernel,
+     *       @ref kernels::CpuConcatenateDepthKernel and @ref kernels::CpuConcatenateBatchKernel.
      *
      * @param[in] srcs_vector The vectors containing all the tensors info to concatenate. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
      * @param[in] dst         Output tensor info. Data types supported: Same as @p srcs_vector.
diff --git a/src/runtime/cpu/operators/CpuCopy.h b/src/runtime/cpu/operators/CpuCopy.h
index 5764613..057bb6e 100644
--- a/src/runtime/cpu/operators/CpuCopy.h
+++ b/src/runtime/cpu/operators/CpuCopy.h
@@ -30,7 +30,7 @@
 {
 namespace cpu
 {
-/** Basic function to run @ref CpuCopyKernel */
+/** Basic function to run @ref kernels::CpuCopyKernel */
 class CpuCopy : public ICpuOperator
 {
 public:
diff --git a/src/runtime/cpu/operators/CpuFill.h b/src/runtime/cpu/operators/CpuFill.h
index 7a75f42..fac8e76 100644
--- a/src/runtime/cpu/operators/CpuFill.h
+++ b/src/runtime/cpu/operators/CpuFill.h
@@ -30,7 +30,7 @@
 {
 namespace cpu
 {
-/** Basic function to run @ref CpuFillKernel */
+/** Basic function to run @ref kernels::CpuFillKernel */
 class CpuFill : public ICpuOperator
 {
 public:
diff --git a/src/runtime/cpu/operators/CpuFloor.h b/src/runtime/cpu/operators/CpuFloor.h
index 86a01e3..cbb9d56 100644
--- a/src/runtime/cpu/operators/CpuFloor.h
+++ b/src/runtime/cpu/operators/CpuFloor.h
@@ -30,7 +30,7 @@
 {
 namespace cpu
 {
-/** Basic function to run @ref CpuFloorKernel */
+/** Basic function to run @ref kernels::CpuFloorKernel */
 class CpuFloor : public ICpuOperator
 {
 public:
@@ -49,7 +49,7 @@
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *output);
+    static Status validate(const ITensorInfo *src, const ITensorInfo *dst);
 };
 } // namespace cpu
 } // namespace arm_compute
diff --git a/src/runtime/cpu/operators/CpuPermute.h b/src/runtime/cpu/operators/CpuPermute.h
index 31ad77e..12f0cf6 100644
--- a/src/runtime/cpu/operators/CpuPermute.h
+++ b/src/runtime/cpu/operators/CpuPermute.h
@@ -30,7 +30,7 @@
 {
 namespace cpu
 {
-/** Basic function to run @ref CpuPermuteKernel */
+/** Basic function to run @ref kernels::CpuPermuteKernel */
 class CpuPermute : public ICpuOperator
 {
 public:
diff --git a/src/runtime/cpu/operators/CpuReshape.h b/src/runtime/cpu/operators/CpuReshape.h
index b718b07..e136043 100644
--- a/src/runtime/cpu/operators/CpuReshape.h
+++ b/src/runtime/cpu/operators/CpuReshape.h
@@ -30,7 +30,7 @@
 {
 namespace cpu
 {
-/** Basic function to run @ref CpuReshapeKernel */
+/** Basic function to run @ref kernels::CpuReshapeKernel */
 class CpuReshape : public ICpuOperator
 {
 public: