Align kernel/operator header layout

- Redirect validate documentation to configure
- Align header include-guard names
- Align class layout

Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: Ia40f67383826a66e9f9a33745d66805551e31a3a
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5897
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
diff --git a/src/core/cpu/kernels/CpuAddKernel.h b/src/core/cpu/kernels/CpuAddKernel.h
index 3ebaa46..717d013 100644
--- a/src/core/cpu/kernels/CpuAddKernel.h
+++ b/src/core/cpu/kernels/CpuAddKernel.h
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_CPUADDKERNEL_H
-#define ARM_COMPUTE_CPUADDKERNEL_H
+#ifndef ARM_COMPUTE_CPU_ADD_KERNEL_H
+#define ARM_COMPUTE_CPU_ADD_KERNEL_H
 
 #include "src/core/common/Macros.h"
 #include "src/core/cpu/ICpuKernel.h"
@@ -84,4 +84,4 @@
 } // namespace kernels
 } // namespace cpu
 } // namespace arm_compute
-#endif /*ARM_COMPUTE_CPUADDKERNEL_H */
+#endif /* ARM_COMPUTE_CPU_ADD_KERNEL_H */
diff --git a/src/core/cpu/kernels/CpuConcatenateBatchKernel.cpp b/src/core/cpu/kernels/CpuConcatenateBatchKernel.cpp
index 5df5ac3..16c0efc 100644
--- a/src/core/cpu/kernels/CpuConcatenateBatchKernel.cpp
+++ b/src/core/cpu/kernels/CpuConcatenateBatchKernel.cpp
@@ -146,11 +146,6 @@
 }
 } // namespace
 
-CpuConcatenateBatchKernel::CpuConcatenateBatchKernel()
-    : _func(nullptr), _batch_offset(0)
-{
-}
-
 void CpuConcatenateBatchKernel::configure(const ITensorInfo *src, unsigned int batch_offset, ITensorInfo *dst)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
diff --git a/src/core/cpu/kernels/CpuConcatenateBatchKernel.h b/src/core/cpu/kernels/CpuConcatenateBatchKernel.h
index 99e8d84..1706926 100644
--- a/src/core/cpu/kernels/CpuConcatenateBatchKernel.h
+++ b/src/core/cpu/kernels/CpuConcatenateBatchKernel.h
@@ -21,17 +21,14 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_CPU_CONCATENATEBATCH_KERNEL_H
-#define ARM_COMPUTE_CPU_CONCATENATEBATCH_KERNEL_H
+#ifndef ARM_COMPUTE_CPU_CONCATENATE_BATCH_KERNEL_H
+#define ARM_COMPUTE_CPU_CONCATENATE_BATCH_KERNEL_H
 
 #include "src/core/common/Macros.h"
 #include "src/core/cpu/ICpuKernel.h"
 
 namespace arm_compute
 {
-// Forward declarations
-class ITensor;
-
 namespace cpu
 {
 namespace kernels
@@ -42,7 +39,7 @@
 class CpuConcatenateBatchKernel : public ICpuKernel
 {
 public:
-    CpuConcatenateBatchKernel();
+    CpuConcatenateBatchKernel() = default;
     ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuConcatenateBatchKernel);
     /** Configure kernel for a given list of arguments
      *
@@ -51,11 +48,9 @@
      * @param[in,out] dst          Destination tensor info. Data types supported: Same as @p src.
      */
     void configure(const ITensorInfo *src, unsigned int batch_offset, ITensorInfo *dst);
-    /**  Static function to check if given info will lead to a valid configuration of @ref CpuConcatenateBatchKernel
+    /** Static function to check if given info will lead to a valid configuration
      *
-     * @param[in] src          Source tensor info. Data types supported: All.
-     * @param[in] batch_offset The offset on axis # 3.
-     * @param[in] dst          Destination tensor info. Data types supported: Same as @p src.
+     * Similar to @ref CpuConcatenateBatchKernel::configure()
      *
      * @return a status
      */
@@ -69,10 +64,10 @@
     using BatchConcatFunction = void(const ITensor *, ITensor *, unsigned int, const Window &);
 
 private:
-    BatchConcatFunction *_func;
-    unsigned int         _batch_offset;
+    BatchConcatFunction *_func{ nullptr };
+    unsigned int         _batch_offset{ 0 };
 };
 } // namespace kernels
 } // namespace cpu
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_CPU_CONCATENATEBATCH_KERNEL_H */
+#endif /* ARM_COMPUTE_CPU_CONCATENATE_BATCH_KERNEL_H */
diff --git a/src/core/cpu/kernels/CpuConcatenateDepthKernel.cpp b/src/core/cpu/kernels/CpuConcatenateDepthKernel.cpp
index a7e5cd8..133499d 100644
--- a/src/core/cpu/kernels/CpuConcatenateDepthKernel.cpp
+++ b/src/core/cpu/kernels/CpuConcatenateDepthKernel.cpp
@@ -146,11 +146,6 @@
 }
 } // namespace
 
-CpuConcatenateDepthKernel::CpuConcatenateDepthKernel()
-    : _func(nullptr), _depth_offset(0)
-{
-}
-
 void CpuConcatenateDepthKernel::configure(const ITensorInfo *src, unsigned int depth_offset, ITensorInfo *dst)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
diff --git a/src/core/cpu/kernels/CpuConcatenateDepthKernel.h b/src/core/cpu/kernels/CpuConcatenateDepthKernel.h
index af89c24..3ec19a8 100644
--- a/src/core/cpu/kernels/CpuConcatenateDepthKernel.h
+++ b/src/core/cpu/kernels/CpuConcatenateDepthKernel.h
@@ -22,8 +22,8 @@
  * SOFTWARE.
  */
 
-#ifndef ARM_COMPUTE_CPU_CONCATENATEDEPTH_KERNEL_H
-#define ARM_COMPUTE_CPU_CONCATENATEDEPTH_KERNEL_H
+#ifndef ARM_COMPUTE_CPU_CONCATENATE_DEPTH_KERNEL_H
+#define ARM_COMPUTE_CPU_CONCATENATE_DEPTH_KERNEL_H
 
 #include "src/core/common/Macros.h"
 #include "src/core/cpu/ICpuKernel.h"
@@ -43,7 +43,7 @@
 class CpuConcatenateDepthKernel : public ICpuKernel
 {
 public:
-    CpuConcatenateDepthKernel();
+    CpuConcatenateDepthKernel() = default;
     ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuConcatenateDepthKernel);
     /** Configure kernel for a given list of arguments
      *
@@ -56,11 +56,9 @@
      *
      */
     void configure(const ITensorInfo *src, unsigned int depth_offset, ITensorInfo *dst);
-    /**  Static function to check if given info will lead to a valid configuration of @ref CpuConcatenateDepthKernel
+    /** Static function to check if given info will lead to a valid configuration
      *
-     * @param[in] src          Source tensor info. Data types supported:  QASYMM8/QASYMM8_SIGNED/F16/F32.
-     * @param[in] depth_offset The offset on the Z axis.
-     * @param[in] dst          Destination tensor info. Data types supported: Same as @p src.
+     * Similar to @ref CpuConcatenateDepthKernel::configure()
      *
      * @return a status
      */
@@ -74,10 +72,10 @@
     using DepthConcatFunction = void(const ITensor *, ITensor *, unsigned int, const Window &);
 
 private:
-    DepthConcatFunction *_func;
-    unsigned int         _depth_offset;
+    DepthConcatFunction *_func{ nullptr };
+    unsigned int         _depth_offset{ 0 };
 };
 } // namespace kernels
 } // namespace cpu
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_CPU_CONCATENATEDEPTH_KERNEL_H */
+#endif /* ARM_COMPUTE_CPU_CONCATENATE_DEPTH_KERNEL_H */
diff --git a/src/core/cpu/kernels/CpuConcatenateHeightKernel.cpp b/src/core/cpu/kernels/CpuConcatenateHeightKernel.cpp
index 54b9726..dfd442b 100644
--- a/src/core/cpu/kernels/CpuConcatenateHeightKernel.cpp
+++ b/src/core/cpu/kernels/CpuConcatenateHeightKernel.cpp
@@ -62,11 +62,6 @@
 }
 } // namespace
 
-CpuConcatenateHeightKernel::CpuConcatenateHeightKernel()
-    : _height_offset(0)
-{
-}
-
 void CpuConcatenateHeightKernel::configure(const ITensorInfo *src, unsigned int height_offset, ITensorInfo *dst)
 {
     ARM_COMPUTE_UNUSED(src);
diff --git a/src/core/cpu/kernels/CpuConcatenateHeightKernel.h b/src/core/cpu/kernels/CpuConcatenateHeightKernel.h
index 609bb21..e5e15e1 100644
--- a/src/core/cpu/kernels/CpuConcatenateHeightKernel.h
+++ b/src/core/cpu/kernels/CpuConcatenateHeightKernel.h
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_CPU_CONCATENATEHEIGHT_KERNEL_H
-#define ARM_COMPUTE_CPU_CONCATENATEHEIGHT_KERNEL_H
+#ifndef ARM_COMPUTE_CPU_CONCATENATE_HEIGHT_KERNEL_H
+#define ARM_COMPUTE_CPU_CONCATENATE_HEIGHT_KERNEL_H
 
 #include "src/core/common/Macros.h"
 #include "src/core/cpu/ICpuKernel.h"
@@ -39,7 +39,7 @@
 class CpuConcatenateHeightKernel : public ICpuKernel
 {
 public:
-    CpuConcatenateHeightKernel();
+    CpuConcatenateHeightKernel() = default;
     ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuConcatenateHeightKernel);
     /** Configure kernel for a given list of arguments
      *
@@ -49,11 +49,9 @@
      *
      */
     void configure(const ITensorInfo *src, unsigned int height_offset, ITensorInfo *dst);
-    /**  Static function to check if given info will lead to a valid configuration of @ref CpuConcatenateHeightKernel
+    /** Static function to check if given info will lead to a valid configuration
      *
-     * @param[in] src           Source tensor info. Data types supported: All
-     * @param[in] height_offset The starting offset on the Y axis for the output tensor.
-     * @param[in] dst           Destination tensor info. Data types supported: Same as @p src.
+     * Similar to @ref CpuConcatenateHeightKernel::configure()
      *
      * @return a status
      */
@@ -64,9 +62,9 @@
     const char *name() const override;
 
 private:
-    unsigned int _height_offset;
+    unsigned int _height_offset{ 0 };
 };
 } // namespace kernels
 } // namespace cpu
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_CPU_CONCATENATEHEIGHT_KERNEL_H */
+#endif /* ARM_COMPUTE_CPU_CONCATENATE_HEIGHT_KERNEL_H */
diff --git a/src/core/cpu/kernels/CpuConcatenateWidthKernel.cpp b/src/core/cpu/kernels/CpuConcatenateWidthKernel.cpp
index effcbc3..ad33b0c 100644
--- a/src/core/cpu/kernels/CpuConcatenateWidthKernel.cpp
+++ b/src/core/cpu/kernels/CpuConcatenateWidthKernel.cpp
@@ -62,11 +62,6 @@
 }
 } // namespace
 
-CpuConcatenateWidthKernel::CpuConcatenateWidthKernel()
-    : _width_offset(0)
-{
-}
-
 void CpuConcatenateWidthKernel::configure(const ITensorInfo *src, unsigned int width_offset, ITensorInfo *dst)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
diff --git a/src/core/cpu/kernels/CpuConcatenateWidthKernel.h b/src/core/cpu/kernels/CpuConcatenateWidthKernel.h
index afdc3cc..f64191e 100644
--- a/src/core/cpu/kernels/CpuConcatenateWidthKernel.h
+++ b/src/core/cpu/kernels/CpuConcatenateWidthKernel.h
@@ -22,8 +22,8 @@
  * SOFTWARE.
  */
 
-#ifndef ARM_COMPUTE_CPU_CONCATENATEWIDTH_KERNEL_H
-#define ARM_COMPUTE_CPU_CONCATENATEWIDTH_KERNEL_H
+#ifndef ARM_COMPUTE_CPU_CONCATENATE_WIDTH_KERNEL_H
+#define ARM_COMPUTE_CPU_CONCATENATE_WIDTH_KERNEL_H
 
 #include "src/core/common/Macros.h"
 #include "src/core/cpu/ICpuKernel.h"
@@ -40,7 +40,7 @@
 class CpuConcatenateWidthKernel : public ICPPKernel
 {
 public:
-    CpuConcatenateWidthKernel();
+    CpuConcatenateWidthKernel() = default;
     ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuConcatenateWidthKernel);
     /** Configure kernel for a given list of arguments
      *
@@ -49,11 +49,9 @@
      * @param[in,out] dst          Destination tensor info. Data types supported: Same as @p src.
      */
     void configure(const ITensorInfo *src, unsigned int width_offset, ITensorInfo *dst);
-    /**  Static function to check if given info will lead to a valid configuration of @ref CpuConcatenateWidthKernel
+    /** Static function to check if given info will lead to a valid configuration
      *
-     * @param[in] src          Source tensor info. Data types supported: All
-     * @param[in] width_offset The offset on the X axis.
-     * @param[in] dst          Destination tensor info. Data types supported: Same as @p src.
+     * Similar to @ref CpuConcatenateWidthKernel::configure()
      *
      * @return a status
      */
@@ -64,9 +62,9 @@
     const char *name() const override;
 
 private:
-    unsigned int _width_offset;
+    unsigned int _width_offset{ 0 };
 };
 } // namespace kernels
 } // namespace cpu
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_CPU_CONCATENATEWIDTH_KERNEL_H */
+#endif /* ARM_COMPUTE_CPU_CONCATENATE_WIDTH_KERNEL_H */
diff --git a/src/core/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.cpp b/src/core/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.cpp
index d91ee64..5bf70dc 100644
--- a/src/core/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.cpp
+++ b/src/core/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.cpp
@@ -34,11 +34,6 @@
 {
 namespace kernels
 {
-CpuConvertFullyConnectedWeightsKernel::CpuConvertFullyConnectedWeightsKernel()
-    : _factor1(0), _factor2(0)
-{
-}
-
 void CpuConvertFullyConnectedWeightsKernel::configure(const ITensorInfo *src, ITensorInfo *dst, const TensorShape &original_input_shape,
                                                       DataLayout data_layout)
 
diff --git a/src/core/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.h b/src/core/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.h
index c867e3d..3ba3162 100644
--- a/src/core/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.h
+++ b/src/core/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.h
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_CPU_CONVERTFULLYCONNECTEDWEIGHTS_KERNEL_H
-#define ARM_COMPUTE_CPU_CONVERTFULLYCONNECTEDWEIGHTS_KERNEL_H
+#ifndef ARM_COMPUTE_CPU_CONVERT_FULLYCONNECTED_WEIGHTS_KERNEL_H
+#define ARM_COMPUTE_CPU_CONVERT_FULLYCONNECTED_WEIGHTS_KERNEL_H
 
 #include "src/core/common/Macros.h"
 #include "src/core/cpu/ICpuKernel.h"
@@ -44,8 +44,7 @@
 class CpuConvertFullyConnectedWeightsKernel : public ICpuKernel
 {
 public:
-    /** Default constructor */
-    CpuConvertFullyConnectedWeightsKernel();
+    CpuConvertFullyConnectedWeightsKernel() = default;
     ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuConvertFullyConnectedWeightsKernel);
     /** Set the src and dst tensor.
      *
@@ -55,12 +54,9 @@
      * @param[in] data_layout          The data layout the weights have been trained in.
      */
     void configure(const ITensorInfo *src, ITensorInfo *dst, const TensorShape &original_input_shape, DataLayout data_layout);
-    /** Static function to check if given info will lead to a valid configuration of @ref CpuConvertFullyConnectedWeightsKernel
+    /** Static function to check if given info will lead to a valid configuration
      *
-     * @param[in] src                  Source weights tensor info to convert. Must be 2 dimensional. Data types supported: All.
-     * @param[in] dst                  The converted weights tensor info. Shape and Data Type: Same as @p src.
-     * @param[in] original_input_shape Shape of the original src tensor (the one entering fully connected layer).
-     * @param[in] data_layout          The data layout the weights have been trained in.
+     * Similar to @ref CpuConvertFullyConnectedWeightsKernel::configure()
      *
      * @return a status
      */
@@ -71,8 +67,8 @@
     const char *name() const override;
 
 private:
-    unsigned int _factor1; /*  equals to the number of elements per original src plane if @p data_layout == NCHW; its number of channels otherwise */
-    unsigned int _factor2; /*  equals to the number of elements per original src plane if @p data_layout == NHWC; its number of channels otherwise */
+    unsigned int _factor1{ 0 }; /* equals to the number of elements per original src plane if @p data_layout == NCHW; its number of channels otherwise */
+    unsigned int _factor2{ 0 }; /* equals to the number of elements per original src plane if @p data_layout == NHWC; its number of channels otherwise */
 
     /** Template function to run the permute
      *
@@ -86,4 +82,4 @@
 } // namespace kernels
 } // namespace cpu
 } // namespace arm_compute
-#endif /*ARM_COMPUTE_CPU_CONVERTFULLYCONNECTEDWEIGHTS_KERNEL_H */
\ No newline at end of file
+#endif /* ARM_COMPUTE_CPU_CONVERT_FULLYCONNECTED_WEIGHTS_KERNEL_H */
\ No newline at end of file
diff --git a/src/core/cpu/kernels/CpuCopyKernel.h b/src/core/cpu/kernels/CpuCopyKernel.h
index 98b79a9..e2f1ed6 100644
--- a/src/core/cpu/kernels/CpuCopyKernel.h
+++ b/src/core/cpu/kernels/CpuCopyKernel.h
@@ -46,11 +46,9 @@
      * @param[in]  padding (Optional) Padding to be applied to the input tensor
      */
     void configure(const ITensorInfo *src, ITensorInfo *dst, const PaddingList &padding = PaddingList());
-    /** Static function to check if given info will lead to a valid configuration of @ref CpuCopyKernel
+    /** Static function to check if given info will lead to a valid configuration
      *
-     * @param[in] src     Source tensor. Data types supported: All
-     * @param[in] dst     Destination tensor. Data types supported: same as @p src.
-     * @param[in] padding (Optional) Padding to be applied to the input tensor
+     * Similar to @ref CpuCopyKernel::configure()
      *
      * @return a status
      */
diff --git a/src/core/cpu/kernels/CpuDepthwiseConv2dNativeKernel.cpp b/src/core/cpu/kernels/CpuDepthwiseConv2dNativeKernel.cpp
index eac9baa..5530eba 100644
--- a/src/core/cpu/kernels/CpuDepthwiseConv2dNativeKernel.cpp
+++ b/src/core/cpu/kernels/CpuDepthwiseConv2dNativeKernel.cpp
@@ -803,11 +803,6 @@
 }
 } // namespace
 
-CpuDepthwiseConv2dNativeKernel::CpuDepthwiseConv2dNativeKernel()
-    : _func(), _conv_info(), _depth_multiplier(1), _dilation(), _output_multiplier(), _output_shift(), _has_biases()
-{
-}
-
 void CpuDepthwiseConv2dNativeKernel::configure(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *biases, ITensorInfo *dst, const ConvolutionInfo &info)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst);
@@ -945,6 +940,11 @@
     auto       dst     = tensors.get_tensor(TensorType::ACL_DST);
     (this->*_func)(src, weights, biases, dst, window, _has_biases);
 }
+
+const char *CpuDepthwiseConv2dNativeKernel::name() const
+{
+    return "CpuDepthwiseConv2dNativeKernel";
+}
 } // namespace kernels
 } // namespace cpu
 } // namespace arm_compute
diff --git a/src/core/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h b/src/core/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h
index 559c46d..eb7041f 100644
--- a/src/core/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h
+++ b/src/core/cpu/kernels/CpuDepthwiseConv2dNativeKernel.h
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_CPU_DEPTHWISECONV2DNATIVEKERNEL_H
-#define ARM_COMPUTE_CPU_DEPTHWISECONV2DNATIVEKERNEL_H
+#ifndef ARM_COMPUTE_CPU_DEPTHWISE_CONV2D_NATIVE_KERNEL_H
+#define ARM_COMPUTE_CPU_DEPTHWISE_CONV2D_NATIVE_KERNEL_H
 
 #include "arm_compute/core/utils/misc/Traits.h"
 #include "src/core/common/Macros.h"
@@ -43,12 +43,7 @@
 class CpuDepthwiseConv2dNativeKernel : public ICpuKernel
 {
 public:
-    const char *name() const override
-    {
-        return "CpuDepthwiseConv2dNativeKernel";
-    }
-    /** Default constructor */
-    CpuDepthwiseConv2dNativeKernel();
+    CpuDepthwiseConv2dNativeKernel() = default;
     ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuDepthwiseConv2dNativeKernel);
 
     /** Initialize the function's source, destination and parameters.
@@ -75,6 +70,7 @@
 
     // Inherited methods overridden:
     void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
+    const char *name() const override;
 
 private:
     template <typename T>
@@ -95,15 +91,15 @@
      */
     using DepthwiseFunctionPtr = void (CpuDepthwiseConv2dNativeKernel::*)(const ITensor *src, const ITensor *weights, const ITensor *bias, ITensor *dst, const Window &window, bool has_biases);
 
-    DepthwiseFunctionPtr _func;
-    PadStrideInfo        _conv_info;
-    unsigned int         _depth_multiplier;
-    Size2D               _dilation;
-    std::vector<int>     _output_multiplier;
-    std::vector<int>     _output_shift;
-    bool                 _has_biases;
+    DepthwiseFunctionPtr _func{ nullptr };
+    PadStrideInfo        _conv_info{};
+    unsigned int         _depth_multiplier{ 1 };
+    Size2D               _dilation{};
+    std::vector<int>     _output_multiplier{};
+    std::vector<int>     _output_shift{};
+    bool                 _has_biases{ false };
 };
 } // namespace kernels
 } // namespace cpu
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_CPU_DEPTHWISECONV2DNATIVEKERNEL_H */
+#endif /* ARM_COMPUTE_CPU_DEPTHWISE_CONV2D_NATIVE_KERNEL_H */
diff --git a/src/core/cpu/kernels/CpuDequantizeKernel.h b/src/core/cpu/kernels/CpuDequantizeKernel.h
index 798f32c..e80aa3a 100644
--- a/src/core/cpu/kernels/CpuDequantizeKernel.h
+++ b/src/core/cpu/kernels/CpuDequantizeKernel.h
@@ -37,7 +37,6 @@
 class CpuDequantizeKernel : public ICpuKernel
 {
 public:
-    /** Default constructor */
     CpuDequantizeKernel() = default;
     ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuDequantizeKernel);
     /** Set input, output tensors.
diff --git a/src/core/cpu/kernels/CpuDirectConv2dKernel.h b/src/core/cpu/kernels/CpuDirectConv2dKernel.h
index 62ed96f..9bef1c4 100644
--- a/src/core/cpu/kernels/CpuDirectConv2dKernel.h
+++ b/src/core/cpu/kernels/CpuDirectConv2dKernel.h
@@ -21,15 +21,14 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_CPU_DIRECTCONV2D_KERNEL_H
-#define ARM_COMPUTE_CPU_DIRECTCONV2D_KERNEL_H
+#ifndef ARM_COMPUTE_CPU_DIRECT_CONV2D_KERNEL_H
+#define ARM_COMPUTE_CPU_DIRECT_CONV2D_KERNEL_H
 
 #include "src/core/common/Macros.h"
 #include "src/core/cpu/ICpuKernel.h"
 
 namespace arm_compute
 {
-class ITensor;
 namespace cpu
 {
 namespace kernels
@@ -38,7 +37,6 @@
 class CpuDirectConv2dKernel : public ICpuKernel
 {
 public:
-    /** Default constructor */
     CpuDirectConv2dKernel() = default;
     ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuDirectConv2dKernel);
     /** Set the src, weights, and dst tensors.
diff --git a/src/core/cpu/kernels/CpuDirectConv2dOutputStageKernel.h b/src/core/cpu/kernels/CpuDirectConv2dOutputStageKernel.h
index 62bc5d4..749411c 100644
--- a/src/core/cpu/kernels/CpuDirectConv2dOutputStageKernel.h
+++ b/src/core/cpu/kernels/CpuDirectConv2dOutputStageKernel.h
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_CPU_DIRECTCONV2D_OUTPUTSTAGE_KERNEL_H
-#define ARM_COMPUTE_CPU_DIRECTCONV2D_OUTPUTSTAGE_KERNEL_H
+#ifndef ARM_COMPUTE_CPU_DIRECT_CONV2D_OUTPUT_STAGE_KERNEL_H
+#define ARM_COMPUTE_CPU_DIRECT_CONV2D_OUTPUT_STAGE_KERNEL_H
 
 #include "arm_compute/core/KernelDescriptors.h"
 #include "src/core/common/Macros.h"
@@ -30,7 +30,6 @@
 
 namespace arm_compute
 {
-class ITensor;
 namespace cpu
 {
 namespace kernels
@@ -44,7 +43,6 @@
 class CpuDirectConv2dOutputStageKernel : public ICpuKernel
 {
 public:
-    /** Default constructor */
     CpuDirectConv2dOutputStageKernel() = default;
     ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuDirectConv2dOutputStageKernel);
     /** Set the accumulate buffer and the biases of the kernel.
@@ -84,4 +82,4 @@
 } // namespace kernels
 } // namespace cpu
 } // namespace arm_compute
-#endif /*ARM_COMPUTE_CPU_DIRECTCONV2D_OUTPUTSTAGE_KERNEL_H */
+#endif /* ARM_COMPUTE_CPU_DIRECT_CONV2D_OUTPUT_STAGE_KERNEL_H */
diff --git a/src/core/cpu/kernels/CpuElementwiseKernel.h b/src/core/cpu/kernels/CpuElementwiseKernel.h
index 50c8d29..75137da 100644
--- a/src/core/cpu/kernels/CpuElementwiseKernel.h
+++ b/src/core/cpu/kernels/CpuElementwiseKernel.h
@@ -29,7 +29,6 @@
 
 namespace arm_compute
 {
-class ITensor;
 namespace cpu
 {
 namespace kernels
@@ -89,7 +88,6 @@
 class CpuArithmeticKernel : public CpuElementwiseKernel
 {
 public:
-    /** Default constructor */
     CpuArithmeticKernel() = default;
 
     /** Configure kernel
@@ -130,7 +128,6 @@
 class CpuDivisionKernel : public CpuArithmeticKernel
 {
 public:
-    /** Default constructor */
     CpuDivisionKernel() = default;
 
     /** Configure kernel
@@ -157,7 +154,6 @@
 class CpuPowerKernel : public CpuArithmeticKernel
 {
 public:
-    /** Default constructor */
     CpuPowerKernel() = default;
 
     /** Configure kernel
@@ -184,7 +180,6 @@
 class CpuComparisonKernel : public CpuElementwiseKernel
 {
 public:
-    /** Default constructor */
     CpuComparisonKernel() = default;
 
     /** Configure kernel
diff --git a/src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp b/src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp
index 91fa75e..56e3297 100644
--- a/src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp
+++ b/src/core/cpu/kernels/CpuElementwiseUnaryKernel.cpp
@@ -105,11 +105,6 @@
 }
 } // namespace
 
-CpuElementwiseUnaryKernel::CpuElementwiseUnaryKernel()
-    : _op()
-{
-}
-
 void CpuElementwiseUnaryKernel::configure(ElementWiseUnary op, const ITensorInfo &src, ITensorInfo &dst)
 {
     ARM_COMPUTE_ERROR_THROW_ON(validate(op, src, dst));
@@ -169,6 +164,11 @@
     ARM_COMPUTE_ERROR_ON(func == nullptr);
     func(src, dst, window, _op);
 }
+
+const char *CpuElementwiseUnaryKernel::name() const
+{
+    return "CpuElementwiseUnaryKernel";
+}
 } // namespace kernels
 } // namespace cpu
 } // namespace arm_compute
diff --git a/src/core/cpu/kernels/CpuElementwiseUnaryKernel.h b/src/core/cpu/kernels/CpuElementwiseUnaryKernel.h
index ceb90dc..43c6ede 100644
--- a/src/core/cpu/kernels/CpuElementwiseUnaryKernel.h
+++ b/src/core/cpu/kernels/CpuElementwiseUnaryKernel.h
@@ -30,7 +30,6 @@
 
 namespace arm_compute
 {
-class ITensor;
 namespace cpu
 {
 namespace kernels
@@ -39,19 +38,11 @@
  *
  * Element-wise operation is computed by:
  * @f[ dst(x) = OP(src(x))@f]
- *
  */
 class CpuElementwiseUnaryKernel : public ICpuKernel
 {
 public:
-    const char *name() const override
-    {
-        return "CpuElementwiseUnaryKernel";
-    }
-    /** Default constructor */
-    CpuElementwiseUnaryKernel();
-    /** Default destructor */
-    ~CpuElementwiseUnaryKernel() = default;
+    CpuElementwiseUnaryKernel() = default;
     ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuElementwiseUnaryKernel);
 
     /** Function to configure the @ref CpuElementwiseUnaryKernel
@@ -61,19 +52,17 @@
      * @param[out] dst Output tensor. Data types supported: Same as @p src.
      */
     void configure(ElementWiseUnary op, const ITensorInfo &src, ITensorInfo &dst);
-
-    /** Static function to check if given info will lead to a valid configuration of @ref CpuElementwiseUnaryKernel
+    /** Static function to check if given info will lead to a valid configuration
      *
-     * @param[in] op  Arithmetic operation to be executed.
-     * @param[in] src First tensor input info. Data types supported: F16/F32, F16/F32/S32 for NEG/ABS operations.
-     * @param[in] dst Output tensor info. Data types supported: Same as @p src.
+     * Similar to @ref CpuElementwiseUnaryKernel::configure()
      *
-     * @return a Status
+     * @return a status
      */
     static Status validate(ElementWiseUnary op, const ITensorInfo &src, const ITensorInfo &dst);
 
     // Inherited methods overridden:
     void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
+    const char *name() const override;
 
     /** Common signature for all the specialised elementwise unary micro-kernels
      *
@@ -82,7 +71,7 @@
     using ElementwiseUnaryUkernelPtr = std::add_pointer<void(const ITensor *, ITensor *, const Window &, ElementWiseUnary)>::type;
 
 private:
-    ElementWiseUnary _op;
+    ElementWiseUnary _op{};
 };
 } // namespace kernels
 } // namespace cpu
diff --git a/src/core/cpu/kernels/CpuGemmInterleave4x4Kernel.h b/src/core/cpu/kernels/CpuGemmInterleave4x4Kernel.h
index 243aab9..8f1a543 100644
--- a/src/core/cpu/kernels/CpuGemmInterleave4x4Kernel.h
+++ b/src/core/cpu/kernels/CpuGemmInterleave4x4Kernel.h
@@ -55,7 +55,6 @@
 class CpuGemmInterleave4x4Kernel : public ICpuKernel
 {
 public:
-    /** Default Constructor */
     CpuGemmInterleave4x4Kernel() = default;
     ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuGemmInterleave4x4Kernel);
     /** Initialise the kernel's src and dst.
@@ -79,4 +78,4 @@
 } // namespace kernels
 } // namespace cpu
 } // namespace arm_compute
-#endif /*ARM_COMPUTE_CPU_GEMM_INTERLEAVE4x4_KERNEL_H*/
+#endif /* ARM_COMPUTE_CPU_GEMM_INTERLEAVE4x4_KERNEL_H */
diff --git a/src/core/cpu/kernels/CpuGemmLowpQuantizeDownInt32ScaleKernel.h b/src/core/cpu/kernels/CpuGemmLowpQuantizeDownInt32ScaleKernel.h
index 646242d..f3cdbdc 100644
--- a/src/core/cpu/kernels/CpuGemmLowpQuantizeDownInt32ScaleKernel.h
+++ b/src/core/cpu/kernels/CpuGemmLowpQuantizeDownInt32ScaleKernel.h
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_CPU_GEMMLOWP_QUANTIZEDOWNINT32_SCALE_KERNEL_H
-#define ARM_COMPUTE_CPU_GEMMLOWP_QUANTIZEDOWNINT32_SCALE_KERNEL_H
+#ifndef ARM_COMPUTE_CPU_GEMMLOWP_QUANTIZEDOWN_INT32_SCALE_KERNEL_H
+#define ARM_COMPUTE_CPU_GEMMLOWP_QUANTIZEDOWN_INT32_SCALE_KERNEL_H
 
 #include "arm_compute/core/KernelDescriptors.h"
 #include "src/core/common/Macros.h"
@@ -30,6 +30,7 @@
 
 namespace arm_compute
 {
+// Forward declaration
 class ITensor;
 namespace cpu
 {
@@ -53,7 +54,6 @@
 class CpuGemmLowpQuantizeDownInt32ScaleKernel : public ICpuKernel
 {
 public:
-    /** Default constructor */
     CpuGemmLowpQuantizeDownInt32ScaleKernel() = default;
     ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuGemmLowpQuantizeDownInt32ScaleKernel);
     /** Initialise the kernel's input and output.
@@ -104,4 +104,4 @@
 } // namespace kernels
 } // namespace cpu
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_CPU_GEMMLOWP_QUANTIZEDOWNINT32_SCALE_KERNEL_H */
+#endif /* ARM_COMPUTE_CPU_GEMMLOWP_QUANTIZEDOWN_INT32_SCALE_KERNEL_H */
diff --git a/src/core/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h b/src/core/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h
index 4d743e9..7a1197d 100644
--- a/src/core/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h
+++ b/src/core/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_CPU_GEMMLOWP_QUANTIZEDOWNINT32TOINT16_SCALEBYFIXEDPOINT_KERNEL_H
-#define ARM_COMPUTE_CPU_GEMMLOWP_QUANTIZEDOWNINT32TOINT16_SCALEBYFIXEDPOINT_KERNEL_H
+#ifndef ARM_COMPUTE_CPU_GEMMLOWP_QUANTIZEDOWN_INT32TOINT16_SCALEBYFIXEDPOINT_KERNEL_H
+#define ARM_COMPUTE_CPU_GEMMLOWP_QUANTIZEDOWN_INT32TOINT16_SCALEBYFIXEDPOINT_KERNEL_H
 
 #include "arm_compute/core/KernelDescriptors.h"
 #include "src/core/common/Macros.h"
@@ -30,6 +30,7 @@
 
 namespace arm_compute
 {
+// Forward declaration
 class ITensor;
 namespace cpu
 {
@@ -50,7 +51,6 @@
 class CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel : public ICpuKernel
 {
 public:
-    /** Default constructor */
     CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel() = default;
     ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel);
     /** Initialise the kernel's input and output.
@@ -108,4 +108,4 @@
 } // namespace kernels
 } // namespace cpu
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_CPU_GEMMLOWP_QUANTIZEDOWNINT32TOINT16_SCALEBYFIXEDPOINT_KERNEL_H */
+#endif /* ARM_COMPUTE_CPU_GEMMLOWP_QUANTIZEDOWN_INT32TOINT16_SCALEBYFIXEDPOINT_KERNEL_H */
diff --git a/src/core/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h b/src/core/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h
index a941f1f..9ebb529 100644
--- a/src/core/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h
+++ b/src/core/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_CPU_GEMMLOWP_QUANTIZEDOWNINT32TOINT8_SCALEBYFIXEDPOINT_KERNEL_H
-#define ARM_COMPUTE_CPU_GEMMLOWP_QUANTIZEDOWNINT32TOINT8_SCALEBYFIXEDPOINT_KERNEL_H
+#ifndef ARM_COMPUTE_CPU_GEMMLOWP_QUANTIZEDOWN_INT32TOINT8_SCALEBYFIXEDPOINT_KERNEL_H
+#define ARM_COMPUTE_CPU_GEMMLOWP_QUANTIZEDOWN_INT32TOINT8_SCALEBYFIXEDPOINT_KERNEL_H
 
 #include "arm_compute/core/KernelDescriptors.h"
 #include "src/core/common/Macros.h"
@@ -30,6 +30,7 @@
 
 namespace arm_compute
 {
+// Forward declaration
 class ITensor;
 namespace cpu
 {
@@ -51,7 +52,6 @@
 class CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel : public ICpuKernel
 {
 public:
-    /** Default constructor */
     CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel() = default;
     ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel);
     /** Initialise the kernel's input and output.
@@ -111,4 +111,4 @@
 } // namespace kernels
 } // namespace cpu
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_CPU_GEMMLOWP_QUANTIZEDOWNINT32TOINT8_SCALEBYFIXEDPOINT_KERNEL_H */
+#endif /* ARM_COMPUTE_CPU_GEMMLOWP_QUANTIZEDOWN_INT32TOINT8_SCALEBYFIXEDPOINT_KERNEL_H */
diff --git a/src/core/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h b/src/core/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h
index 9b4c056..312cad9 100644
--- a/src/core/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h
+++ b/src/core/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_CPU_GEMMLOWP_QUANTIZEDOWNINT32TOUINT8_SCALEBYFIXEDPOINT_KERNEL_H
-#define ARM_COMPUTE_CPU_GEMMLOWP_QUANTIZEDOWNINT32TOUINT8_SCALEBYFIXEDPOINT_KERNEL_H
+#ifndef ARM_COMPUTE_CPU_GEMMLOWP_QUANTIZEDOWN_INT32TOUINT8_SCALEBYFIXEDPOINT_KERNEL_H
+#define ARM_COMPUTE_CPU_GEMMLOWP_QUANTIZEDOWN_INT32TOUINT8_SCALEBYFIXEDPOINT_KERNEL_H
 
 #include "arm_compute/core/KernelDescriptors.h"
 #include "src/core/common/Macros.h"
@@ -30,6 +30,7 @@
 
 namespace arm_compute
 {
+// Forward declaration
 class ITensor;
 namespace cpu
 {
@@ -51,7 +52,6 @@
 class CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel : public ICpuKernel
 {
 public:
-    /** Default constructor */
     CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel() = default;
     ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuGemmLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel);
     /** Initialise the kernel's input and output.
@@ -105,4 +105,4 @@
 } // namespace kernels
 } // namespace cpu
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_CPU_GEMMLOWP_QUANTIZEDOWNINT32TOUINT8_SCALEBYFIXEDPOINT_KERNEL_H */
+#endif /* ARM_COMPUTE_CPU_GEMMLOWP_QUANTIZEDOWN_INT32TOUINT8_SCALEBYFIXEDPOINT_KERNEL_H */
diff --git a/src/core/cpu/kernels/CpuGemmMatrixAdditionKernel.h b/src/core/cpu/kernels/CpuGemmMatrixAdditionKernel.h
index c8e6fa9..f9450b9 100644
--- a/src/core/cpu/kernels/CpuGemmMatrixAdditionKernel.h
+++ b/src/core/cpu/kernels/CpuGemmMatrixAdditionKernel.h
@@ -44,7 +44,6 @@
 class CpuGemmMatrixAdditionKernel : public ICpuKernel
 {
 public:
-    /** Constructor */
     CpuGemmMatrixAdditionKernel() = default;
     ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuGemmMatrixAdditionKernel);
     /** Initialise the kernel's input and output.
diff --git a/src/core/cpu/kernels/CpuGemmMatrixMultiplyKernel.h b/src/core/cpu/kernels/CpuGemmMatrixMultiplyKernel.h
index bf13342..974ff85 100644
--- a/src/core/cpu/kernels/CpuGemmMatrixMultiplyKernel.h
+++ b/src/core/cpu/kernels/CpuGemmMatrixMultiplyKernel.h
@@ -42,7 +42,6 @@
 class CpuGemmMatrixMultiplyKernel : public ICpuKernel
 {
 public:
-    /** Constructor */
     CpuGemmMatrixMultiplyKernel() = default;
     ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuGemmMatrixMultiplyKernel);
     /** Initialise the kernel's input and output.
@@ -89,4 +88,4 @@
 } // namespace kernels
 } // namespace cpu
 } // namespace arm_compute
-#endif /*ARM_COMPUTE_CPU_GEMM_MATRIX_MULTIPLY_KERNEL_H*/
+#endif /* ARM_COMPUTE_CPU_GEMM_MATRIX_MULTIPLY_KERNEL_H */
diff --git a/src/core/cpu/kernels/CpuGemmTranspose1xWKernel.h b/src/core/cpu/kernels/CpuGemmTranspose1xWKernel.h
index c9c22bd..1a9287f 100644
--- a/src/core/cpu/kernels/CpuGemmTranspose1xWKernel.h
+++ b/src/core/cpu/kernels/CpuGemmTranspose1xWKernel.h
@@ -71,7 +71,6 @@
 class CpuGemmTranspose1xWKernel : public ICpuKernel
 {
 public:
-    /** Constructor */
     CpuGemmTranspose1xWKernel() = default;
     ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuGemmTranspose1xWKernel);
     /** Configure kernel for a given list of arguments
@@ -95,4 +94,4 @@
 } // namespace kernels
 } // namespace cpu
 } // namespace arm_compute
-#endif /*ARM_COMPUTE_CPU_GEMM_TRANSPOSE1xW_KERNEL_H */
+#endif /* ARM_COMPUTE_CPU_GEMM_TRANSPOSE1xW_KERNEL_H */
diff --git a/src/core/cpu/kernels/CpuMulKernel.h b/src/core/cpu/kernels/CpuMulKernel.h
index 3e667bc..3ea176c 100644
--- a/src/core/cpu/kernels/CpuMulKernel.h
+++ b/src/core/cpu/kernels/CpuMulKernel.h
@@ -37,7 +37,6 @@
 class CpuMulKernel : public ICpuKernel
 {
 public:
-    /** Default constructor */
     CpuMulKernel() = default;
     ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuMulKernel);
     /** Initialise the kernel's input, dst and border mode.
@@ -122,7 +121,6 @@
 class CpuComplexMulKernel : public ICpuKernel
 {
 public:
-    /** Default constructor */
     CpuComplexMulKernel() = default;
     ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuComplexMulKernel);
     /** Initialise the kernel's src, dst and border mode.
diff --git a/src/core/cpu/kernels/CpuPermuteKernel.h b/src/core/cpu/kernels/CpuPermuteKernel.h
index 9c59d5b..2955f38 100644
--- a/src/core/cpu/kernels/CpuPermuteKernel.h
+++ b/src/core/cpu/kernels/CpuPermuteKernel.h
@@ -48,13 +48,9 @@
      * @param[in]  perm Permutation vector
      */
     void configure(const ITensorInfo *src, ITensorInfo *dst, const PermutationVector &perm);
-    /** Static function to check if given info will lead to a valid configuration of @ref CpuPermuteKernel
+    /** Static function to check if given info will lead to a valid configuration
      *
-     * @note Arbitrary permutation vectors are supported with rank not greater than 4
-     *
-     * @param[in] src  Source tensor to permute. Data types supported: All
-     * @param[in] dst  Destination tensor. Data types supported: Same as @p src
-     * @param[in] perm Permutation vector
+     * Similar to @ref CpuPermuteKernel::configure()
      *
      * @return a status
      */
diff --git a/src/core/cpu/kernels/CpuPool2dKernel.h b/src/core/cpu/kernels/CpuPool2dKernel.h
index ff7d7bb..9ed398b 100644
--- a/src/core/cpu/kernels/CpuPool2dKernel.h
+++ b/src/core/cpu/kernels/CpuPool2dKernel.h
@@ -38,7 +38,6 @@
 class CpuPool2dKernel : public ICpuKernel
 {
 public:
-    /** Default constructor */
     CpuPool2dKernel() = default;
     ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuPool2dKernel);
     /** Configure kernel for a given list of arguments
@@ -80,4 +79,4 @@
 } // namespace kernels
 } // namespace cpu
 } // namespace arm_compute
-#endif /*ARM_COMPUTE_CPU_POOL2D_KERNEL_H */
+#endif /* ARM_COMPUTE_CPU_POOL2D_KERNEL_H */
diff --git a/src/core/cpu/kernels/CpuQuantizeKernel.h b/src/core/cpu/kernels/CpuQuantizeKernel.h
index d3422d3..834a2e0 100644
--- a/src/core/cpu/kernels/CpuQuantizeKernel.h
+++ b/src/core/cpu/kernels/CpuQuantizeKernel.h
@@ -40,7 +40,6 @@
 class CpuQuantizeKernel : public ICpuKernel
 {
 public:
-    /** Default constructor */
     CpuQuantizeKernel() = default;
     ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuQuantizeKernel);
     /** Set the input, output.
diff --git a/src/core/cpu/kernels/CpuReshapeKernel.h b/src/core/cpu/kernels/CpuReshapeKernel.h
index add6782..1425fbe 100644
--- a/src/core/cpu/kernels/CpuReshapeKernel.h
+++ b/src/core/cpu/kernels/CpuReshapeKernel.h
@@ -46,10 +46,9 @@
      */
     void configure(const ITensorInfo *src, ITensorInfo *dst);
 
-    /** Static function to check if given info will lead to a valid configuration of @ref CpuReshapeKernel
+    /** Static function to check if given info will lead to a valid configuration
      *
-     * @param[in] src Source tensor info. Data type supported: All
-     * @param[in] dst Destination tensor info. Data type supported: Same as @p src
+     * Similar to @ref CpuReshapeKernel::configure()
      *
      * @return a status
      */
diff --git a/src/core/cpu/kernels/CpuScaleKernel.h b/src/core/cpu/kernels/CpuScaleKernel.h
index afaf074..a2b6537 100644
--- a/src/core/cpu/kernels/CpuScaleKernel.h
+++ b/src/core/cpu/kernels/CpuScaleKernel.h
@@ -38,7 +38,6 @@
 class CpuScaleKernel : public ICpuKernel
 {
 public:
-    /** Default constructor */
     CpuScaleKernel() = default;
     ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuScaleKernel);
     /** Initialise the kernel's inputs, output and interpolation policy
@@ -106,4 +105,4 @@
 } // namespace kernels
 } // namespace cpu
 } // namespace arm_compute
-#endif /*ARM_COMPUTE_CPU_SCALEKERNEL_H */
+#endif /* ARM_COMPUTE_CPU_SCALEKERNEL_H */
diff --git a/src/core/cpu/kernels/CpuSoftmaxKernel.h b/src/core/cpu/kernels/CpuSoftmaxKernel.h
index 2912098..776c0d6 100644
--- a/src/core/cpu/kernels/CpuSoftmaxKernel.h
+++ b/src/core/cpu/kernels/CpuSoftmaxKernel.h
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_CPU_SOFTMAXKERNEL_H
-#define ARM_COMPUTE_CPU_SOFTMAXKERNEL_H
+#ifndef ARM_COMPUTE_CPU_SOFTMAX_KERNEL_H
+#define ARM_COMPUTE_CPU_SOFTMAX_KERNEL_H
 
 #include "src/core/common/Macros.h"
 #include "src/core/cpu/ICpuKernel.h"
@@ -37,7 +37,6 @@
 class CpuLogits1DMaxKernel : public ICpuKernel
 {
 public:
-    /** Constructor */
     CpuLogits1DMaxKernel() = default;
     ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuLogits1DMaxKernel);
     /** Set the input and output tensors.
@@ -71,7 +70,6 @@
 class CpuLogits1DSoftmaxKernel : public ICpuKernel
 {
 public:
-    /** Default constructor */
     CpuLogits1DSoftmaxKernel() = default;
     ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(CpuLogits1DSoftmaxKernel);
 
@@ -110,4 +108,4 @@
 } // namespace kernels
 } // namespace cpu
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_CPU_SOFTMAXKERNEL_H */
+#endif /* ARM_COMPUTE_CPU_SOFTMAX_KERNEL_H */
diff --git a/src/core/cpu/kernels/CpuTransposeKernel.h b/src/core/cpu/kernels/CpuTransposeKernel.h
index f09f427..920349d 100644
--- a/src/core/cpu/kernels/CpuTransposeKernel.h
+++ b/src/core/cpu/kernels/CpuTransposeKernel.h
@@ -45,10 +45,9 @@
      * @param[out] dst Destination tensor. Data types supported: Same as @p src
      */
     void configure(const ITensorInfo *src, ITensorInfo *dst);
-    /** Static function to check if given info will lead to a valid configuration of @ref CpuTransposeKernel
+    /** Static function to check if given info will lead to a valid configuration
      *
-     * @param[in] src Source tensor to permute. Data types supported: All
-     * @param[in] dst Destination tensor. Data types supported: Same as @p src
+     * Similar to @ref CpuTransposeKernel::configure()
      *
      * @return a status
      */