COMPMID-1065 : Create documentation explaining how to add new functions / kernels

Change-Id: I98183f95814442b6f3dbb67a1bdae99df05b9b01
diff --git a/arm_compute/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h b/arm_compute/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h
index c71c105..9658005 100644
--- a/arm_compute/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEWinogradConvolutionLayerKernel.h
@@ -448,8 +448,35 @@
     static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info);
 
     // Inherited methods overridden:
+
+#ifndef DOXYGEN_SKIP_THIS
+    /** Configure the weights transform kernel.
+     *
+     * @param[in]  weights_hwio        Pointer to the weights tensor
+     * @param[out] output              Pointer to working space for the output tensor in the Winograd domain.
+     * @param[in]  matrix_stride       Stride across matrices in the output workspace.
+     * @param[in]  num_output_channels Number of filters.
+     * @param[in]  num_input_channels  Number of channels in each filter.
+     */
     void configure(const ITensor *weights_hwio, ITensor *output, const int matrix_stride, const int num_output_channels, const int num_input_channels) override;
+#endif /* DOXYGEN_SKIP_THIS */
+
+    /** Determine how much memory (in units of T) to allocate for the
+     * transformed weights.
+     *
+     * @param[in] num_output_channels Number of output feature maps.
+     * @param[in] num_input_channels  Number of input feature maps.
+     *
+     * @return Storage size (in units of T) required.
+     */
     unsigned int get_weight_storage_size(int num_output_channels, int num_input_channels) const override;
+
+    /** Gets the stride between matrices in the input workspace
+     *
+     * @param[in] kernel_shape The shape of the weights tensor.
+     *
+     * @return Stride expressed in bytes.
+     */
     int get_matrix_stride(const KernelShape &kernel_shape) const override;
     void run(const Window &window, const ThreadInfo &info) override;
     bool is_parallelisable() const override;
diff --git a/arm_compute/core/NEON/kernels/convolution/common/shims.hpp b/arm_compute/core/NEON/kernels/convolution/common/shims.hpp
index 09e1457..243d305 100644
--- a/arm_compute/core/NEON/kernels/convolution/common/shims.hpp
+++ b/arm_compute/core/NEON/kernels/convolution/common/shims.hpp
@@ -23,7 +23,9 @@
  */
 
 #pragma once
+#ifndef DOXYGEN_SKIP_THIS
 #include <cstdint>
+#endif /* DOXYGEN_SKIP_THIS */
 #include "arm.hpp"
 
 namespace reorder {
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
index ef25dc4..0f2786c 100644
--- a/arm_compute/core/Types.h
+++ b/arm_compute/core/Types.h
@@ -105,6 +105,8 @@
 /** Constant value used to indicate a ORB scaled pyramid */
 constexpr float SCALE_PYRAMID_ORB = 8.408964152537146130583778358414e-01;
 
+/** [DataLayout enum definition] **/
+
 /** Supported tensor data layouts */
 enum class DataLayout
 {
@@ -112,6 +114,7 @@
     NCHW,    /**< Num samples, channels, height, width */
     NHWC     /**< Num samples, height, width, channels */
 };
+/** [DataLayout enum definition] **/
 
 /** Supported tensor data layout dimensions */
 enum class DataLayoutDimension