COMPMID-1691: Optimize CLDepthwiseConvolutionKernel (QASYMM8/NHWC) for 3x3 kernels (stride=1 and stride=2)

Change-Id: I7d0d2dc350feeb40d253d17f9ffd5051a8fb42ef
Reviewed-on: https://review.mlplatform.org/511
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h
index 85fbaae..2fc9780 100644
--- a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h
+++ b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -53,11 +53,11 @@
                    ActivationLayerInfo act_info) override;
     /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3NHWCKernel
      *
-     * @param[in] input            Source tensor. DataType supported: QASYMM8.
-     * @param[in] weights          Weights tensor. A 3D tensor with dimensions [IFM, 3, 3]. Data type supported: Same as @p input.
-     * @param[in] biases           (Optional) Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
+     * @param[in] input            Source tensor info. DataType supported: QASYMM8.
+     * @param[in] weights          Weights tensor info. A 3D tensor with dimensions [IFM, 3, 3]. Data type supported: Same as @p input.
+     * @param[in] biases           (Optional) Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
      *                             Data type supported: Same as @p input.
-     * @param[in] output           Destination tensor. Data type supported: Same as @p input.
+     * @param[in] output           Destination tensor info. Data type supported: Same as @p input.
      * @param[in] conv_info        Padding and stride information to use for the convolution.
      * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
      * @param[in] act_info         (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported.
@@ -67,6 +67,7 @@
     static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier,
                            ActivationLayerInfo act_info = ActivationLayerInfo());
 
+    // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
     BorderSize border_size() const override;