COMPMID-477 - Optimizing CLDirectConvolution 3x3 on OpenCL and added the auto configuration

Change-Id: I3c8384dcbc9d7786943134bb658dafb35356d90d
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/83253
Reviewed-by: Steven Niu <steven.niu@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>

commit: 5cb4d6a1d0f39bf800edb43c0ec7c96dae10e132 [log] [tgz]
author: Gian Marco Iodice <gianmarco.iodice@arm.com> Tue Aug 08 10:53:00 2017 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> Fri Nov 02 16:35:24 2018 +0000
tree: f04f0b561e91a218aa3564b8582eecae4c154be7
parent: d4ab78a309f2932a87af7cd6854a0665f051077c [diff] [blame]
diff --git a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h
index e0dac98..5672782 100644
--- a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h

@@ -48,6 +48,10 @@
     ~NEDirectConvolutionLayerKernel() = default;
     /** Set the input, weights, and output tensors.
      *
+     * @note: DirectConvolution only works in the following configurations:
+     *        1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3
+     *        3x3 convolution with stride_x = 1/2/3, stride_y = 1/2/3
+     *
      * @param[in]  input     The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM],
      *                       while every optional dimension from 4 and above represents a batch of inputs. Data types supported: QS8/QS16/F16/F32.
      * @param[in]  weights   Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
commit	5cb4d6a1d0f39bf800edb43c0ec7c96dae10e132	[log] [tgz]
author	Gian Marco Iodice <gianmarco.iodice@arm.com>	Tue Aug 08 10:53:00 2017 +0100
committer	Anthony Barbier <anthony.barbier@arm.com>	Fri Nov 02 16:35:24 2018 +0000
tree	f04f0b561e91a218aa3564b8582eecae4c154be7
parent	d4ab78a309f2932a87af7cd6854a0665f051077c [diff] [blame]