COMPMID-1691: Optimize CLDepthwiseConvolutionKernel (QASYMM8/NHWC) for 3x3 kernels (stride=1 and stride=2)
Change-Id: I7d0d2dc350feeb40d253d17f9ffd5051a8fb42ef
Reviewed-on: https://review.mlplatform.org/511
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
index 317c899..9fbd0ef 100644
--- a/arm_compute/core/Types.h
+++ b/arm_compute/core/Types.h
@@ -1745,6 +1745,12 @@
const bool _reinterpret_input_as_3d;
};
+struct DepthwiseConvolutionReshapeInfo
+{
+ unsigned int c0{ 1 }; /**< Number of channels processed by the depth-wise convolution. Defaults to 1 */
+ bool transpose{ false }; /**< True if the block MxC0 (where M is the area of the filter, i.e. Kw x Kh) has to be transposed. Defaults to false */
+};
+
/** GEMMLowp output stage type */
enum class GEMMLowpOutputStageType
{