Blame - arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h - ml/ComputeLibrary

blob: 98581a21feb5b22994e20bb00523324b22ee0ea0 [file] [log] [blame]

Giorgio Arena	93a690e	2017-08-01 16:09:33 +0100	[diff] [blame]	1	/*
giuros01	6d10996	2019-01-07 17:47:19 +0000	[diff] [blame]	2	* Copyright (c) 2017-2019 ARM Limited.
Giorgio Arena	93a690e	2017-08-01 16:09:33 +0100	[diff] [blame]	3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
Giorgio Arena	9fe4144	2017-08-23 16:36:24 +0100	[diff] [blame]	24	#ifndef __ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H__
				25	#define __ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H__
Giorgio Arena	93a690e	2017-08-01 16:09:33 +0100	[diff] [blame]	26
Giorgio Arena	dfca60b	2018-01-31 10:30:59 +0000	[diff] [blame]	27	#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h"
				28	#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h"
Michele Di Giorgio	a046e16	2019-10-08 09:36:26 +0100	[diff] [blame^]	29	#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.h"
giuros01	6d10996	2019-01-07 17:47:19 +0000	[diff] [blame]	30	#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h"
Giorgio Arena	93a690e	2017-08-01 16:09:33 +0100	[diff] [blame]	31	#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
Giorgio Arena	dfca60b	2018-01-31 10:30:59 +0000	[diff] [blame]	32	#include "arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h"
Giorgio Arena	93a690e	2017-08-01 16:09:33 +0100	[diff] [blame]	33	#include "arm_compute/core/Types.h"
				34	#include "arm_compute/runtime/CL/CLTensor.h"
Georgios Pinitas	05045c1	2018-12-07 18:31:47 +0000	[diff] [blame]	35	#include "arm_compute/runtime/CL/functions/CLPermute.h"
Giorgio Arena	93a690e	2017-08-01 16:09:33 +0100	[diff] [blame]	36	#include "arm_compute/runtime/IFunction.h"
Georgios Pinitas	26014cf	2019-09-09 19:00:57 +0100	[diff] [blame]	37	#include "arm_compute/runtime/MemoryGroup.h"
Giorgio Arena	93a690e	2017-08-01 16:09:33 +0100	[diff] [blame]	38
Giorgio Arena	93a690e	2017-08-01 16:09:33 +0100	[diff] [blame]	39	namespace arm_compute
				40	{
				41	class ICLTensor;
				42
Giorgio Arena	dfca60b	2018-01-31 10:30:59 +0000	[diff] [blame]	43	/** Basic function to execute a depthwise convolution for kernel size 3x3xC (when data layout NCHW) or Cx3x3 (when data layout NHWC). This function calls the following OpenCL kernels:
Giorgio Arena	93a690e	2017-08-01 16:09:33 +0100	[diff] [blame]	44	*
Giorgio Arena	dfca60b	2018-01-31 10:30:59 +0000	[diff] [blame]	45	* -# @ref CLDepthwiseConvolutionLayer3x3NCHWKernel (if data_layout == NCHW)
				46	* -# @ref CLDepthwiseConvolutionLayer3x3NHWCKernel (if data_layout == NHWC)
giuros01	6d10996	2019-01-07 17:47:19 +0000	[diff] [blame]	47	* -# @ref CLDepthwiseConvolutionLayerReshapeWeightsKernel (if data_layout == NHWC)
Giorgio Arena	9fe4144	2017-08-23 16:36:24 +0100	[diff] [blame]	48	* -# @ref CLFillBorderKernel (if pad_x or pad_y > 0)
Giorgio Arena	93a690e	2017-08-01 16:09:33 +0100	[diff] [blame]	49	*
				50	*/
Giorgio Arena	04a8f8c	2017-11-23 11:45:24 +0000	[diff] [blame]	51	class CLDepthwiseConvolutionLayer3x3 : public IFunction
Giorgio Arena	93a690e	2017-08-01 16:09:33 +0100	[diff] [blame]	52	{
				53	public:
				54	/** Default constructor */
Georgios Pinitas	05045c1	2018-12-07 18:31:47 +0000	[diff] [blame]	55	CLDepthwiseConvolutionLayer3x3(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
				56	/** Prevent instances of this class from being copied (As this class contains pointers) */
				57	CLDepthwiseConvolutionLayer3x3(const CLDepthwiseConvolutionLayer3x3 &) = delete;
				58	/** Default move constructor */
				59	CLDepthwiseConvolutionLayer3x3(CLDepthwiseConvolutionLayer3x3 &&) = default;
				60	/** Prevent instances of this class from being copied (As this class contains pointers) */
				61	CLDepthwiseConvolutionLayer3x3 &operator=(const CLDepthwiseConvolutionLayer3x3 &) = delete;
				62	/** Default move assignment operator */
				63	CLDepthwiseConvolutionLayer3x3 &operator=(CLDepthwiseConvolutionLayer3x3 &&) = default;
Giorgio Arena	93a690e	2017-08-01 16:09:33 +0100	[diff] [blame]	64	/** Initialize the function's source, destination, conv and border_size.
				65	*
Giorgio Arena	7657224	2018-04-04 17:44:26 +0100	[diff] [blame]	66	* @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling).
				67	* @param[in] weights Weights tensor. A 3D tensor with shape [3, 3, IFM]. Data type supported: Same as @p input.
Usama Arif	e73686a	2019-04-08 17:30:48 +0100	[diff] [blame]	68	* @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
Giorgio Arena	7657224	2018-04-04 17:44:26 +0100	[diff] [blame]	69	* Data type supported: Same as @p input, S32 when input is QASYMM8.
				70	* @param[out] output Destination tensor. Data type supported: same as @p input.
				71	* @param[in] conv_info Padding and stride information to use for the convolution.
				72	* @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
				73	* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
Usama Arif	e73686a	2019-04-08 17:30:48 +0100	[diff] [blame]	74	* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
Giorgio Arena	93a690e	2017-08-01 16:09:33 +0100	[diff] [blame]	75	*/
Giorgio Arena	7657224	2018-04-04 17:44:26 +0100	[diff] [blame]	76	void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
Usama Arif	e73686a	2019-04-08 17:30:48 +0100	[diff] [blame]	77	ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
Giorgio Arena	93a690e	2017-08-01 16:09:33 +0100	[diff] [blame]	78
Giorgio Arena	ad0c738	2018-04-23 16:16:21 +0100	[diff] [blame]	79	/** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3
				80	*
Usama Arif	e73686a	2019-04-08 17:30:48 +0100	[diff] [blame]	81	* @param[in] input Source tensor info. Data type supported: QASYMM8 for all layouts, F16/F32 for NCHW.
				82	* @param[in] weights Weights tensor info. A 3D tensor with shape [3, 3, IFM]. Data type supported: Same as @p input.
				83	* @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
Giorgio Arena	ad0c738	2018-04-23 16:16:21 +0100	[diff] [blame]	84	* Data type supported: Same as @p input, S32 when input is QASYMM8.
				85	* @param[in] output Destination tensor info. Data type supported: same as @p input.
				86	* @param[in] conv_info Padding and stride information to use for the convolution.
				87	* @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
				88	* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported.
				89	* @param[in] gpu_target (Optional) GPU target to validate the kernel for. Defaults to midgard.
Usama Arif	e73686a	2019-04-08 17:30:48 +0100	[diff] [blame]	90	* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
Giorgio Arena	ad0c738	2018-04-23 16:16:21 +0100	[diff] [blame]	91	*
				92	* @return a status
				93	*/
				94	static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
Usama Arif	e73686a	2019-04-08 17:30:48 +0100	[diff] [blame]	95	ActivationLayerInfo act_info = ActivationLayerInfo(), GPUTarget gpu_target = GPUTarget::MIDGARD, const Size2D &dilation = Size2D(1U, 1U));
Giorgio Arena	93a690e	2017-08-01 16:09:33 +0100	[diff] [blame]	96	// Inherited methods overridden:
				97	void run() override;
Georgios Pinitas	05045c1	2018-12-07 18:31:47 +0000	[diff] [blame]	98	void prepare() override;
Giorgio Arena	93a690e	2017-08-01 16:09:33 +0100	[diff] [blame]	99
				100	private:
Georgios Pinitas	26014cf	2019-09-09 19:00:57 +0100	[diff] [blame]	101	MemoryGroup _memory_group;
Giorgio Arena	dfca60b	2018-01-31 10:30:59 +0000	[diff] [blame]	102	std::unique_ptr<ICLDepthwiseConvolutionLayer3x3Kernel> _kernel;
				103	CLFillBorderKernel _border_handler;
Georgios Pinitas	05045c1	2018-12-07 18:31:47 +0000	[diff] [blame]	104	CLPermute _permute_input_to_nchw;
				105	CLPermute _permute_weights_to_nchw;
				106	CLPermute _permute_output_to_nhwc;
giuros01	6d10996	2019-01-07 17:47:19 +0000	[diff] [blame]	107	CLDepthwiseConvolutionLayerReshapeWeightsKernel _reshape_weights;
Georgios Pinitas	05045c1	2018-12-07 18:31:47 +0000	[diff] [blame]	108	CLTensor _permuted_input;
				109	CLTensor _permuted_weights;
				110	CLTensor _permuted_output;
				111	const ITensor *_original_weights;
				112	bool _needs_permute;
giuros01	6d10996	2019-01-07 17:47:19 +0000	[diff] [blame]	113	bool _needs_weights_reshape;
Georgios Pinitas	05045c1	2018-12-07 18:31:47 +0000	[diff] [blame]	114	bool _is_prepared;
Giorgio Arena	9fe4144	2017-08-23 16:36:24 +0100	[diff] [blame]	115	};
				116
				117	/** Basic function to execute a generic depthwise convolution. This function calls the following OpenCL kernels:
				118	*
Michele Di Giorgio	a046e16	2019-10-08 09:36:26 +0100	[diff] [blame^]	119	* -# @ref CLDepthwiseConvolutionLayerNativeKernel
				120	* -# @ref CLPermute (x 3) if the data layout is NCHW
Giorgio Arena	9fe4144	2017-08-23 16:36:24 +0100	[diff] [blame]	121	*
				122	*/
Giorgio Arena	04a8f8c	2017-11-23 11:45:24 +0000	[diff] [blame]	123	class CLDepthwiseConvolutionLayer : public IFunction
Giorgio Arena	9fe4144	2017-08-23 16:36:24 +0100	[diff] [blame]	124	{
				125	public:
				126	/** Default constructor */
Michele Di Giorgio	a046e16	2019-10-08 09:36:26 +0100	[diff] [blame^]	127	CLDepthwiseConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
Georgios Pinitas	1562be3	2018-03-08 19:09:19 +0000	[diff] [blame]	128	/** Prevent instances of this class from being copied (As this class contains pointers) */
				129	CLDepthwiseConvolutionLayer(const CLDepthwiseConvolutionLayer &) = delete;
				130	/** Default move constructor */
				131	CLDepthwiseConvolutionLayer(CLDepthwiseConvolutionLayer &&) = default;
				132	/** Prevent instances of this class from being copied (As this class contains pointers) */
				133	CLDepthwiseConvolutionLayer &operator=(const CLDepthwiseConvolutionLayer &) = delete;
				134	/** Default move assignment operator */
				135	CLDepthwiseConvolutionLayer &operator=(CLDepthwiseConvolutionLayer &&) = default;
Giorgio Arena	9fe4144	2017-08-23 16:36:24 +0100	[diff] [blame]	136	/** Initialize the function's source, destination, weights and convolution information.
				137	*
Giorgio Arena	7657224	2018-04-04 17:44:26 +0100	[diff] [blame]	138	* @param[in, out] input Source tensor. Data type supported: QASYMM8/F32. (Written to only for border filling).
				139	* @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
Usama Arif	e73686a	2019-04-08 17:30:48 +0100	[diff] [blame]	140	* @param[in] biases Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
Giorgio Arena	7657224	2018-04-04 17:44:26 +0100	[diff] [blame]	141	* Data type supported: Same as @p input, S32 when input is QASYMM8.
				142	* @param[out] output Destination tensor. Data type supported: same as @p input.
				143	* @param[in] conv_info Padding and stride information to use for the convolution.
				144	* @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
Georgios Pinitas	60e9825	2018-10-22 16:17:20 +0100	[diff] [blame]	145	* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
Usama Arif	e73686a	2019-04-08 17:30:48 +0100	[diff] [blame]	146	* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
Giorgio Arena	9fe4144	2017-08-23 16:36:24 +0100	[diff] [blame]	147	*/
Georgios Pinitas	60e9825	2018-10-22 16:17:20 +0100	[diff] [blame]	148	void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info,
Usama Arif	e73686a	2019-04-08 17:30:48 +0100	[diff] [blame]	149	unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
Giorgio Arena	9fe4144	2017-08-23 16:36:24 +0100	[diff] [blame]	150
Giorgio Arena	ad0c738	2018-04-23 16:16:21 +0100	[diff] [blame]	151	/** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer
				152	*
Usama Arif	e73686a	2019-04-08 17:30:48 +0100	[diff] [blame]	153	* @param[in] input Source tensor info. Data type supported: QASYMM8/F32.
				154	* @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
				155	* @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
Giorgio Arena	ad0c738	2018-04-23 16:16:21 +0100	[diff] [blame]	156	* Data type supported: Same as @p input, S32 when input is QASYMM8.
				157	* @param[in] output Destination tensor info. Data type supported: same as @p input.
				158	* @param[in] conv_info Padding and stride information to use for the convolution.
				159	* @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1.
Georgios Pinitas	60e9825	2018-10-22 16:17:20 +0100	[diff] [blame]	160	* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
Usama Arif	e73686a	2019-04-08 17:30:48 +0100	[diff] [blame]	161	* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
Giorgio Arena	ad0c738	2018-04-23 16:16:21 +0100	[diff] [blame]	162	*
				163	* @return a status
				164	*/
Georgios Pinitas	60e9825	2018-10-22 16:17:20 +0100	[diff] [blame]	165	static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
Usama Arif	e73686a	2019-04-08 17:30:48 +0100	[diff] [blame]	166	unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U));
Giorgio Arena	ad0c738	2018-04-23 16:16:21 +0100	[diff] [blame]	167
Giorgio Arena	9fe4144	2017-08-23 16:36:24 +0100	[diff] [blame]	168	// Inherited methods overridden:
				169	void run() override;
Georgios Pinitas	7221933	2018-06-05 14:56:06 +0100	[diff] [blame]	170	void prepare() override;
Giorgio Arena	9fe4144	2017-08-23 16:36:24 +0100	[diff] [blame]	171
				172	private:
Michele Di Giorgio	a046e16	2019-10-08 09:36:26 +0100	[diff] [blame^]	173	MemoryGroup _memory_group;
				174
				175	std::unique_ptr<IFunction> _optimised_function;
				176	CLDepthwiseConvolutionLayerNativeKernel _dwc_native_kernel;
				177	CLPermute _permute_input_to_nhwc;
				178	CLPermute _permute_weights_to_nhwc;
				179	CLPermute _permute_output_to_nchw;
				180
				181	CLTensor _permuted_input;
				182	CLTensor _permuted_weights;
				183	CLTensor _permuted_output;
				184	const ITensor *_original_weights;
				185
				186	bool _needs_permute;
				187	bool _is_prepared;
Giorgio Arena	93a690e	2017-08-01 16:09:33 +0100	[diff] [blame]	188	};
Georgios Pinitas	05045c1	2018-12-07 18:31:47 +0000	[diff] [blame]	189	} // namespace arm_compute
Giorgio Arena	9fe4144	2017-08-23 16:36:24 +0100	[diff] [blame]	190	#endif /* __ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H__ */