Blame - src/gpu/cl/operators/ClWinogradConv2d.h - ml/ComputeLibrary - Gitiles

blob: 54ec1a173780175e1b30a65ab9a1a108dde5f363 [file] [log] [blame]

Manuel Bottini	c6f4ec3	2021-05-18 18:41:56 +0100	[diff] [blame]	1	/*
				2	* Copyright (c) 2018-2021 Arm Limited.
				3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#ifndef ARM_COMPUTE_CL_WINOGRADCONV2D_H
				25	#define ARM_COMPUTE_CL_WINOGRADCONV2D_H
				26
				27	#include "arm_compute/runtime/CL/CLTensor.h"
Felix Thomasmathibalan	afd38f0	2023-09-27 17:46:17 +0100	[diff] [blame]	28
Manuel Bottini	c6f4ec3	2021-05-18 18:41:56 +0100	[diff] [blame]	29	#include "src/core/CL/kernels/CLFillBorderKernel.h"
Georgios Pinitas	7891a73	2021-08-20 21:39:25 +0100	[diff] [blame]	30	#include "src/gpu/cl/ClCompileContext.h"
				31	#include "src/gpu/cl/IClOperator.h"
				32	#include "src/gpu/cl/operators/ClGemm.h"
Manuel Bottini	c6f4ec3	2021-05-18 18:41:56 +0100	[diff] [blame]	33
				34	namespace arm_compute
				35	{
				36	class CLCompileContext;
				37	class ITensorInfo;
				38	namespace opencl
				39	{
				40	namespace kernels
				41	{
				42	class ClWinogradInputTransformKernel;
				43	class ClWinogradFilterTransformKernel;
				44	class ClWinogradOutputTransformKernel;
Felix Thomasmathibalan	afd38f0	2023-09-27 17:46:17 +0100	[diff] [blame]	45	} // namespace kernels
Manuel Bottini	c6f4ec3	2021-05-18 18:41:56 +0100	[diff] [blame]	46	/** Basic operator to execute Winograd-based convolution on OpenCL. This operator calls the following OpenCL functions/kernels:
				47	*
				48	* -# @ref kernels::ClWinogradInputTransformKernel
				49	* -# @ref kernels::ClWinogradFilterTransformKernel (only once)
				50	* -# @ref ClGemm
				51	* -# @ref kernels::ClWinogradOutputTransformKernel
				52	*
				53	*/
				54	class ClWinogradConv2d : public IClOperator
				55	{
				56	public:
				57	/** Default constructor */
				58	ClWinogradConv2d();
				59	/** Destructor */
				60	~ClWinogradConv2d();
				61	/** Prevent instances of this class from being copied (as this class contains pointers) */
				62	ClWinogradConv2d(const ClWinogradConv2d &) = delete;
				63	/** Allow instances of this class to be moved (defaulted move constructor) */
				64	ClWinogradConv2d(ClWinogradConv2d &&) = default;
				65	/** Prevent instances of this class from being copy-assigned (as this class contains pointers) */
				66	ClWinogradConv2d &operator=(const ClWinogradConv2d &) = delete;
				67	/** Allow instances of this class to be move-assigned (defaulted move assignment operator) */
				68	ClWinogradConv2d &operator=(ClWinogradConv2d &&) = default;
				69	/** Configure the operator from the source, weights, biases and destination tensor infos.
				70	*
				71	* Valid data layouts:
				72	* - NHWC
				73	* - NCHW
				74	*
				75	* Valid data type configurations:
				76	* |src0 |src1 |src2 |dst |
				77	* |:--------------|:--------------|:------|:--------------|
				78	* |F16 |F16 |F16 |F16 |
				79	* |F32 |F32 |F32 |F32 |
				80	*
				81	* @note This function only works with 3x3, 3x1, 1x3, 5x5, 5x1, 1x5, 7x1 and 1x7 kernels along with unit strides for both NCHW and NHWC data layouts
				82	* @note Some Winograd configurations (i.e. F(4x4, 5x5)) are supported only with enable_fast_math = true
				83	*
				84	* @param[in] compile_context The compile context to be used.
				85	* @param[in] src Source tensor info. 3 lower dimensions represent a single input [width, height, IFM],
				86	* while every optional dimension from 4 and above represent a batch of inputs.
				87	* Data types supported: F16/F32.
				88	* @param[in] weights Weights tensor info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data types supported: Same as @p src.
				89	* @param[in] biases Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data types supported: Same as @p src.
				90	* @param[out] dst Destination tensor info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
				91	* Data types supported: Same as @p src.
				92	* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
				93	* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
				94	* @param[in] enable_fast_math (Optional) Enable fast math computation. If this flag is set, the function may dispatch the fastest implementation
				95	* available, which may also introduce a drop in accuracy. Default is false.
				96	*/
Felix Thomasmathibalan	afd38f0	2023-09-27 17:46:17 +0100	[diff] [blame]	97	void configure(const ClCompileContext &compile_context,
				98	ITensorInfo *src,
				99	ITensorInfo *weights,
				100	ITensorInfo *biases,
				101	ITensorInfo *dst,
				102	const PadStrideInfo &conv_info,
				103	const ActivationLayerInfo &act_info = ActivationLayerInfo(),
				104	bool enable_fast_math = false);
Manuel Bottini	c6f4ec3	2021-05-18 18:41:56 +0100	[diff] [blame]	105	/** Static function to check if the given info will lead to a valid configuration
				106	*
				107	* Takes the same parameters as ClWinogradConv2d::configure(), except that there is no compile context and the tensor infos are const.
				108	*
				109	* @return a status
				110	*/
Felix Thomasmathibalan	afd38f0	2023-09-27 17:46:17 +0100	[diff] [blame]	111	static Status validate(const ITensorInfo *src,
				112	const ITensorInfo *weights,
				113	const ITensorInfo *biases,
				114	const ITensorInfo *dst,
				115	const PadStrideInfo &conv_info,
				116	const ActivationLayerInfo &act_info = ActivationLayerInfo(),
				117	bool enable_fast_math = false);
Manuel Bottini	c6f4ec3	2021-05-18 18:41:56 +0100	[diff] [blame]	118
				119	// Inherited methods overridden
Felix Thomasmathibalan	afd38f0	2023-09-27 17:46:17 +0100	[diff] [blame]	120	void run(ITensorPack &tensors) override;
				121	void prepare(ITensorPack &tensors) override;
Manuel Bottini	c6f4ec3	2021-05-18 18:41:56 +0100	[diff] [blame]	122	experimental::MemoryRequirements workspace() const override;
				123
				124	private:
				125	ClGemm _batched_mm; // GEMM operator (the ClGemm step listed in the class description)
				126	std::unique_ptr<kernels::ClWinogradInputTransformKernel> _input_transform; // Owned Winograd input transform kernel
				127	std::unique_ptr<kernels::ClWinogradFilterTransformKernel> _filter_transform; // Owned Winograd filter transform kernel
				128	std::unique_ptr<kernels::ClWinogradOutputTransformKernel> _output_transform; // Owned Winograd output transform kernel
				129	CLFillBorderKernel _border_handler; // Border fill kernel
				130	TensorInfo _input0; // NOTE(review): presumably the info of an intermediate tensor - confirm against the implementation
				131	TensorInfo _input1; // NOTE(review): presumably the info of an intermediate tensor - confirm against the implementation
				132	TensorInfo _batched_mm_output; // NOTE(review): presumably the info of the _batched_mm result tensor - confirm against the implementation
				133	bool _is_prepared; // NOTE(review): presumably set once prepare() has run; not initialized in-class, so the constructor must set it - confirm
				134	experimental::MemoryRequirements _aux_mem{}; // NOTE(review): presumably the requirements reported by workspace() - confirm
				135	};
				136	} // namespace opencl
				137	} // namespace arm_compute
				138	#endif /* ARM_COMPUTE_CL_WINOGRADCONV2D_H */