Blame - src/cpu/kernels/CpuWinogradConv2dKernel.h - ml/ComputeLibrary

blob: db2d8acfdbdeb4bb7913dd2ee028b7594f6e040a [file] [log] [blame]

Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	1	/*
Sheri Zhang	ac6499a	2021-02-10 15:32:38 +0000	[diff] [blame]	2	* Copyright (c) 2017-2021 Arm Limited.
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	24	#ifndef ARM_COMPUTE_CPUWINOGRADCONV2DKERNEL_H
				25	#define ARM_COMPUTE_CPUWINOGRADCONV2DKERNEL_H
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	26
Sang-Hoon Park	68dd25f	2020-10-19 16:00:11 +0100	[diff] [blame]	27	#include "src/core/NEON/kernels/convolution/common/convolution.hpp"
				28	#include "src/core/NEON/kernels/convolution/common/tensor.hpp"
Georgios Pinitas	7891a73	2021-08-20 21:39:25 +0100	[diff] [blame]	29	#include "src/cpu/ICpuKernel.h"
Michele Di Giorgio	6ad60af	2020-06-09 14:52:15 +0100	[diff] [blame]	30
				31	#include "src/core/NEON/kernels/convolution/winograd/winograd_layer.hpp"
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	32
				33	namespace arm_compute
				34	{
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	35	namespace cpu
				36	{
Michele Di Giorgio	33f41fa	2021-03-09 14:09:08 +0000	[diff] [blame]	37	/** Interface for the kernel to perform Winograd input transform. */
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	38	class ICpuWinogradConv2dTransformInputKernel : public ICpuKernel
Pablo Tello	3d4968a	2017-12-04 15:03:35 +0000	[diff] [blame]	39	{
				40	public:
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	41	/** Get the working space required to perform the transformation.
				42	*
				43	* Note, the working space is only required when performing the
				44	* transformation - hence it can be reused whenever the transformation is
				45	* not running.
				46	*
				47	* @param[in] num_threads The greatest number of threads that will be used to execute the transform.
				48	* @return Size of working space required in bytes.
				49	*/
				50	virtual unsigned int get_working_space_size(unsigned int num_threads) const = 0;
				51
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	52	/** Determine how much memory (in units of TIn) to allocate for the
				53	* transformed input.
Pablo Tello	6c6e77a	2018-01-23 10:03:27 +0000	[diff] [blame]	54	*
Pablo Tello	7df2786	2018-05-30 11:44:26 +0100	[diff] [blame]	55	* @param[in] num_batches Number of batches in the input tensor.
				56	* @param[in] num_channels Number of feature maps in the input tensor.
				57	* @param[in] num_rows Number of rows in each feature map.
				58	* @param[in] num_cols Number of columns in each feature map.
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	59	* @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame]	60	*
				61	* @return Storage size (in units of TIn) required.
Pablo Tello	6c6e77a	2018-01-23 10:03:27 +0000	[diff] [blame]	62	*/
Pablo Tello	7df2786	2018-05-30 11:44:26 +0100	[diff] [blame]	63	virtual unsigned int get_input_storage_size(int num_batches, int num_channels, int num_rows, int num_cols, bool same_padding) const = 0;
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	64
				65	/** Gets the stride between matrices in the input workspace
				66	*
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	67	* @param[in] num_batches Number of batches in the input tensor.
				68	* @param[in] num_channels Number of feature maps in the input tensor.
				69	* @param[in] num_rows Number of rows in each feature map.
				70	* @param[in] num_cols Number of columns in each feature map.
				71	* @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	72	*
				73	* @return Stride expressed in bytes.
				74	*/
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	75	virtual int get_matrix_stride(int num_batches, int num_channels, int num_rows, int num_cols, bool same_padding) const = 0;
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	76
				77	/** Configure the input transform kernel.
				78	*
Pablo Tello	7df2786	2018-05-30 11:44:26 +0100	[diff] [blame]	79	* @param[in] input_nhwc Input tensor in NHWC data layout format.
				80	* @param[in] num_batches Number of batches in input tensor.
				81	* @param[in] num_rows Number of rows in input tensor.
				82	* @param[in] num_cols Number of columns in input tensor.
				83	* @param[in] num_channels Number of channels in input tensor.
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	84	* @param[in] padding Padding type.
				85	* @param[out] output Base of output matrices.
				86	* @param[in] matrix_stride Stride between output matrices.
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	87	* @param[in] workspace Tensor to be used as the working space during the computation.
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	88	*/
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	89	virtual void configure(const ITensorInfo *input_nhwc, const int num_batches, const int num_rows, const int num_cols, const int num_channels,
				90	const PaddingType padding, ITensorInfo *output, const int matrix_stride, ITensorInfo *workspace) = 0;
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	91
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame]	92	/** Destructor */
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	93	virtual ~ICpuWinogradConv2dTransformInputKernel()
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	94	{
				95	}
				96	};
				97
Michele Di Giorgio	33f41fa	2021-03-09 14:09:08 +0000	[diff] [blame]	98	/** Kernel to perform Winograd input transform. */
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	99	template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	100	class CpuWinogradConv2dTransformInputKernel : public ICpuWinogradConv2dTransformInputKernel
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	101	{
				102	public:
Pablo Tello	7df2786	2018-05-30 11:44:26 +0100	[diff] [blame]	103	/** Prevent instances of this class from being copied (As this class contains pointers) */
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	104	CpuWinogradConv2dTransformInputKernel(const CpuWinogradConv2dTransformInputKernel &) = delete;
Pablo Tello	7df2786	2018-05-30 11:44:26 +0100	[diff] [blame]	105	/** Prevent instances of this class from being copied (As this class contains pointers) */
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	106	CpuWinogradConv2dTransformInputKernel &operator=(const CpuWinogradConv2dTransformInputKernel &) = delete;
Pablo Tello	7df2786	2018-05-30 11:44:26 +0100	[diff] [blame]	107	/** Allow instances of this class to be moved */
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	108	CpuWinogradConv2dTransformInputKernel(CpuWinogradConv2dTransformInputKernel &&) = default;
Pablo Tello	7df2786	2018-05-30 11:44:26 +0100	[diff] [blame]	109	/** Allow instances of this class to be moved */
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	110	CpuWinogradConv2dTransformInputKernel &operator=(CpuWinogradConv2dTransformInputKernel &&) = default;
Pablo Tello	7df2786	2018-05-30 11:44:26 +0100	[diff] [blame]	111	/** Default destructor */
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	112	~CpuWinogradConv2dTransformInputKernel() = default;
Pablo Tello	7df2786	2018-05-30 11:44:26 +0100	[diff] [blame]	113
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	114	/** Determine how much memory (in units of TIn) to allocate for the
				115	* transformed input.
				116	*
Pablo Tello	7df2786	2018-05-30 11:44:26 +0100	[diff] [blame]	117	* @param[in] num_batches Number of batches in the input tensor.
				118	* @param[in] num_channels Number of feature maps in the input tensor.
				119	* @param[in] num_rows Number of rows in each feature map.
				120	* @param[in] num_cols Number of columns in each feature map.
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	121	* @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame]	122	*
				123	* @return Storage size (in units of TIn) required.
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	124	*/
				125	unsigned int get_input_storage_size(
Pablo Tello	7df2786	2018-05-30 11:44:26 +0100	[diff] [blame]	126	int num_batches,
				127	int num_channels,
				128	int num_rows,
				129	int num_cols,
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	130	bool same_padding) const override;
				131
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	132	/** Get the working space required to perform the transformation.
				133	*
				134	* Note, the working space is only required when performing the
				135	* transformation - hence it can be reused whenever the transformation is
				136	* not running.
				137	*
				138	* @param[in] num_threads The greatest number of threads that will be used to execute the transform.
				139	*
				140	* @return Size of working space required in bytes.
				141	*/
				142	unsigned int get_working_space_size(unsigned int num_threads) const override;
				143
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	144	/** Gets the stride between matrices in the input workspace
				145	*
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	146	* @param[in] num_batches Number of batches in the input tensor.
				147	* @param[in] num_channels Number of feature maps in the input tensor.
				148	* @param[in] num_rows Number of rows in each feature map.
				149	* @param[in] num_cols Number of columns in each feature map.
				150	* @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	151	*
				152	* @return Stride expressed in bytes.
				153	*/
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	154	int get_matrix_stride(
				155	int num_batches,
				156	int num_channels,
				157	int num_rows,
				158	int num_cols,
				159	bool same_padding) const override;
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	160
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame]	161	/** Default constructor */
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	162	CpuWinogradConv2dTransformInputKernel();
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	163
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	164	const char *name() const override
				165	{
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	166	return "CpuWinogradConv2dTransformInputKernel";
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	167	}
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	168
				169	/** Configure the input transform kernel.
				170	*
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	171	* @param[in] input_nhwc Input tensor. Data types supported: F16/F32. Layout supported NHWC.
Pablo Tello	7df2786	2018-05-30 11:44:26 +0100	[diff] [blame]	172	* @param[in] num_batches Number of batches in input tensor.
				173	* @param[in] num_rows Number of rows in input tensor.
				174	* @param[in] num_cols Number of columns in input tensor.
				175	* @param[in] num_channels Number of channels in input tensor.
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	176	* @param[in] padding Padding type.
				177	* @param[out] output Base of output matrices.
				178	* @param[in] matrix_stride Stride between output matrices.
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	179	* @param[in] workspace Tensor to be used as the working space during the computation.
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	180	*/
				181	void configure(
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	182	const ITensorInfo *input_nhwc,
				183	const int num_batches,
				184	const int num_rows,
				185	const int num_cols,
				186	const int num_channels,
				187	const PaddingType padding,
				188	ITensorInfo *output,
				189	const int matrix_stride,
				190	ITensorInfo *workspace) override;
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	191
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	192	// Inherited methods overridden:
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	193	void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	194
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame]	195	/** Winograd base kernel */
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	196	using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, winograd::WinogradRoots::Integers>;
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame]	197	/** Winograd convolution kernel */
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	198	using WinogradConv = typename WinogradBase::template Convolution<T, T>;
				199
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	200	/** Static function to check if given info will lead to a valid configuration of @ref CpuWinogradConv2dTransformInputKernel
Vidhya Sudhan Loganathan	3ca9786	2018-04-23 08:20:04 +0100	[diff] [blame]	201	*
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	202	* @param[in] input First tensor input info. Data types supported: F16/F32.
Vidhya Sudhan Loganathan	84ce1f9	2018-04-25 13:00:09 +0100	[diff] [blame]	203	* @param[in] output Output tensor info. Data types supported: same as @p input.
				204	* @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
Vidhya Sudhan Loganathan	3ca9786	2018-04-23 08:20:04 +0100	[diff] [blame]	205	*
				206	* @return a status
				207	*/
Vidhya Sudhan Loganathan	84ce1f9	2018-04-25 13:00:09 +0100	[diff] [blame]	208	static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info);
Vidhya Sudhan Loganathan	3ca9786	2018-04-23 08:20:04 +0100	[diff] [blame]	209
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	210	private:
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	211	using InputTransform = typename WinogradBase::template InputTransform<T, T>;
				212
				213	std::unique_ptr<InputTransform> _transform{ nullptr };
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	214	int _num_channels; /**< Number of channels in input tensor. */
				215	int _matrix_stride; /**< Stride between output matrices. */
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	216	};
				217
Michele Di Giorgio	33f41fa	2021-03-09 14:09:08 +0000	[diff] [blame]	218	/** Interface for the kernel to perform Winograd output transform. */
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	219	class ICpuWinogradConv2dTransformOutputKernel : public ICpuKernel
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	220	{
				221	public:
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	222	/** Get the working space required to perform the transformation.
				223	*
				224	* Note, the working space is only required when performing the
				225	* transformation - hence it can be reused whenever the transformation is
				226	* not running.
				227	*
				228	* @param[in] num_threads The greatest number of threads that will be used to execute the transform.
				229	*
				230	* @return Size of working space required in bytes.
				231	*/
				232	virtual unsigned int get_working_space_size(unsigned int num_threads) const = 0;
				233
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	234	/** Determine how much memory (in units of TOut) to allocate for the
				235	* (Winograd domain) output.
				236	*
Pablo Tello	7df2786	2018-05-30 11:44:26 +0100	[diff] [blame]	237	* @param[in] num_batches Number of batches in the output tensor.
				238	* @param[in] num_rows Number of rows in each feature map of the input tensor.
				239	* @param[in] num_cols Number of columns in each feature map of the input tensor.
				240	* @param[in] num_output_channels Number of feature maps in the output tensor.
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame]	241	*
				242	* @return Storage size (in units of TOut) required.
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	243	*/
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	244	virtual unsigned int get_output_storage_size(int num_batches, int num_rows, int num_cols, int num_output_channels) const = 0;
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	245
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	246	/** Gets the stride between matrices in the output workspace
				247	*
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	248	* @param[in] num_batches Number of batches in the output tensor.
				249	* @param[in] num_rows Number of rows in each feature map of the input tensor.
				250	* @param[in] num_cols Number of columns in each feature map of the input tensor.
				251	* @param[in] num_output_channels Number of feature maps in the output tensor.
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	252	*
				253	* @return Stride expressed in bytes.
				254	*/
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	255	virtual int get_matrix_stride(int num_batches, int num_rows, int num_cols, int num_output_channels) const = 0;
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	256
				257	/** Get the output shape of a convolution.
				258	*
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	259	* @param[in] num_rows Number of rows in each feature map of the input tensor.
				260	* @param[in] num_cols Number of columns in each feature map of the input tensor.
				261	* @param[in] padding_same True if padding is SAME, false otherwise
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	262	*
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	263	* @return Shape of the output tensor
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	264	*/
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	265	virtual std::pair<unsigned int, unsigned int> get_output_shape(
				266	int num_rows, /* Number of rows in each feature map of the input tensor. */
				267	int num_cols, /* Number of columns in each feature map of the input tensor. */
				268	bool padding_same /* True if padding is SAME, false otherwise */
				269	) const = 0;
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	270
				271	/** Configure the output transform kernel.
				272	*
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	273	* @param[in] biases Pointer to the biases tensor.
				274	* @param[in] transformed_output Pointer to working space for the output tensor in the Winograd domain.
				275	* @param[in] matrix_stride Output matrix stride, can be computed with winograd::WinogradGEMM<2, 2, 3, 3>::Convolution<float, float>::get_output_matrix_stride()
				276	* @param[out] output_nhwc Pointer to a tensor in NHWC data layout ordered output tensor, in the spatial domain.
				277	* @param[in] num_batches Number of batches in the input tensor.
				278	* @param[in] num_rows Number of rows in output tensor.
				279	* @param[in] num_cols Number of columns in output tensor.
				280	* @param[in] num_channels Number of feature maps in the output tensor.
				281	* @param[in] workspace Tensor to be used as the working space during the computation.
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	282	* @param[in] activation Activation to be used
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	283	*/
				284	virtual void configure(
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	285	const ITensorInfo *biases,
				286	const ITensorInfo *transformed_output,
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	287	const int matrix_stride,
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	288	ITensorInfo *output_nhwc,
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	289	const int num_batches,
				290	const int num_rows,
				291	const int num_cols,
				292	const int num_channels,
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	293	ITensorInfo *workspace,
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	294	const arm_gemm::Activation &activation) = 0;
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	295
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	296	virtual ~ICpuWinogradConv2dTransformOutputKernel()
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	297	{
				298	}
				299	};
				300
Michele Di Giorgio	33f41fa	2021-03-09 14:09:08 +0000	[diff] [blame]	301	/** Kernel to perform Winograd output transform. */
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	302	template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	303	class CpuWinogradConv2dTransformOutputKernel : public ICpuWinogradConv2dTransformOutputKernel
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	304	{
				305	public:
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	306	const char *name() const override
				307	{
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	308	return "CpuWinogradConv2dTransformOutputKernel";
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	309	}
				310	/** Constructor */
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	311	CpuWinogradConv2dTransformOutputKernel();
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	312
				313	/** Prevent instances of this class from being copied (As this class contains pointers) */
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	314	CpuWinogradConv2dTransformOutputKernel(const CpuWinogradConv2dTransformOutputKernel &) = delete;
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	315	/** Prevent instances of this class from being copied (As this class contains pointers) */
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	316	CpuWinogradConv2dTransformOutputKernel &operator=(const CpuWinogradConv2dTransformOutputKernel &) = delete;
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	317	/** Allow instances of this class to be moved */
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	318	CpuWinogradConv2dTransformOutputKernel(CpuWinogradConv2dTransformOutputKernel &&) = default;
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	319	/** Allow instances of this class to be moved */
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	320	CpuWinogradConv2dTransformOutputKernel &operator=(CpuWinogradConv2dTransformOutputKernel &&) = default;
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame]	321	/** Default destructor */
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	322	~CpuWinogradConv2dTransformOutputKernel() = default;
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	323
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	324	// Inherited methods overridden:
				325	/** Determine how much memory (in units of TOut) to allocate for the
				326	* (Winograd domain) output.
				327	*
Pablo Tello	7df2786	2018-05-30 11:44:26 +0100	[diff] [blame]	328	* @param[in] num_batches Number of batches in the output tensor.
				329	* @param[in] num_rows Number of rows in each feature map of the input tensor.
				330	* @param[in] num_cols Number of columns in each feature map of the input tensor.
				331	* @param[in] num_output_channels Number of feature maps in the output tensor.
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame]	332	*
				333	* @return Storage size (in units of TOut) required.
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	334	*/
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	335	unsigned int get_output_storage_size(int num_batches, int num_rows, int num_cols, int num_output_channels) const override;
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	336
				337	/** Gets the stride between matrices in the output workspace
				338	*
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	339	* @param[in] num_batches Number of batches in the output tensor.
				340	* @param[in] num_rows Number of rows in each feature map of the input tensor.
				341	* @param[in] num_cols Number of columns in each feature map of the input tensor.
				342	* @param[in] num_output_channels Number of feature maps in the output tensor.
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	343	*
				344	* @return Stride expressed in bytes.
				345	*/
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	346	int get_matrix_stride(int num_batches, int num_rows, int num_cols, int num_output_channels) const override;
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	347	/** Get the output shape of a convolution.
				348	*
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	349	* @param[in] num_rows Number of rows in each feature map of the input tensor.
				350	* @param[in] num_cols Number of columns in each feature map of the input tensor.
				351	* @param[in] padding_same True if padding is SAME, false otherwise
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	352	*
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	353	* @return Shape of the output tensor
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	354	*/
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	355	std::pair<unsigned int, unsigned int> get_output_shape(
				356	int num_rows, /* Number of rows in each feature map of the input tensor. */
				357	int num_cols, /* Number of columns in each feature map of the input tensor. */
				358	bool padding_same) const override;
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	359
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	360	/** Get the working space required to perform the transformation.
				361	*
				362	* Note, the working space is only required when performing the
				363	* transformation - hence it can be reused whenever the transformation is
				364	* not running.
				365	*
				366	* @param[in] num_threads The greatest number of threads that will be used to execute the transform.
				367	*
				368	* @return Size of working space required in bytes.
				369	*/
				370	unsigned int get_working_space_size(unsigned int num_threads) const override;
				371
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	372	/** Configure the output transform kernel.
				373	*
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	374	* @param[in] biases Pointer to the biases tensor.
				375	* @param[in] transformed_output Pointer to working space for the output tensor in the Winograd domain.
				376	* @param[in] matrix_stride Output matrix stride, can be computed with winograd::WinogradGEMM<2, 2, 3, 3>::Convolution<float, float>::get_output_matrix_stride()
				377	* @param[out] output_nhwc Pointer to a tensor with NHWC data layout, in the spatial domain.
				378	* @param[in] num_batches Number of batches in the input tensor.
				379	* @param[in] num_rows Number of rows in output tensor.
				380	* @param[in] num_cols Number of columns in output tensor.
				381	* @param[in] num_channels Number of feature maps in the output tensor.
				382	* @param[in] workspace Tensor to be used as the working space during the computation.
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	383	* @param[in] activation Activation to be used
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	384	*/
				385	void configure(
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	386	const ITensorInfo *biases,
				387	const ITensorInfo *transformed_output,
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	388	const int matrix_stride,
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	389	ITensorInfo *output_nhwc,
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	390	const int num_batches,
				391	const int num_rows,
				392	const int num_cols,
				393	const int num_channels,
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	394	ITensorInfo *workspace,
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	395	const arm_gemm::Activation &activation) override;
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	396
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	397	void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	398
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	399	/** Static function to check if given info will lead to a valid configuration of @ref CpuWinogradConv2dTransformOutputKernel
Vidhya Sudhan Loganathan	3ca9786	2018-04-23 08:20:04 +0100	[diff] [blame]	400	*
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	401	* @param[in] input Source tensor info with shape [C, N, 16, batches] or [C, N, 36, batches]. Data types supported: F16/F32.
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	402	* @param[in] bias Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input
				403	* @param[in] output Destination tensor info with shape [output_convolved_dims.width, output_convolved_dims.height, C, batches]. Data type supported: same as @p input
				404	* @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
Vidhya Sudhan Loganathan	3ca9786	2018-04-23 08:20:04 +0100	[diff] [blame]	405	*
				406	* @return a status
				407	*/
Vidhya Sudhan Loganathan	84ce1f9	2018-04-25 13:00:09 +0100	[diff] [blame]	408	static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const WinogradInfo &winograd_info);
Vidhya Sudhan Loganathan	3ca9786	2018-04-23 08:20:04 +0100	[diff] [blame]	409
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	410	private:
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	411	using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, winograd::WinogradRoots::Integers>;
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	412	using WinogradConv = typename WinogradBase::template Convolution<T, T>;
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	413	using OutputTransform = typename WinogradBase::template OutputTransform<T, T>;
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	414
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	415	std::unique_ptr<OutputTransform> _transform{ nullptr };
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	416	int _matrix_stride;
				417	int _matrix_row_stride;
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	418	};
				419
Michele Di Giorgio	33f41fa	2021-03-09 14:09:08 +0000	[diff] [blame]	420	/** Abstract interface for kernels that perform the Winograd weights transform: the sizing, stride and configure hooks are pure virtual. */
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	421	class ICpuWinogradConv2dTransformWeightsKernel : public ICpuKernel
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	422	{
				423	public:
Pablo Tello	bda6e4b	2018-08-22 11:40:33 +0100	[diff] [blame]	424	/** Copy constructor (defaulted). NOTE(review): this comment used to say copying is prevented, but the declaration is defaulted, so copies are allowed - confirm the intent. */
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	425	ICpuWinogradConv2dTransformWeightsKernel(const ICpuWinogradConv2dTransformWeightsKernel &) = default;
Pablo Tello	bda6e4b	2018-08-22 11:40:33 +0100	[diff] [blame]	426	/** Copy assignment operator (defaulted). NOTE(review): same mismatch as the copy constructor - the operator is defaulted, not deleted. */
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	427	ICpuWinogradConv2dTransformWeightsKernel &operator=(const ICpuWinogradConv2dTransformWeightsKernel &) = default;
Pablo Tello	bda6e4b	2018-08-22 11:40:33 +0100	[diff] [blame]	428	/** Move constructor (defaulted): instances of this class can be moved */
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	429	ICpuWinogradConv2dTransformWeightsKernel(ICpuWinogradConv2dTransformWeightsKernel &&) = default;
Pablo Tello	bda6e4b	2018-08-22 11:40:33 +0100	[diff] [blame]	430	/** Move assignment operator (defaulted): instances of this class can be moved */
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	431	ICpuWinogradConv2dTransformWeightsKernel &operator=(ICpuWinogradConv2dTransformWeightsKernel &&) = default;
Pablo Tello	bda6e4b	2018-08-22 11:40:33 +0100	[diff] [blame]	432
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	433	ICpuWinogradConv2dTransformWeightsKernel()
Pablo Tello	bda6e4b	2018-08-22 11:40:33 +0100	[diff] [blame]	434	{
				435	}
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	436	virtual ~ICpuWinogradConv2dTransformWeightsKernel()
Pablo Tello	bda6e4b	2018-08-22 11:40:33 +0100	[diff] [blame]	437	{
				438	}
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	439	/** Determine how much memory (in units of T) to allocate for the
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	440	* transformed weights.
				441	*
Pablo Tello	7df2786	2018-05-30 11:44:26 +0100	[diff] [blame]	442	* @param[in] num_output_channels Number of output feature maps.
				443	* @param[in] num_input_channels Number of input feature maps.
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame]	444	*
				445	* @return Storage size (in units of T) required.
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	446	*/
Pablo Tello	7df2786	2018-05-30 11:44:26 +0100	[diff] [blame]	447	virtual unsigned int get_weight_storage_size(int num_output_channels, int num_input_channels) const = 0;
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	448	/** Gets the stride between matrices in the kernel workspace
				449	*
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	450	* @param[in] num_output_channels Number of output feature maps.
				451	* @param[in] num_input_channels Number of input feature maps.
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	452	*
				453	* @return Stride expressed in bytes.
				454	*/
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	455	virtual int get_matrix_stride(int num_output_channels, int num_input_channels) const = 0;
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	456
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	457	/** Configure the weights transform kernel.
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	458	* @note Review: weights_hwio and output are documented as pointers, yet the declaration below takes them by value; pointer declarators may have been lost. Confirm against the implementation.
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	459	* @param[in] weights_hwio Pointer to the weights tensor info
Anthony Barbier	e155337	2018-07-16 18:53:52 +0100	[diff] [blame]	460	* @param[out] output Pointer to working space for the output tensor in the Winograd domain.
				461	* @param[in] matrix_stride Stride across matrices in the output workspace.
				462	* @param[in] num_output_channels Number of filters.
				463	* @param[in] num_input_channels Number of channels in each filter.
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	464	*/
Pablo Tello	7df2786	2018-05-30 11:44:26 +0100	[diff] [blame]	465
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	466	virtual void configure(const ITensorInfo weights_hwio, ITensorInfo output, const int matrix_stride, const int num_output_channels, const int num_input_channels) = 0;
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	467
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	468	/** Static function to check if given info will lead to a valid configuration of @ref CpuWinogradConv2dTransformWeightsKernel
Pablo Tello	bda6e4b	2018-08-22 11:40:33 +0100	[diff] [blame]	469	*
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	470	* @param[in] input First tensor input info. Data types supported: F16/F32.
Pablo Tello	bda6e4b	2018-08-22 11:40:33 +0100	[diff] [blame]	471	* @param[in] weights Weights tensor info. Data types supported: same as @p input.
				472	* @note Review: both tensor infos are taken by value in this declaration; verify this against the implementation.
				473	* @return a status
				474	*/
				475	static Status validate(const ITensorInfo input, const ITensorInfo weights);
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	476	};
				477
Michele Di Giorgio	33f41fa	2021-03-09 14:09:08 +0000	[diff] [blame]	478	/** Kernel to perform Winograd weights transform: final implementation of ICpuWinogradConv2dTransformWeightsKernel, templated on the element type T, the output tile size and the kernel size. */
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	479	template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	480	class CpuWinogradConv2dTransformWeightsKernel final : public ICpuWinogradConv2dTransformWeightsKernel
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	481	{
				482	public:
Pablo Tello	7df2786	2018-05-30 11:44:26 +0100	[diff] [blame]	483	/** Copy construction is deleted (this class owns its transform through a std::unique_ptr) */
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	484	CpuWinogradConv2dTransformWeightsKernel(const CpuWinogradConv2dTransformWeightsKernel &) = delete;
Pablo Tello	7df2786	2018-05-30 11:44:26 +0100	[diff] [blame]	485	/** Copy assignment is deleted (this class owns its transform through a std::unique_ptr) */
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	486	CpuWinogradConv2dTransformWeightsKernel &operator=(const CpuWinogradConv2dTransformWeightsKernel &) = delete;
Pablo Tello	7df2786	2018-05-30 11:44:26 +0100	[diff] [blame]	487	/** Move constructor (defaulted): instances of this class can be moved */
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	488	CpuWinogradConv2dTransformWeightsKernel(CpuWinogradConv2dTransformWeightsKernel &&) = default;
Pablo Tello	7df2786	2018-05-30 11:44:26 +0100	[diff] [blame]	489	/** Move assignment operator (defaulted): instances of this class can be moved */
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	490	CpuWinogradConv2dTransformWeightsKernel &operator=(CpuWinogradConv2dTransformWeightsKernel &&) = default;
Pablo Tello	7df2786	2018-05-30 11:44:26 +0100	[diff] [blame]	491	/** Default destructor */
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	492	~CpuWinogradConv2dTransformWeightsKernel() = default;
Pablo Tello	7df2786	2018-05-30 11:44:26 +0100	[diff] [blame]	493
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame]	494	/** Default constructor (declared here, defined outside this header section). */
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	495	CpuWinogradConv2dTransformWeightsKernel();
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	496	const char *name() const override
				497	{
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	498	return "CpuWinogradConv2dTransformWeightsKernel";
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	499	}
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	500
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	501	/** Static function to check if given info will lead to a valid configuration of @ref CpuWinogradConv2dTransformWeightsKernel
Vidhya Sudhan Loganathan	3ca9786	2018-04-23 08:20:04 +0100	[diff] [blame]	502	*
Vidhya Sudhan Loganathan	84ce1f9	2018-04-25 13:00:09 +0100	[diff] [blame]	503	* @param[in] input Source tensor info. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout).
Georgios Pinitas	5ce897f	2020-04-29 11:44:10 +0100	[diff] [blame]	504	* kernel_x must be 3 and equal to kernel_y. Data types supported: F16/F32.
Vidhya Sudhan Loganathan	84ce1f9	2018-04-25 13:00:09 +0100	[diff] [blame]	505	* @param[in] output Destination tensor info. The output is a 3D tensor with dimensions [OFM, IFM, 16] or [OFM, IFM, 36]. Data type supported: same as @p input
				506	* @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
Vidhya Sudhan Loganathan	3ca9786	2018-04-23 08:20:04 +0100	[diff] [blame]	507	* @note Review: input and output are taken by value in this declaration; verify this against the implementation.
				508	* @return a status
				509	*/
Vidhya Sudhan Loganathan	84ce1f9	2018-04-25 13:00:09 +0100	[diff] [blame]	510	static Status validate(const ITensorInfo input, const ITensorInfo output, const WinogradInfo &winograd_info);
Vidhya Sudhan Loganathan	3ca9786	2018-04-23 08:20:04 +0100	[diff] [blame]	511
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	512	// Inherited methods overridden:
Vidhya Sudhan Loganathan	d646ae1	2018-11-19 15:18:20 +0000	[diff] [blame]	513
				514	#ifndef DOXYGEN_SKIP_THIS
				515	/** Configure the weights transform kernel.
				516	* @note Review: weights_hwio and output are documented as pointers, yet the declaration below takes them by value; pointer declarators may have been lost. Confirm against the implementation.
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	517	* @param[in] weights_hwio Pointer to the weights tensor info
Vidhya Sudhan Loganathan	d646ae1	2018-11-19 15:18:20 +0000	[diff] [blame]	518	* @param[out] output Pointer to working space for the output tensor in the Winograd domain.
				519	* @param[in] matrix_stride Stride across matrices in the output workspace.
				520	* @param[in] num_output_channels Number of filters.
				521	* @param[in] num_input_channels Number of channels in each filter.
				522	*/
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	523	void configure(const ITensorInfo weights_hwio, ITensorInfo output, const int matrix_stride, const int num_output_channels, const int num_input_channels) override;
Vidhya Sudhan Loganathan	d646ae1	2018-11-19 15:18:20 +0000	[diff] [blame]	524	#endif /* DOXYGEN_SKIP_THIS */
				525
				526	/** Determine how much memory (in units of T) to allocate for the
				527	* transformed weights.
				528	*
				529	* @param[in] num_output_channels Number of output feature maps.
				530	* @param[in] num_input_channels Number of input feature maps.
				531	*
				532	* @return Storage size (in units of T) required.
				533	*/
Pablo Tello	7df2786	2018-05-30 11:44:26 +0100	[diff] [blame]	534	unsigned int get_weight_storage_size(int num_output_channels, int num_input_channels) const override;
Vidhya Sudhan Loganathan	d646ae1	2018-11-19 15:18:20 +0000	[diff] [blame]	535
				536	/** Gets the stride between matrices in the kernel workspace
				537	*
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	538	* @param[in] num_output_channels Number of output feature maps.
				539	* @param[in] num_input_channels Number of input feature maps.
Vidhya Sudhan Loganathan	d646ae1	2018-11-19 15:18:20 +0000	[diff] [blame]	540	*
				541	* @return Stride expressed in bytes.
				542	*/
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	543	int get_matrix_stride(int num_output_channels, int num_input_channels) const override;
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	544	void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	545	bool is_parallelisable() const override;
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	546
				547	private:
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	548	using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, winograd::WinogradRoots::Integers>;
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	549	using WinogradConv = typename WinogradBase::template Convolution<T, T>;
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	550	using WeightsTransform = typename WinogradBase::template WeightsTransform<T, T>;
Pablo Tello	7df2786	2018-05-30 11:44:26 +0100	[diff] [blame]	551
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	552	std::unique_ptr<WeightsTransform> _transform{ nullptr }; /**< Owning pointer to the weights transform object; starts as nullptr. */
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	553	int _num_output_channels; /**< No in-class initializer; presumably set by the constructor or configure() - TODO confirm. */
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	554	int _matrix_stride; /**< No in-class initializer; presumably set by the constructor or configure() - TODO confirm. */
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	555	};
				556
Michele Di Giorgio	33f41fa	2021-03-09 14:09:08 +0000	[diff] [blame]	557	/** Compile-time configuration for a Winograd convolution: contains only type aliases that select the base GEMM, the convolution and the three transform kernels for the given data types, output tile size and kernel size. */
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	558	template <typename TIn, typename TOut, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	559	class CpuWinogradConv2dConfiguration
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	560	{
				561	public:
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame]	562	/** Winograd base kernel */
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	563	using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, winograd::WinogradRoots::Integers>;
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame]	564	/** Winograd convolution kernel (input type TIn, output type TOut) */
Anthony Barbier	e155337	2018-07-16 18:53:52 +0100	[diff] [blame]	565
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	566	using WinogradConv = typename WinogradBase::template Convolution<TIn, TOut>;
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	567
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	568	using TransformInputKernel = CpuWinogradConv2dTransformInputKernel<TIn, OutputTileRows, OutputTileCols, KernelRows, KernelCols>; /**< Input transform kernel type, instantiated on TIn. */
				569	using TransformWeightsKernel = CpuWinogradConv2dTransformWeightsKernel<TIn, OutputTileRows, OutputTileCols, KernelRows, KernelCols>; /**< Weights transform kernel type, instantiated on TIn. */
				570	using TransformOutputKernel = CpuWinogradConv2dTransformOutputKernel<TOut, OutputTileRows, OutputTileCols, KernelRows, KernelCols>; /**< Output transform kernel type, instantiated on TOut. */
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	571	};
				572
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	573	} // namespace cpu
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	574	} // namespace arm_compute
Michalis Spyrou	96f977e	2021-07-01 12:20:56 +0100	[diff] [blame]	575	#endif /*ARM_COMPUTE_CPUWINOGRADCONV2DKERNEL_H*/