/*
 * Copyright (c) 2017-2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef ARM_COMPUTE_NEGEMMWINOGRADCONVOLUTIONLAYERKERNEL_H
#define ARM_COMPUTE_NEGEMMWINOGRADCONVOLUTIONLAYERKERNEL_H

#include "arm_compute/core/NEON/INEKernel.h"
#include "src/core/NEON/kernels/convolution/common/convolution.hpp"
#include "src/core/NEON/kernels/convolution/common/tensor.hpp"

#include "src/core/NEON/kernels/convolution/winograd/winograd_layer.hpp"

namespace arm_compute
{
// Forward declarations
class ITensor;

/** Interface for the NEON kernel to perform Winograd input transform. */
class INEWinogradLayerTransformInputKernel : public INEKernel
{
public:
    /** Get the working space required to perform the transformation.
     *
     * Note, the working space is only required when performing the
     * transformation - hence it can be reused whenever the transformation is
     * not running.
     *
     * @param[in] num_threads The greatest number of threads that will be used to execute the transform.
     *
     * @return Size of working space required in bytes.
     */
    virtual unsigned int get_working_space_size(unsigned int num_threads) const = 0;

    /** Determine how much memory (in units of TIn) to allocate for the
     * transformed input.
     *
     * @param[in] num_batches  Number of batches in the input tensor.
     * @param[in] num_channels Number of feature maps in the input tensor.
     * @param[in] num_rows     Number of rows in each feature map.
     * @param[in] num_cols     Number of columns in each feature map.
     * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
     *
     * @return Storage size (in units of TIn) required.
     */
    virtual unsigned int get_input_storage_size(int num_batches, int num_channels, int num_rows, int num_cols, bool same_padding) const = 0;

    /** Gets the stride between matrices in the input workspace
     *
     * @param[in] num_batches  Number of batches in the input tensor.
     * @param[in] num_channels Number of feature maps in the input tensor.
     * @param[in] num_rows     Number of rows in each feature map.
     * @param[in] num_cols     Number of columns in each feature map.
     * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
     *
     * @return Stride expressed in bytes.
     */
    virtual int get_matrix_stride(int num_batches, int num_channels, int num_rows, int num_cols, bool same_padding) const = 0;

    /** Configure the input transform kernel.
     *
     * @param[in]  input_nhwc    Input tensor in NHWC data layout format.
     * @param[in]  num_batches   Number of batches in input tensor.
     * @param[in]  num_rows      Number of rows in input tensor.
     * @param[in]  num_cols      Number of columns in input tensor.
     * @param[in]  num_channels  Number of channels in input tensor.
     * @param[in]  padding       Padding type.
     * @param[out] output        Base of output matrices.
     * @param[in]  matrix_stride Stride between output matrices.
     * @param[in]  workspace     Tensor to be used as the working space during the computation.
     */
    virtual void configure(const ITensor *input_nhwc, const int num_batches, const int num_rows, const int num_cols, const int num_channels,
                           const PaddingType padding, ITensor *output, const int matrix_stride, ITensor *workspace) = 0;

    /** Destructor */
    virtual ~INEWinogradLayerTransformInputKernel()
    {
    }
};
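
/* Editorial usage sketch (not part of the library API): how a caller might size the buffers
 * described by this interface before configuring a concrete input transform. The tensor
 * dimensions and the thread count below are assumptions made up for the example.
 *
 *   void size_input_transform_buffers(const INEWinogradLayerTransformInputKernel &transform, unsigned int num_threads)
 *   {
 *       const int  num_batches = 1, num_channels = 64, num_rows = 56, num_cols = 56;
 *       const bool same_padding = true;
 *       // Storage for the Winograd-domain input, in units of TIn (multiply by the element size for bytes).
 *       const unsigned int storage_elems  = transform.get_input_storage_size(num_batches, num_channels, num_rows, num_cols, same_padding);
 *       // Byte stride between consecutive Winograd matrices inside that buffer.
 *       const int          matrix_stride  = transform.get_matrix_stride(num_batches, num_channels, num_rows, num_cols, same_padding);
 *       // Scratch needed only while the transform runs; it can be reused between runs.
 *       const unsigned int workspace_size = transform.get_working_space_size(num_threads);
 *       (void)storage_elems; (void)matrix_stride; (void)workspace_size;
 *   }
 */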

/** NEON kernel to perform Winograd input transform. */
template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerTransformInputKernel : public INEWinogradLayerTransformInputKernel
{
public:
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformInputKernel(const NEWinogradLayerTransformInputKernel &) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformInputKernel &operator=(const NEWinogradLayerTransformInputKernel &) = delete;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformInputKernel(NEWinogradLayerTransformInputKernel &&) = default;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformInputKernel &operator=(NEWinogradLayerTransformInputKernel &&) = default;
    /** Default destructor */
    ~NEWinogradLayerTransformInputKernel() = default;

    /** Determine how much memory (in units of TIn) to allocate for the
     * transformed input.
     *
     * @param[in] num_batches  Number of batches in the input tensor.
     * @param[in] num_channels Number of feature maps in the input tensor.
     * @param[in] num_rows     Number of rows in each feature map.
     * @param[in] num_cols     Number of columns in each feature map.
     * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
     *
     * @return Storage size (in units of TIn) required.
     */
    unsigned int get_input_storage_size(
        int  num_batches,
        int  num_channels,
        int  num_rows,
        int  num_cols,
        bool same_padding) const override;

    /** Get the working space required to perform the transformation.
     *
     * Note, the working space is only required when performing the
     * transformation - hence it can be reused whenever the transformation is
     * not running.
     *
     * @param[in] num_threads The greatest number of threads that will be used to execute the transform.
     *
     * @return Size of working space required in bytes.
     */
    unsigned int get_working_space_size(unsigned int num_threads) const override;

    /** Gets the stride between matrices in the input workspace
     *
     * @param[in] num_batches  Number of batches in the input tensor.
     * @param[in] num_channels Number of feature maps in the input tensor.
     * @param[in] num_rows     Number of rows in each feature map.
     * @param[in] num_cols     Number of columns in each feature map.
     * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
     *
     * @return Stride expressed in bytes.
     */
    int get_matrix_stride(
        int  num_batches,
        int  num_channels,
        int  num_rows,
        int  num_cols,
        bool same_padding) const override;

    /** Default constructor */
    NEWinogradLayerTransformInputKernel();

    const char *name() const override
    {
        return "NEWinogradLayerTransformInputKernel";
    }

    /** Configure the input transform kernel.
     *
     * @param[in]  input_nhwc    Input tensor. Data types supported: F16/F32. Layout supported: NHWC.
     * @param[in]  num_batches   Number of batches in input tensor.
     * @param[in]  num_rows      Number of rows in input tensor.
     * @param[in]  num_cols      Number of columns in input tensor.
     * @param[in]  num_channels  Number of channels in input tensor.
     * @param[in]  padding       Padding type.
     * @param[out] output        Base of output matrices.
     * @param[in]  matrix_stride Stride between output matrices.
     * @param[in]  workspace     Tensor to be used as the working space during the computation.
     */
    void configure(
        const ITensor    *input_nhwc,
        const int         num_batches,
        const int         num_rows,
        const int         num_cols,
        const int         num_channels,
        const PaddingType padding,
        ITensor          *output,
        const int         matrix_stride,
        ITensor          *workspace) override;

    // Inherited methods overridden:
    void run(const Window &window, const ThreadInfo &info) override;

    /** Winograd base kernel */
    using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, winograd::WinogradRoots::Integers>;
    /** Winograd convolution kernel */
    using WinogradConv = typename WinogradBase::template Convolution<T, T>;

    /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformInputKernel
     *
     * @param[in] input         First tensor input info. Data types supported: F16/F32.
     * @param[in] output        Output tensor info. Data types supported: same as @p input.
     * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info);

private:
    using InputTransform = typename WinogradBase::template InputTransform<T, T>;

    std::unique_ptr<InputTransform> _transform{ nullptr };
    const ITensor                  *_input_nhwc;
    int                             _num_batches;    /**< Number of batches in input tensor. */
    int                             _num_rows;       /**< Number of rows in input tensor. */
    int                             _num_cols;       /**< Number of columns in input tensor. */
    int                             _num_channels;   /**< Number of channels in input tensor. */
    PaddingType                     _padding;        /**< Padding type. */
    ITensor                        *_output;         /**< Base of output matrices. */
    int                             _matrix_stride;  /**< Stride between output matrices. */
    int                             _padding_top;    /**< Padding to apply to the top of the image. */
    int                             _padding_left;   /**< Padding to apply to the left of the image. */
    int                             _padding_right;  /**< Padding to apply to the right of the image. */
    int                             _padding_bottom; /**< Padding to apply to the bottom of the image. */
    ITensor                        *_workspace;
};
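
/* Editorial sketch (not part of the library): configuring and running the concrete input
 * transform for a float F(2x2, 3x3) convolution. The tensors are assumed to be allocated by
 * the caller with the sizes reported by the query functions above, and the kernel is assumed
 * to be dispatched through the NEON scheduler as other ACL kernels are.
 *
 *   using InputTransform = NEWinogradLayerTransformInputKernel<float, 2, 2, 3, 3>;
 *
 *   void run_input_transform(const ITensor *input_nhwc, ITensor *winograd_input, ITensor *workspace,
 *                            int num_batches, int num_rows, int num_cols, int num_channels,
 *                            PaddingType padding, int matrix_stride)
 *   {
 *       InputTransform transform;
 *       transform.configure(input_nhwc, num_batches, num_rows, num_cols, num_channels,
 *                           padding, winograd_input, matrix_stride, workspace);
 *       NEScheduler::get().schedule(&transform, Window::DimX); // executes run() over the kernel's window
 *   }
 */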

/** Interface for the NEON kernel to perform Winograd output transform. */
class INEWinogradLayerTransformOutputKernel : public INEKernel
{
public:
    /** Get the working space required to perform the transformation.
     *
     * Note, the working space is only required when performing the
     * transformation - hence it can be reused whenever the transformation is
     * not running.
     *
     * @param[in] num_threads The greatest number of threads that will be used to execute the transform.
     *
     * @return Size of working space required in bytes.
     */
    virtual unsigned int get_working_space_size(unsigned int num_threads) const = 0;

    /** Determine how much memory (in units of TOut) to allocate for the
     * (Winograd domain) output.
     *
     * @param[in] num_batches         Number of batches in the output tensor.
     * @param[in] num_rows            Number of rows in each feature map of the input tensor.
     * @param[in] num_cols            Number of columns in each feature map of the input tensor.
     * @param[in] num_output_channels Number of feature maps in the output tensor.
     *
     * @return Storage size (in units of TOut) required.
     */
    virtual unsigned int get_output_storage_size(int num_batches, int num_rows, int num_cols, int num_output_channels) const = 0;

    /** Gets the stride between matrices in the output workspace
     *
     * @param[in] num_batches         Number of batches in the output tensor.
     * @param[in] num_rows            Number of rows in each feature map of the input tensor.
     * @param[in] num_cols            Number of columns in each feature map of the input tensor.
     * @param[in] num_output_channels Number of feature maps in the output tensor.
     *
     * @return Stride expressed in bytes.
     */
    virtual int get_matrix_stride(int num_batches, int num_rows, int num_cols, int num_output_channels) const = 0;

    /** Get the output shape of a convolution.
     *
     * @param[in] num_rows     Number of rows in each feature map of the input tensor.
     * @param[in] num_cols     Number of columns in each feature map of the input tensor.
     * @param[in] padding_same True if padding is SAME, false otherwise
     *
     * @return Shape of the output tensor
     */
    virtual std::pair<unsigned int, unsigned int> get_output_shape(
        int  num_rows,    /* Number of rows in each feature map of the input tensor. */
        int  num_cols,    /* Number of columns in each feature map of the input tensor. */
        bool padding_same /* True if padding is SAME, false otherwise */
    ) const = 0;

    /** Configure the output transform kernel.
     *
     * @param[in]  biases             Pointer to the biases tensor.
     * @param[in]  transformed_output Pointer to working space for the output tensor in the Winograd domain.
     * @param[in]  matrix_stride      Output matrix stride, can be computed with winograd::WinogradGEMM<2, 2, 3, 3>::Convolution<float, float>::get_output_matrix_stride()
     * @param[out] output_nhwc        Pointer to the output tensor, in NHWC data layout, in the spatial domain.
     * @param[in]  num_batches        Number of batches in the input tensor.
     * @param[in]  num_rows           Number of rows in output tensor.
     * @param[in]  num_cols           Number of columns in output tensor.
     * @param[in]  num_channels       Number of feature maps in the output tensor.
     * @param[in]  workspace          Tensor to be used as the working space during the computation.
     * @param[in]  activation         Activation to be used
     */
    virtual void configure(
        const ITensor              *biases,
        const ITensor              *transformed_output,
        const int                   matrix_stride,
        ITensor                    *output_nhwc,
        const int                   num_batches,
        const int                   num_rows,
        const int                   num_cols,
        const int                   num_channels,
        ITensor                    *workspace,
        const arm_gemm::Activation &activation) = 0;

    virtual ~INEWinogradLayerTransformOutputKernel()
    {
    }
};

/** NEON kernel to perform Winograd output transform. */
template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerTransformOutputKernel : public INEWinogradLayerTransformOutputKernel
{
public:
    const char *name() const override
    {
        return "NEWinogradLayerTransformOutputKernel";
    }
    /** Constructor */
    NEWinogradLayerTransformOutputKernel();

    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformOutputKernel(const NEWinogradLayerTransformOutputKernel &) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformOutputKernel &operator=(const NEWinogradLayerTransformOutputKernel &) = delete;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformOutputKernel(NEWinogradLayerTransformOutputKernel &&) = default;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformOutputKernel &operator=(NEWinogradLayerTransformOutputKernel &&) = default;
    /** Default destructor */
    ~NEWinogradLayerTransformOutputKernel() = default;

    // Inherited methods overridden:
    /** Determine how much memory (in units of TOut) to allocate for the
     * (Winograd domain) output.
     *
     * @param[in] num_batches         Number of batches in the output tensor.
     * @param[in] num_rows            Number of rows in each feature map of the input tensor.
     * @param[in] num_cols            Number of columns in each feature map of the input tensor.
     * @param[in] num_output_channels Number of feature maps in the output tensor.
     *
     * @return Storage size (in units of TOut) required.
     */
    unsigned int get_output_storage_size(int num_batches, int num_rows, int num_cols, int num_output_channels) const override;

    /** Gets the stride between matrices in the output workspace
     *
     * @param[in] num_batches         Number of batches in the output tensor.
     * @param[in] num_rows            Number of rows in each feature map of the input tensor.
     * @param[in] num_cols            Number of columns in each feature map of the input tensor.
     * @param[in] num_output_channels Number of feature maps in the output tensor.
     *
     * @return Stride expressed in bytes.
     */
    int get_matrix_stride(int num_batches, int num_rows, int num_cols, int num_output_channels) const override;
    /** Get the output shape of a convolution.
     *
     * @param[in] num_rows     Number of rows in each feature map of the input tensor.
     * @param[in] num_cols     Number of columns in each feature map of the input tensor.
     * @param[in] padding_same True if padding is SAME, false otherwise
     *
     * @return Shape of the output tensor
     */
    std::pair<unsigned int, unsigned int> get_output_shape(
        int  num_rows,    /* Number of rows in each feature map of the input tensor. */
        int  num_cols,    /* Number of columns in each feature map of the input tensor. */
        bool padding_same) const override;

    /** Get the working space required to perform the transformation.
     *
     * Note, the working space is only required when performing the
     * transformation - hence it can be reused whenever the transformation is
     * not running.
     *
     * @param[in] num_threads The greatest number of threads that will be used to execute the transform.
     *
     * @return Size of working space required in bytes.
     */
    unsigned int get_working_space_size(unsigned int num_threads) const override;

    /** Configure the output transform kernel.
     *
     * @param[in]  biases             Pointer to the biases tensor.
     * @param[in]  transformed_output Pointer to working space for the output tensor in the Winograd domain.
     * @param[in]  matrix_stride      Output matrix stride, can be computed with winograd::WinogradGEMM<2, 2, 3, 3>::Convolution<float, float>::get_output_matrix_stride()
     * @param[out] output_nhwc        Pointer to a tensor with NHWC data layout, in the spatial domain.
     * @param[in]  num_batches        Number of batches in the input tensor.
     * @param[in]  num_rows           Number of rows in output tensor.
     * @param[in]  num_cols           Number of columns in output tensor.
     * @param[in]  num_channels       Number of feature maps in the output tensor.
     * @param[in]  workspace          Tensor to be used as the working space during the computation.
     * @param[in]  activation         Activation to be used
     */
    void configure(
        const ITensor              *biases,
        const ITensor              *transformed_output,
        const int                   matrix_stride,
        ITensor                    *output_nhwc,
        const int                   num_batches,
        const int                   num_rows,
        const int                   num_cols,
        const int                   num_channels,
        ITensor                    *workspace,
        const arm_gemm::Activation &activation) override;

    void run(const Window &window, const ThreadInfo &info) override;

    /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformOutputKernel
     *
     * @param[in] input         Source tensor info with shape [C, N, 16, batches] or [C, N, 36, batches]. Data types supported: F16/F32.
     * @param[in] bias          Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input
     * @param[in] output        Destination tensor info with shape [output_convolved_dims.width, output_convolved_dims.height, C, batches]. Data type supported: same as @p input
     * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const WinogradInfo &winograd_info);

private:
    using WinogradBase    = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, winograd::WinogradRoots::Integers>;
    using WinogradConv    = typename WinogradBase::template Convolution<T, T>;
    using OutputTransform = typename WinogradBase::template OutputTransform<T, T>;

    std::unique_ptr<OutputTransform> _transform{ nullptr };
    const ITensor                   *_biases;
    const ITensor                   *_transformed_output;
    ITensor                         *_workspace;
    int                              _matrix_stride;
    int                              _matrix_row_stride;
    ITensor                         *_output_nhwc;
    int                              _num_batches;
    int                              _num_rows;
    int                              _num_cols;
    int                              _num_channels;
};
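
/* Editorial sketch (not part of the library): wiring the output transform after the batched
 * GEMM. `transformed_output` holds the Winograd-domain result and `output_nhwc` the final
 * spatial tensor; both are assumed to be caller-allocated, with output_nhwc shaped according
 * to get_output_shape(). The fused activation is passed as an arm_gemm::Activation descriptor.
 *
 *   using OutputTransform = NEWinogradLayerTransformOutputKernel<float, 2, 2, 3, 3>;
 *
 *   void run_output_transform(const ITensor *biases, const ITensor *transformed_output, int matrix_stride,
 *                             ITensor *output_nhwc, int num_batches, int num_rows, int num_cols,
 *                             int num_channels, ITensor *workspace, const arm_gemm::Activation &activation)
 *   {
 *       OutputTransform transform;
 *       transform.configure(biases, transformed_output, matrix_stride, output_nhwc,
 *                           num_batches, num_rows, num_cols, num_channels, workspace, activation);
 *       NEScheduler::get().schedule(&transform, Window::DimX);
 *   }
 */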

/** Interface for the NEON kernel to perform Winograd weights transform. */
class INEWinogradLayerTransformWeightsKernel : public INEKernel
{
public:
    /** Default copy constructor */
    INEWinogradLayerTransformWeightsKernel(const INEWinogradLayerTransformWeightsKernel &) = default;
    /** Default copy assignment operator */
    INEWinogradLayerTransformWeightsKernel &operator=(const INEWinogradLayerTransformWeightsKernel &) = default;
    /** Allow instances of this class to be moved */
    INEWinogradLayerTransformWeightsKernel(INEWinogradLayerTransformWeightsKernel &&) = default;
    /** Allow instances of this class to be moved */
    INEWinogradLayerTransformWeightsKernel &operator=(INEWinogradLayerTransformWeightsKernel &&) = default;

    INEWinogradLayerTransformWeightsKernel()
    {
    }
    virtual ~INEWinogradLayerTransformWeightsKernel()
    {
    }
    /** Determine how much memory (in units of T) to allocate for the
     * transformed weights.
     *
     * @param[in] num_output_channels Number of output feature maps.
     * @param[in] num_input_channels  Number of input feature maps.
     *
     * @return Storage size (in units of T) required.
     */
    virtual unsigned int get_weight_storage_size(int num_output_channels, int num_input_channels) const = 0;
    /** Gets the stride between matrices in the kernel workspace
     *
     * @param[in] num_output_channels Number of output feature maps.
     * @param[in] num_input_channels  Number of input feature maps.
     *
     * @return Stride expressed in bytes.
     */
    virtual int get_matrix_stride(int num_output_channels, int num_input_channels) const = 0;

    /** Configure the weights transform kernel.
     *
     * @param[in]  weights_hwio        Pointer to the weights tensor
     * @param[out] output              Pointer to working space for the output tensor in the Winograd domain.
     * @param[in]  matrix_stride       Stride across matrices in the output workspace.
     * @param[in]  num_output_channels Number of filters.
     * @param[in]  num_input_channels  Number of channels in each filter.
     */
    virtual void configure(const ITensor *weights_hwio, ITensor *output, const int matrix_stride, const int num_output_channels, const int num_input_channels) = 0;

    /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformWeightsKernel
     *
     * @param[in] input   First tensor input info. Data types supported: F16/F32.
     * @param[in] weights Weights tensor info. Data types supported: same as @p input.
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *weights);
};

/** NEON kernel to perform Winograd weights transform. */
template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerTransformWeightsKernel final : public INEWinogradLayerTransformWeightsKernel
{
public:
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformWeightsKernel(const NEWinogradLayerTransformWeightsKernel &) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformWeightsKernel &operator=(const NEWinogradLayerTransformWeightsKernel &) = delete;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformWeightsKernel(NEWinogradLayerTransformWeightsKernel &&) = default;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformWeightsKernel &operator=(NEWinogradLayerTransformWeightsKernel &&) = default;
    /** Default destructor */
    ~NEWinogradLayerTransformWeightsKernel() = default;

    /** Default constructor. */
    NEWinogradLayerTransformWeightsKernel();
    const char *name() const override
    {
        return "NEWinogradLayerTransformWeightsKernel";
    }

    /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformWeightsKernel
     *
     * @param[in] input         Source tensor info. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout).
     *                          kernel_x must be 3 and equal to kernel_y. Data types supported: F16/F32.
     * @param[in] output        Destination tensor info. The output is a 3D tensor with dimensions [OFM, IFM, 16] or [OFM, IFM, 36]. Data type supported: same as @p input
     * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info);

    // Inherited methods overridden:

#ifndef DOXYGEN_SKIP_THIS
    /** Configure the weights transform kernel.
     *
     * @param[in]  weights_hwio        Pointer to the weights tensor
     * @param[out] output              Pointer to working space for the output tensor in the Winograd domain.
     * @param[in]  matrix_stride       Stride across matrices in the output workspace.
     * @param[in]  num_output_channels Number of filters.
     * @param[in]  num_input_channels  Number of channels in each filter.
     */
    void configure(const ITensor *weights_hwio, ITensor *output, const int matrix_stride, const int num_output_channels, const int num_input_channels) override;
#endif /* DOXYGEN_SKIP_THIS */

    /** Determine how much memory (in units of T) to allocate for the
     * transformed weights.
     *
     * @param[in] num_output_channels Number of output feature maps.
     * @param[in] num_input_channels  Number of input feature maps.
     *
     * @return Storage size (in units of T) required.
     */
    unsigned int get_weight_storage_size(int num_output_channels, int num_input_channels) const override;

    /** Gets the stride between matrices in the kernel workspace
     *
     * @param[in] num_output_channels Number of output feature maps.
     * @param[in] num_input_channels  Number of input feature maps.
     *
     * @return Stride expressed in bytes.
     */
    int get_matrix_stride(int num_output_channels, int num_input_channels) const override;
    void run(const Window &window, const ThreadInfo &info) override;
    bool is_parallelisable() const override;

private:
    using WinogradBase     = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, winograd::WinogradRoots::Integers>;
    using WinogradConv     = typename WinogradBase::template Convolution<T, T>;
    using WeightsTransform = typename WinogradBase::template WeightsTransform<T, T>;

    std::unique_ptr<WeightsTransform> _transform{ nullptr };
    const ITensor                    *_weights_hwio;
    ITensor                          *_output;
    int                               _matrix_stride;
    int                               _num_output_channels;
    int                               _num_input_channels;
};
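
/* Editorial sketch (not part of the library): transforming HWIO-ordered weights into the
 * Winograd domain, which only needs to happen once per set of weights. Buffer sizes come from
 * the query functions declared above; all tensors are assumed to be caller-allocated.
 *
 *   using WeightsTransform = NEWinogradLayerTransformWeightsKernel<float, 2, 2, 3, 3>;
 *
 *   void run_weights_transform(const ITensor *weights_hwio, ITensor *winograd_weights,
 *                              int matrix_stride, int num_output_channels, int num_input_channels)
 *   {
 *       WeightsTransform transform;
 *       // winograd_weights must hold at least get_weight_storage_size(num_output_channels, num_input_channels)
 *       // elements of T; matrix_stride is the value returned by get_matrix_stride() for the same dimensions.
 *       transform.configure(weights_hwio, winograd_weights, matrix_stride, num_output_channels, num_input_channels);
 *       NEScheduler::get().schedule(&transform, Window::DimX);
 *   }
 */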

/** Compile-time configuration of the NEON kernels used to perform a Winograd convolution. */
template <typename TIn, typename TOut, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerConfiguration
{
public:
    /** Winograd base kernel */
    using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, winograd::WinogradRoots::Integers>;
    /** Winograd convolution kernel */
    using WinogradConv = typename WinogradBase::template Convolution<TIn, TOut>;

    using TransformInputKernel   = NEWinogradLayerTransformInputKernel<TIn, OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
    using TransformWeightsKernel = NEWinogradLayerTransformWeightsKernel<TIn, OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
    using TransformOutputKernel  = NEWinogradLayerTransformOutputKernel<TOut, OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
};
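
/* Editorial note: NEWinogradLayerConfiguration is a compile-time bundle - given the input/output
 * data types and the F(output tile, kernel) shape, it names the three matching transform kernels
 * so a convolution function can instantiate them consistently. A minimal sketch, assuming a
 * float F(2x2, 3x3) configuration:
 *
 *   using Cfg = NEWinogradLayerConfiguration<float, float, 2, 2, 3, 3>;
 *
 *   Cfg::TransformInputKernel   input_transform;   // NEWinogradLayerTransformInputKernel<float, 2, 2, 3, 3>
 *   Cfg::TransformWeightsKernel weights_transform; // NEWinogradLayerTransformWeightsKernel<float, 2, 2, 3, 3>
 *   Cfg::TransformOutputKernel  output_transform;  // NEWinogradLayerTransformOutputKernel<float, 2, 2, 3, 3>
 */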

} // namespace arm_compute
#endif /*ARM_COMPUTE_NEGEMMWINOGRADCONVOLUTIONLAYERKERNEL_H*/