blob: 68c133ee371f685d00750c0439ef9263ea9a54d9 [file] [log] [blame]
Pablo Tello89519332017-11-17 11:52:36 +00001/*
Pablo Tello9ceebbe2018-01-10 16:44:13 +00002 * Copyright (c) 2017-2018 ARM Limited.
Pablo Tello89519332017-11-17 11:52:36 +00003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
Georgios Pinitas9fb11592018-04-26 20:34:58 +010024#ifndef __ARM_COMPUTE_NEGEMMWINOGRADCONVOLUTIONLAYERKERNEL_H__
25#define __ARM_COMPUTE_NEGEMMWINOGRADCONVOLUTIONLAYERKERNEL_H__
Pablo Tello89519332017-11-17 11:52:36 +000026
27#include "arm_compute/core/NEON/INEKernel.h"
Georgios Pinitas4074c992018-01-30 18:13:46 +000028#include "arm_compute/core/NEON/kernels/convolution/common/convolution.hpp"
29#include "arm_compute/core/NEON/kernels/convolution/common/tensor.hpp"
30#include "arm_compute/core/NEON/kernels/convolution/winograd/batched_blocked_gemm.hpp"
31#include "arm_compute/core/NEON/kernels/convolution/winograd/winograd_gemm.hpp"
Pablo Tello89519332017-11-17 11:52:36 +000032
33namespace arm_compute
34{
35class ITensor;
Pablo Tello02541fb2017-12-15 09:48:59 +000036
/** Interface for the NEON kernel to perform Winograd input transform. */
template <typename T>
class INEWinogradLayerTransformInputKernel : public INEKernel
{
public:
    /** Determine how much memory (in units of TIn) to allocate for the
     * transformed input.
     *
     * @param[in] num_batches  Number of batches in the input tensor.
     * @param[in] num_channels Number of feature maps in the input tensor.
     * @param[in] num_rows     Number of rows in each feature map.
     * @param[in] num_cols     Number of columns in each feature map.
     * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
     *
     * @return Storage size (in units of TIn) required.
     */
    virtual unsigned int get_input_storage_size(int num_batches, int num_channels, int num_rows, int num_cols, bool same_padding) const = 0;

    /** Gets the stride between matrices in the input workspace
     *
     * @param[in] kernel_shape The shape of the weights tensor.
     * @param[in] input_shape  The shape of the input tensor.
     * @param[in] padding_type The type of padding to be used.
     *
     * @return Stride expressed in bytes.
     */
    virtual int get_matrix_stride(const KernelShape &kernel_shape, const Tensor4DShape &input_shape, const PaddingType padding_type) const = 0;

    /** Configure the input transform kernel.
     *
     * @param[in]  input_nhwc    Input tensor in NHWC data layout format.
     * @param[in]  num_batches   Number of batches in input tensor.
     * @param[in]  num_rows      Number of rows in input tensor.
     * @param[in]  num_cols      Number of columns in input tensor.
     * @param[in]  num_channels  Number of channels in input tensor.
     * @param[in]  padding       Padding type.
     * @param[out] output        Base of output matrices.
     * @param[in]  matrix_stride Stride between output matrices.
     */
    virtual void configure(const ITensor *input_nhwc, const int num_batches, const int num_rows, const int num_cols, const int num_channels,
                           const PaddingType padding, T *const output, const int matrix_stride) = 0;

    /** Destructor */
    virtual ~INEWinogradLayerTransformInputKernel()
    {
    }
};
84
Alex Gildayc357c472018-03-21 13:54:09 +000085/** NEON kernel to perform Winograd input transform. */
Pablo Tellof6c572c2018-02-14 12:47:30 +000086template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
87class NEWinogradLayerTransformInputKernel : public INEWinogradLayerTransformInputKernel<T>
88{
89public:
Pablo Tello7df27862018-05-30 11:44:26 +010090 /** Prevent instances of this class from being copied (As this class contains pointers) */
91 NEWinogradLayerTransformInputKernel(const NEWinogradLayerTransformInputKernel &) = delete;
92 /** Prevent instances of this class from being copied (As this class contains pointers) */
93 NEWinogradLayerTransformInputKernel &operator=(const NEWinogradLayerTransformInputKernel &) = delete;
94 /** Allow instances of this class to be moved */
95 NEWinogradLayerTransformInputKernel(NEWinogradLayerTransformInputKernel &&) = default;
96 /** Allow instances of this class to be moved */
97 NEWinogradLayerTransformInputKernel &operator=(NEWinogradLayerTransformInputKernel &&) = default;
98 /** Default destructor */
99 ~NEWinogradLayerTransformInputKernel() = default;
100
Pablo Tellof6c572c2018-02-14 12:47:30 +0000101 /** Determine how much memory (in units of TIn) to allocate for the
102 * transformed input.
103 *
Pablo Tello7df27862018-05-30 11:44:26 +0100104 * @param[in] num_batches Number of batches in the input tensor.
105 * @param[in] num_channels Number of feature maps in the input tensor.
106 * @param[in] num_rows Number of rows in each feature map.
107 * @param[in] num_cols Number of columns in each feature map.
Pablo Tellof6c572c2018-02-14 12:47:30 +0000108 * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
Alex Gildayc357c472018-03-21 13:54:09 +0000109 *
110 * @return Storage size (in units of TIn) required.
Pablo Tellof6c572c2018-02-14 12:47:30 +0000111 */
112 unsigned int get_input_storage_size(
Pablo Tello7df27862018-05-30 11:44:26 +0100113 int num_batches,
114 int num_channels,
115 int num_rows,
116 int num_cols,
Pablo Tellof6c572c2018-02-14 12:47:30 +0000117 bool same_padding) const override;
118
119 /** Gets the stride between matrices in the input worspace
120 *
121 * @param[in] kernel_shape The shape of the weights tensor.
122 * @param[in] input_shape The shape of the input tensor.
123 * @param[in] padding_type The type of padding to be used.
124 *
125 * @return Stride expressed in bytes.
126 */
127 int get_matrix_stride(const KernelShape &kernel_shape, const Tensor4DShape &input_shape, const PaddingType padding_type) const override;
Pablo Tellod6ca4782018-01-23 09:36:04 +0000128
Alex Gildayc357c472018-03-21 13:54:09 +0000129 /** Default constructor */
Pablo Tello52140b42018-01-30 14:48:11 +0000130 NEWinogradLayerTransformInputKernel();
Pablo Tellof6c572c2018-02-14 12:47:30 +0000131
Pablo Tellod6ca4782018-01-23 09:36:04 +0000132 const char *name() const override
133 {
134 return "NEWinogradLayerTransformInputKernel";
135 }
Pablo Tello52140b42018-01-30 14:48:11 +0000136
137 /** Configure the output transform kernel.
138 *
Pablo Tello7df27862018-05-30 11:44:26 +0100139 * @param[in] input_nhwc Input tensor. Data types supported: F32. Layout supported NHWC.
140 * @param[in] num_batches Number of batches in input tensor.
141 * @param[in] num_rows Number of rows in input tensor.
142 * @param[in] num_cols Number of columns in input tensor.
143 * @param[in] num_channels Number of channels in input tensor.
Pablo Tello52140b42018-01-30 14:48:11 +0000144 * @param[in] padding Padding type.
145 * @param[out] output Base of output matrices.
146 * @param[in] matrix_stride Stride between output matrices.
147 */
148 void configure(
Pablo Tello7df27862018-05-30 11:44:26 +0100149 const ITensor *input_nhwc,
150 const int num_batches,
151 const int num_rows,
152 const int num_cols,
153 const int num_channels,
Pablo Tellof6c572c2018-02-14 12:47:30 +0000154 const PaddingType padding,
155 T *const output,
156 const int matrix_stride) override;
Pablo Tello52140b42018-01-30 14:48:11 +0000157
Pablo Tellod6ca4782018-01-23 09:36:04 +0000158 // Inherited methods overridden:
Pablo Tellod6ca4782018-01-23 09:36:04 +0000159 void run(const Window &window, const ThreadInfo &info) override;
Pablo Tello52140b42018-01-30 14:48:11 +0000160
Alex Gildayc357c472018-03-21 13:54:09 +0000161 /** Winograd base kernel */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000162 using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelCols, KernelCols>;
Alex Gildayc357c472018-03-21 13:54:09 +0000163 /** Winograd convolution kernel */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000164 using WinogradConv = typename WinogradBase::template Convolution<T, T>;
165
Vidhya Sudhan Loganathan3ca97862018-04-23 08:20:04 +0100166 /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformInputKernel
167 *
Vidhya Sudhan Loganathan84ce1f92018-04-25 13:00:09 +0100168 * @param[in] input First tensor input info. Data types supported: F32.
169 * @param[in] output Output tensor info. Data types supported: same as @p input.
170 * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
Vidhya Sudhan Loganathan3ca97862018-04-23 08:20:04 +0100171 *
172 * @return a status
173 */
Vidhya Sudhan Loganathan84ce1f92018-04-25 13:00:09 +0100174 static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info);
Vidhya Sudhan Loganathan3ca97862018-04-23 08:20:04 +0100175
Pablo Tello52140b42018-01-30 14:48:11 +0000176private:
Pablo Tellof6c572c2018-02-14 12:47:30 +0000177 using InputTransform = typename WinogradBase::template InputTransform<T>;
Pablo Tello7df27862018-05-30 11:44:26 +0100178 const ITensor *_input_nhwc;
179 int _num_batches; /**< Number of batches in input tensor. */
180 int _num_rows; /**< Number of rows in input tensor. */
181 int _num_cols; /**< Number of columns in input tensor. */
182 int _num_channels; /**< Number of channels in input tensor. */
183 PaddingType _padding; /**< Padding type. */
184 T *_output; /**< Base of output matrices. */
185 int _matrix_stride; /**< Stride between output matrices. */
Pablo Tellod6ca4782018-01-23 09:36:04 +0000186};
187
/** Interface for the NEON kernel to perform Winograd output transform. */
template <typename T>
class INEWinogradLayerTransformOutputKernel : public INEKernel
{
public:
    /** Determine how much memory (in units of TOut) to allocate for the
     * (Winograd domain) output.
     *
     * @param[in] num_batches         Number of batches in the output tensor.
     * @param[in] num_rows            Number of rows in each feature map of the input tensor.
     * @param[in] num_cols            Number of columns in each feature map of the input tensor.
     * @param[in] num_output_channels Number of feature maps in the output tensor.
     * @param[in] same_padding        Use "SAME" padding, otherwise use "VALID".
     *
     * @return Storage size (in units of TOut) required.
     */
    virtual unsigned int get_output_storage_size(int num_batches, int num_rows, int num_cols, int num_output_channels, bool same_padding) const = 0;

    /** Gets the stride between matrices in the output workspace
     *
     * @param[in] kernel_shape The shape of the weights tensor.
     * @param[in] input_shape  The shape of the input tensor.
     * @param[in] padding_type The type of padding to be used.
     *
     * @return Stride expressed in bytes.
     */
    virtual int get_matrix_stride(const KernelShape &kernel_shape, const Tensor4DShape &input_shape, const PaddingType padding_type) const = 0;

    /** Get the output shape of a convolution.
     *
     * @param[in] kernel_shape The shape of the weights tensor.
     * @param[in] in_shape     The shape of the input tensor.
     * @param[in] padding      The type of padding to be used.
     *
     * @return Shape of the output tensor.
     */
    virtual Tensor4DShape get_output_shape(const KernelShape &kernel_shape, const Tensor4DShape &in_shape, const PaddingType padding) const = 0;

    /** Configure the output transform kernel.
     *
     * @param[in]  biases              Pointer to the biases tensor.
     * @param[in]  output_workingspace Pointer to working space for the output tensor in the Winograd domain.
     * @param[in]  matrix_stride       Output matrix stride, can be computed with winograd::WinogradGEMM<2, 2, 3, 3>::Convolution<float, float>::get_output_matrix_stride()
     * @param[out] output_nhwc         Pointer to a tensor in NHWC data layout ordered output tensor, in the spatial domain.
     * @param[in]  num_batches         Number of batches in the input tensor.
     * @param[in]  num_rows            Number of rows in output tensor.
     * @param[in]  num_cols            Number of columns in output tensor.
     * @param[in]  num_channels        Number of feature maps in the output tensor.
     */
    virtual void configure(
        const ITensor *biases,
        const T *const output_workingspace,
        const int      matrix_stride,
        ITensor *const output_nhwc,
        const int      num_batches,
        const int      num_rows,
        const int      num_cols,
        const int      num_channels) = 0;

    /** Destructor */
    virtual ~INEWinogradLayerTransformOutputKernel()
    {
    }
};
251
/** NEON kernel to perform Winograd output transform. */
template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerTransformOutputKernel : public INEWinogradLayerTransformOutputKernel<T>
{
public:
    const char *name() const override
    {
        return "NEWinogradLayerTransformOutputKernel";
    }
    /** Constructor */
    NEWinogradLayerTransformOutputKernel();

    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformOutputKernel(const NEWinogradLayerTransformOutputKernel &) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformOutputKernel &operator=(const NEWinogradLayerTransformOutputKernel &) = delete;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformOutputKernel(NEWinogradLayerTransformOutputKernel &&) = default;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformOutputKernel &operator=(NEWinogradLayerTransformOutputKernel &&) = default;
    /** Default destructor */
    ~NEWinogradLayerTransformOutputKernel() = default;

    // Inherited methods overridden:
    /** Determine how much memory (in units of TOut) to allocate for the
     * (Winograd domain) output.
     *
     * @param[in] num_batches         Number of batches in the output tensor.
     * @param[in] num_rows            Number of rows in each feature map of the input tensor.
     * @param[in] num_cols            Number of columns in each feature map of the input tensor.
     * @param[in] num_output_channels Number of feature maps in the output tensor.
     * @param[in] same_padding        Use "SAME" padding, otherwise use "VALID".
     *
     * @return Storage size (in units of TOut) required.
     */
    unsigned int get_output_storage_size(int num_batches, int num_rows, int num_cols, int num_output_channels, bool same_padding) const override;

    /** Gets the stride between matrices in the output workspace
     *
     * @param[in] kernel_shape The shape of the weights tensor.
     * @param[in] input_shape  The shape of the input tensor.
     * @param[in] padding_type The type of padding to be used.
     *
     * @return Stride expressed in bytes.
     */
    int get_matrix_stride(const KernelShape &kernel_shape, const Tensor4DShape &input_shape, const PaddingType padding_type) const override;
    /** Get the output shape of a convolution.
     *
     * @param[in] kernel_shape The shape of the weights tensor.
     * @param[in] in_shape     The shape of the input tensor.
     * @param[in] padding      The type of padding to be used.
     *
     * @return Shape of the output tensor.
     */
    Tensor4DShape get_output_shape(const KernelShape &kernel_shape, const Tensor4DShape &in_shape, const PaddingType padding) const override;

    /** Configure the output transform kernel.
     *
     * @param[in]  biases              Pointer to the biases tensor.
     * @param[in]  output_workingspace Pointer to working space for the output tensor in the Winograd domain.
     * @param[in]  matrix_stride       Output matrix stride, can be computed with winograd::WinogradGEMM<2, 2, 3, 3>::Convolution<float, float>::get_output_matrix_stride()
     * @param[out] output_nhwc         Pointer to a tensor with NHWC data layout, in the spatial domain.
     * @param[in]  num_batches         Number of batches in the input tensor.
     * @param[in]  num_rows            Number of rows in output tensor.
     * @param[in]  num_cols            Number of columns in output tensor.
     * @param[in]  num_channels        Number of feature maps in the output tensor.
     */
    void configure(
        const ITensor *biases,
        const T *const output_workingspace,
        const int      matrix_stride,
        ITensor *const output_nhwc,
        const int      num_batches,
        const int      num_rows,
        const int      num_cols,
        const int      num_channels) override;

    void run(const Window &window, const ThreadInfo &info) override;

    /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformOutputKernel
     *
     * @param[in]  input         Source tensor with shape [C, N, 16, batches] or [C, N, 36, batches]. Data types supported: F32.
     * @param[in]  bias          Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input
     * @param[out] output        Destination tensor with shape [output_convolved_dims.width, output_convolved_dims.height, C, batches]. Data type supported: same as @p input
     * @param[in]  winograd_info Contains Winograd's information described in @ref WinogradInfo
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const WinogradInfo &winograd_info);

private:
    using WinogradBase    = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
    using WinogradConv    = typename WinogradBase::template Convolution<T, T>;
    using OutputTransform = typename WinogradBase::template OutputTransform<T>;

    const ITensor *_biases;            /**< Biases tensor (not owned, may be nullptr). */
    const T       *_output_workspace;  /**< Winograd-domain working space (not owned). */
    int            _matrix_stride;     /**< Stride between matrices in the workspace. */
    int            _matrix_row_stride; /**< Row stride inside a workspace matrix. */
    ITensor       *_output_nhwc;       /**< Spatial-domain output tensor, NHWC layout (not owned). */
    int            _num_batches;       /**< Number of batches in the input tensor. */
    int            _num_rows;          /**< Number of rows in the output tensor. */
    int            _num_cols;          /**< Number of columns in the output tensor. */
    int            _num_channels;      /**< Number of feature maps in the output tensor. */
};
357
/** Interface for the NEON kernel to perform Winograd weights transform. */
template <typename T>
class INEWinogradLayerTransformWeightsKernel : public INEKernel
{
public:
    /** Determine how much memory (in units of T) to allocate for the
     * transformed weights.
     *
     * @param[in] num_output_channels Number of output feature maps.
     * @param[in] num_input_channels  Number of input feature maps.
     *
     * @return Storage size (in units of T) required.
     */
    virtual unsigned int get_weight_storage_size(int num_output_channels, int num_input_channels) const = 0;
    /** Gets the stride between matrices in the kernel workspace
     *
     * @param[in] kernel_shape The shape of the weights tensor.
     *
     * @return Stride expressed in bytes.
     */
    virtual int get_matrix_stride(const KernelShape &kernel_shape) const = 0;

    /** Configure the weights transform kernel.
     *
     * @param[in]  weights_hwio        Pointer to the weights tensor
     * @param[out] output              Pointer to working space for the output tensor in the Winograd domain.
     * @param[in]  matrix_stride       Stride across matrices in the output workspace.
     * @param[in]  num_output_channels Number of filters.
     * @param[in]  num_input_channels  Number of channels in each filter.
     */

    virtual void configure(const ITensor *weights_hwio, T *const output, const int matrix_stride, const int num_output_channels, const int num_input_channels) = 0;

    /** Destructor */
    virtual ~INEWinogradLayerTransformWeightsKernel()
    {
    }
};
395
/** NEON kernel to perform Winograd weights transform. */
template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerTransformWeightsKernel final : public INEWinogradLayerTransformWeightsKernel<T>
{
public:
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformWeightsKernel(const NEWinogradLayerTransformWeightsKernel &) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformWeightsKernel &operator=(const NEWinogradLayerTransformWeightsKernel &) = delete;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformWeightsKernel(NEWinogradLayerTransformWeightsKernel &&) = default;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformWeightsKernel &operator=(NEWinogradLayerTransformWeightsKernel &&) = default;
    /** Default destructor */
    ~NEWinogradLayerTransformWeightsKernel() = default;

    /** Default constructor. */
    NEWinogradLayerTransformWeightsKernel();
    const char *name() const override
    {
        return "NEWinogradLayerTransformWeightsKernel";
    }

    /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformWeightsKernel
     *
     * @param[in] input         Source tensor info. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout).
     *                          kernel_x must be 3 and equal to kernel_y. Data types supported: F32.
     * @param[in] output        Destination tensor info. The output is a 3D tensor with dimensions [OFM, IFM, 16] or [OFM, IFM, 36]. Data type supported: same as @p input
     * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info);

    // Inherited methods overridden:
    /** See @ref INEWinogradLayerTransformWeightsKernel::configure */
    void configure(const ITensor *weights_hwio, T *const output, const int matrix_stride, const int num_output_channels, const int num_input_channels) override;
    /** See @ref INEWinogradLayerTransformWeightsKernel::get_weight_storage_size */
    unsigned int get_weight_storage_size(int num_output_channels, int num_input_channels) const override;
    /** See @ref INEWinogradLayerTransformWeightsKernel::get_matrix_stride */
    int  get_matrix_stride(const KernelShape &kernel_shape) const override;
    void run(const Window &window, const ThreadInfo &info) override;
    bool is_parallelisable() const override;

private:
    using WinogradBase     = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
    using WinogradConv     = typename WinogradBase::template Convolution<T, T>;
    using WeightsTransform = typename WinogradBase::template WeightsTransform<T>;

    const ITensor *_weights_hwio;        /**< Weights tensor in HWIO layout (not owned). */
    T             *_output;              /**< Winograd-domain weights workspace (not owned). */
    int            _matrix_stride;       /**< Stride across matrices in the output workspace. */
    int            _num_output_channels; /**< Number of filters. */
    int            _num_input_channels;  /**< Number of channels in each filter. */
};
448
/** Interface for the NEON kernel to perform Winograd. */
template <typename TIn, typename TOut>
class INEWinogradLayerBatchedGEMMKernel : public INEKernel
{
public:
    /** Get the number of GEMMs to compute
     */
    virtual unsigned int get_number_gemms() const = 0;
    /** Initialise the kernel
     *
     * @param[in]  n_gemms         Number of GEMMs to compute.
     * @param[in]  M               in_shape.num_batches * tile_rows * tile_cols.
     * @param[in]  K               Number of channels in the input tensor.
     * @param[in]  N               Number of channels in the output tensor.
     * @param[in]  a_matrix_stride Stride between input matrices.
     * @param[in]  a_row_stride    Row stride inside input matrix.
     * @param[in]  b_matrix_stride Stride between weights matrices.
     * @param[in]  b_row_stride    Row stride inside the weights matrix.
     * @param[in]  c_matrix_stride Stride between output matrices.
     * @param[in]  c_row_stride    Row stride inside the output matrix.
     * @param[out] a_ptr           Input workspace.
     * @param[out] b_ptr           Kernel workspace.
     * @param[out] c_ptr           Output workspace.
     */
    virtual void configure(
        const unsigned int n_gemms,
        const int M, const int K, const int N,
        const int        a_matrix_stride,
        const int        a_row_stride,
        const int        b_matrix_stride,
        const int        b_row_stride,
        const int        c_matrix_stride,
        const int        c_row_stride,
        const TIn *const a_ptr,
        const TIn *const b_ptr,
        TOut *const      c_ptr) = 0;

    /** Get the number of tiles per row
     */
    virtual int get_output_tile_rows() const = 0;
    /** Get the number of tiles per columns
     */
    virtual int get_output_tile_cols() const = 0;
    /** Get the number of blocks
     */
    virtual int get_number_blocks() const = 0;
};
496
/** NEON kernel to perform Winograd. */
template <typename TIn, typename TOut, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerBatchedGEMMKernel : public INEWinogradLayerBatchedGEMMKernel<TIn, TOut>
{
public:
    /** Winograd base kernel */
    using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
    /** Winograd convolution kernel */
    using WinogradConv = typename WinogradBase::template Convolution<TIn, TOut>;
    /** Winograd batched blocked GEMM operator */
    using MultiGEMM = winograd::BatchedBlockedGemm<WinogradConv::M_BLOCK, WinogradConv::N_BLOCK, TIn, TOut>;

    const char *name() const override
    {
        return "NEWinogradLayerBatchedGEMMKernel";
    }
    /** Constructor */
    NEWinogradLayerBatchedGEMMKernel();

    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerBatchedGEMMKernel(const NEWinogradLayerBatchedGEMMKernel &) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerBatchedGEMMKernel &operator=(const NEWinogradLayerBatchedGEMMKernel &) = delete;
    /** Allow instances of this class to be moved */
    NEWinogradLayerBatchedGEMMKernel(NEWinogradLayerBatchedGEMMKernel &&) = default;
    /** Allow instances of this class to be moved */
    NEWinogradLayerBatchedGEMMKernel &operator=(NEWinogradLayerBatchedGEMMKernel &&) = default;
    /** Default destructor. */
    ~NEWinogradLayerBatchedGEMMKernel() = default;

    // Inherited methods overridden:

    unsigned int get_number_gemms() const override;
    int          get_output_tile_rows() const override;
    int          get_output_tile_cols() const override;
    int          get_number_blocks() const override;

    /** Initialise the kernel
     *
     * @param[in]  n_gemms         Number of GEMMs to compute.
     * @param[in]  M               in_shape.num_batches * tile_rows * tile_cols.
     * @param[in]  K               Number of channels in the input tensor.
     * @param[in]  N               Number of channels in the output tensor.
     * @param[in]  a_matrix_stride Stride between input matrices.
     * @param[in]  a_row_stride    Row stride inside input matrix.
     * @param[in]  b_matrix_stride Stride between weights matrices.
     * @param[in]  b_row_stride    Row stride inside the weights matrix.
     * @param[in]  c_matrix_stride Stride between output matrices.
     * @param[in]  c_row_stride    Row stride inside the output matrix.
     * @param[out] a_ptr           Input workspace.
     * @param[out] b_ptr           Kernel workspace.
     * @param[out] c_ptr           Output workspace.
     */
    void configure(
        const unsigned int n_gemms,
        const int M, const int K, const int N,
        const int        a_matrix_stride,
        const int        a_row_stride,
        const int        b_matrix_stride,
        const int        b_row_stride,
        const int        c_matrix_stride,
        const int        c_row_stride,
        const TIn *const a_ptr,
        const TIn *const b_ptr,
        TOut *const      c_ptr) override;

    void run(const Window &window, const ThreadInfo &info) override;

    /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerBatchedGEMMKernel.
     *
     * NOTE(review): @p c is declared as `const ITensor *` while every other validate() in this
     * file takes `ITensorInfo *` — looks like an inconsistency; confirm against the definition
     * before changing, as callers and the .cpp must agree.
     *
     * @param[in]  a         First input tensor (Matrix or Vector A). Data types supported: F32
     * @param[in]  b         Second input tensor (Matrix B). Data type supported: same as @p a.
     * @param[in]  c         Third input tensor (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a.
     * @param[out] output    Output tensor. Data type supported: same as @p a
     * @param[in]  alpha     Weight of the matrix product
     * @param[in]  beta      Weight of matrix C
     * @param[in]  gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and
     *                       if the reshape of matrix B should happen only for the first run
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensor *c, const ITensorInfo *output, const float alpha, const float beta, const GEMMInfo &gemm_info = GEMMInfo());

private:
    static const int           _output_tile_rows = OutputTileRows; /**< Rows per output tile. */
    static const int           _output_tile_cols = OutputTileCols; /**< Columns per output tile. */
    std::unique_ptr<MultiGEMM> _gemms;                             /**< Owned batched blocked GEMM operator. */
};
585
586} // namespace arm_compute
Georgios Pinitas9fb11592018-04-26 20:34:58 +0100587#endif /*__ARM_COMPUTE_NEGEMMWINOGRADCONVOLUTIONLAYERKERNEL_H__*/