blob: 7284f9fdc4a9b2885a70ceb3c8d0f352b382b181 [file] [log] [blame]
Pablo Tello89519332017-11-17 11:52:36 +00001/*
Pablo Tello9ceebbe2018-01-10 16:44:13 +00002 * Copyright (c) 2017-2018 ARM Limited.
Pablo Tello89519332017-11-17 11:52:36 +00003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#ifndef __ARM_COMPUTE_NEGEMMWINOGRADLAYERKERNEL_H__
25#define __ARM_COMPUTE_NEGEMMWINOGRADLAYERKERNEL_H__
26
27#include "arm_compute/core/NEON/INEKernel.h"
Georgios Pinitas4074c992018-01-30 18:13:46 +000028#include "arm_compute/core/NEON/kernels/convolution/common/convolution.hpp"
29#include "arm_compute/core/NEON/kernels/convolution/common/tensor.hpp"
30#include "arm_compute/core/NEON/kernels/convolution/winograd/batched_blocked_gemm.hpp"
31#include "arm_compute/core/NEON/kernels/convolution/winograd/winograd_gemm.hpp"
Pablo Tello89519332017-11-17 11:52:36 +000032
33namespace arm_compute
34{
35class ITensor;
Pablo Tello02541fb2017-12-15 09:48:59 +000036
/** Interface for the NEON kernel to perform Winograd input transform. */
template <typename T>
class INEWinogradLayerTransformInputKernel : public INEKernel
{
public:
    /** Determine how much memory (in units of TIn) to allocate for the
     * transformed input.
     *
     * @param[in] n_batches    Number of batches in the input tensor.
     * @param[in] n_channels   Number of feature maps in the input tensor.
     * @param[in] n_rows       Number of rows in each feature map.
     * @param[in] n_cols       Number of columns in each feature map.
     * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
     *
     * @return Storage size (in units of TIn) required.
     */
    virtual unsigned int get_input_storage_size(int n_batches, int n_channels, int n_rows, int n_cols, bool same_padding) const = 0;

    /** Gets the stride between matrices in the input workspace
     *
     * @param[in] kernel_shape The shape of the weights tensor.
     * @param[in] input_shape  The shape of the input tensor.
     * @param[in] padding_type The type of padding to be used.
     *
     * @return Stride expressed in bytes.
     */
    virtual int get_matrix_stride(const KernelShape &kernel_shape, const Tensor4DShape &input_shape, const PaddingType padding_type) const = 0;

    /** Configure the input transform kernel.
     *
     * @param[in]  input         Input tensor data
     * @param[in]  n_batches     Number of batches in input tensor.
     * @param[in]  n_rows        Number of rows in input tensor.
     * @param[in]  n_cols        Number of columns in input tensor.
     * @param[in]  n_channels    Number of channels in input tensor.
     * @param[in]  padding       Padding type.
     * @param[out] output        Base of output matrices.
     * @param[in]  matrix_stride Stride between output matrices.
     */
    virtual void configure(const T *const input, const int n_batches, const int n_rows, const int n_cols, const int n_channels, const PaddingType padding, T *const output, const int matrix_stride) = 0;

    /** Destructor */
    virtual ~INEWinogradLayerTransformInputKernel()
    {
    }
};
83
Alex Gildayc357c472018-03-21 13:54:09 +000084/** NEON kernel to perform Winograd input transform. */
Pablo Tellof6c572c2018-02-14 12:47:30 +000085template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
86class NEWinogradLayerTransformInputKernel : public INEWinogradLayerTransformInputKernel<T>
87{
88public:
89 /** Determine how much memory (in units of TIn) to allocate for the
90 * transformed input.
91 *
92 * @param[in] n_batches Number of batches in the input tensor.
93 * @param[in] n_channels Number of feature maps in the input tensor.
94 * @param[in] n_rows Number of rows in each feature map.
95 * @param[in] n_cols Number of columns in each feature map.
96 * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
Alex Gildayc357c472018-03-21 13:54:09 +000097 *
98 * @return Storage size (in units of TIn) required.
Pablo Tellof6c572c2018-02-14 12:47:30 +000099 */
100 unsigned int get_input_storage_size(
Pablo Tello52140b42018-01-30 14:48:11 +0000101 int n_batches,
102 int n_channels,
103 int n_rows,
104 int n_cols,
Pablo Tellof6c572c2018-02-14 12:47:30 +0000105 bool same_padding) const override;
106
107 /** Gets the stride between matrices in the input worspace
108 *
109 * @param[in] kernel_shape The shape of the weights tensor.
110 * @param[in] input_shape The shape of the input tensor.
111 * @param[in] padding_type The type of padding to be used.
112 *
113 * @return Stride expressed in bytes.
114 */
115 int get_matrix_stride(const KernelShape &kernel_shape, const Tensor4DShape &input_shape, const PaddingType padding_type) const override;
Pablo Tellod6ca4782018-01-23 09:36:04 +0000116
Alex Gildayc357c472018-03-21 13:54:09 +0000117 /** Default constructor */
Pablo Tello52140b42018-01-30 14:48:11 +0000118 NEWinogradLayerTransformInputKernel();
Pablo Tellof6c572c2018-02-14 12:47:30 +0000119
Pablo Tellod6ca4782018-01-23 09:36:04 +0000120 const char *name() const override
121 {
122 return "NEWinogradLayerTransformInputKernel";
123 }
Pablo Tello52140b42018-01-30 14:48:11 +0000124
125 /** Configure the output transform kernel.
126 *
Vidhya Sudhan Loganathan3ca97862018-04-23 08:20:04 +0100127 * @param[in] input Input tensor data. Data types supported: F32.
Pablo Tello52140b42018-01-30 14:48:11 +0000128 * @param[in] n_batches Number of batches in input tensor.
129 * @param[in] n_rows Number of rows in input tensor.
130 * @param[in] n_cols Number of columns in input tensor.
131 * @param[in] n_channels Number of channels in input tensor.
132 * @param[in] padding Padding type.
133 * @param[out] output Base of output matrices.
134 * @param[in] matrix_stride Stride between output matrices.
135 */
136 void configure(
Pablo Tellof6c572c2018-02-14 12:47:30 +0000137 const T *const input,
138 const int n_batches,
139 const int n_rows,
140 const int n_cols,
141 const int n_channels,
142 const PaddingType padding,
143 T *const output,
144 const int matrix_stride) override;
Pablo Tello52140b42018-01-30 14:48:11 +0000145
Pablo Tellod6ca4782018-01-23 09:36:04 +0000146 // Inherited methods overridden:
Pablo Tellod6ca4782018-01-23 09:36:04 +0000147 void run(const Window &window, const ThreadInfo &info) override;
148 bool is_parallelisable() const override;
Pablo Tello52140b42018-01-30 14:48:11 +0000149
Alex Gildayc357c472018-03-21 13:54:09 +0000150 /** Winograd base kernel */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000151 using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelCols, KernelCols>;
Alex Gildayc357c472018-03-21 13:54:09 +0000152 /** Winograd convolution kernel */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000153 using WinogradConv = typename WinogradBase::template Convolution<T, T>;
154
Vidhya Sudhan Loganathan3ca97862018-04-23 08:20:04 +0100155 /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformInputKernel
156 *
Vidhya Sudhan Loganathan84ce1f92018-04-25 13:00:09 +0100157 * @param[in] input First tensor input info. Data types supported: F32.
158 * @param[in] output Output tensor info. Data types supported: same as @p input.
159 * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
Vidhya Sudhan Loganathan3ca97862018-04-23 08:20:04 +0100160 *
161 * @return a status
162 */
Vidhya Sudhan Loganathan84ce1f92018-04-25 13:00:09 +0100163 static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info);
Vidhya Sudhan Loganathan3ca97862018-04-23 08:20:04 +0100164
Pablo Tello52140b42018-01-30 14:48:11 +0000165private:
Pablo Tellof6c572c2018-02-14 12:47:30 +0000166 using InputTransform = typename WinogradBase::template InputTransform<T>;
Pablo Tello52140b42018-01-30 14:48:11 +0000167 std::unique_ptr<InputTransform> _transform;
Pablo Tellod6ca4782018-01-23 09:36:04 +0000168};
169
Alex Gildayc357c472018-03-21 13:54:09 +0000170/** Interface for the NEON kernel to perform Winograd output transform. */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000171template <typename T>
172class INEWinogradLayerTransformOutputKernel : public INEKernel
Pablo Tellod6ca4782018-01-23 09:36:04 +0000173{
174public:
Pablo Tello52140b42018-01-30 14:48:11 +0000175 /** Determine how much memory (in units of TOut) to allocate for the
176 * (Winograd domain) output.
177 *
178 * @param[in] n_batches Number of batches in the output tensor.
179 * @param[in] n_rows Number of rows in each feature map of the input tensor.
180 * @param[in] n_cols Number of columns in each feature map of the input tensor.
181 * @param[in] n_output_channels Number of feature maps in the output tensor.
182 * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
Alex Gildayc357c472018-03-21 13:54:09 +0000183 *
184 * @return Storage size (in units of TOut) required.
Pablo Tello52140b42018-01-30 14:48:11 +0000185 */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000186 virtual unsigned int get_output_storage_size(int n_batches, int n_rows, int n_cols, int n_output_channels, bool same_padding) const = 0;
Pablo Tello52140b42018-01-30 14:48:11 +0000187
Pablo Tellof6c572c2018-02-14 12:47:30 +0000188 /** Gets the stride between matrices in the output worspace
189 *
190 * @param[in] kernel_shape The shape of the weights tensor.
191 * @param[in] input_shape The shape of the input tensor.
192 * @param[in] padding_type The type of padding to be used.
193 *
194 * @return Stride expressed in bytes.
195 */
196 virtual int get_matrix_stride(const KernelShape &kernel_shape, const Tensor4DShape &input_shape, const PaddingType padding_type) const = 0;
197
198 /** Get the output shape of a convolution.
199 *
200 * @param[in] kernel_shape The shape of the weights tensor.
201 * @param[in] in_shape The shape of the input tensor.
202 * @param[in] padding The type of padding to be used.
203 *
204 * @return Stride expressed in bytes.
205 */
206 virtual Tensor4DShape get_output_shape(const KernelShape &kernel_shape, const Tensor4DShape &in_shape, const PaddingType padding) const = 0;
207
208 /** Configure the output transform kernel.
209 *
210 * @param[in] biases Pointer to the biases tensor.
211 * @param[in] output_workingspace Pointer to working space for the output tensor in the Winograd domain.
212 * @param[in] matrix_stride Output matrix stride, can be computed with winograd::WinogradGEMM<2, 2, 3, 3>::Convolution<float, float>::get_output_matrix_stride()
213 * @param[out] output Pointer to NHWC ordered output tensor, in the spatial domain.
214 * @param[in] n_batches Number of batches in the input tensor.
215 * @param[in] n_rows Number of rows in output tensor.
216 * @param[in] n_cols Number of columns in output tensor.
217 * @param[in] n_channels Number of feature maps in the output tensor.
218 */
219 virtual void configure(
220 const ITensor *biases,
221 const T *const output_workingspace,
222 const int matrix_stride,
223 T *const output,
224 const int n_batches,
225 const int n_rows,
226 const int n_cols,
227 const int n_channels) = 0;
228
229 virtual ~INEWinogradLayerTransformOutputKernel()
230 {
231 }
232};
233
Alex Gildayc357c472018-03-21 13:54:09 +0000234/** NEON kernel to perform Winograd output transform. */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000235template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
236class NEWinogradLayerTransformOutputKernel : public INEWinogradLayerTransformOutputKernel<T>
237{
238public:
Pablo Tellod6ca4782018-01-23 09:36:04 +0000239 const char *name() const override
240 {
241 return "NEWinogradLayerTransformOutputKernel";
242 }
243 /** Constructor */
244 NEWinogradLayerTransformOutputKernel();
245
246 /** Prevent instances of this class from being copied (As this class contains pointers) */
247 NEWinogradLayerTransformOutputKernel(const NEWinogradLayerTransformOutputKernel &) = delete;
248 /** Prevent instances of this class from being copied (As this class contains pointers) */
249 NEWinogradLayerTransformOutputKernel &operator=(const NEWinogradLayerTransformOutputKernel &) = delete;
250 /** Allow instances of this class to be moved */
251 NEWinogradLayerTransformOutputKernel(NEWinogradLayerTransformOutputKernel &&) = default;
252 /** Allow instances of this class to be moved */
253 NEWinogradLayerTransformOutputKernel &operator=(NEWinogradLayerTransformOutputKernel &&) = default;
Alex Gildayc357c472018-03-21 13:54:09 +0000254 /** Default destructor */
Pablo Tellod6ca4782018-01-23 09:36:04 +0000255 ~NEWinogradLayerTransformOutputKernel() = default;
256
Pablo Tellof6c572c2018-02-14 12:47:30 +0000257 // Inherited methods overridden:
258 /** Determine how much memory (in units of TOut) to allocate for the
259 * (Winograd domain) output.
260 *
261 * @param[in] n_batches Number of batches in the output tensor.
262 * @param[in] n_rows Number of rows in each feature map of the input tensor.
263 * @param[in] n_cols Number of columns in each feature map of the input tensor.
264 * @param[in] n_output_channels Number of feature maps in the output tensor.
265 * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
Alex Gildayc357c472018-03-21 13:54:09 +0000266 *
267 * @return Storage size (in units of TOut) required.
Pablo Tellof6c572c2018-02-14 12:47:30 +0000268 */
269 unsigned int get_output_storage_size(int n_batches, int n_rows, int n_cols, int n_output_channels, bool same_padding) const override;
270
271 /** Gets the stride between matrices in the output worspace
272 *
273 * @param[in] kernel_shape The shape of the weights tensor.
274 * @param[in] input_shape The shape of the input tensor.
275 * @param[in] padding_type The type of padding to be used.
276 *
277 * @return Stride expressed in bytes.
278 */
279 int get_matrix_stride(const KernelShape &kernel_shape, const Tensor4DShape &input_shape, const PaddingType padding_type) const override;
280 /** Get the output shape of a convolution.
281 *
282 * @param[in] kernel_shape The shape of the weights tensor.
283 * @param[in] in_shape The shape of the input tensor.
284 * @param[in] padding The type of padding to be used.
285 *
286 * @return Stride expressed in bytes.
287 */
288 Tensor4DShape get_output_shape(const KernelShape &kernel_shape, const Tensor4DShape &in_shape, const PaddingType padding) const override;
289
Pablo Tellod6ca4782018-01-23 09:36:04 +0000290 /** Configure the output transform kernel.
291 *
292 * @param[in] biases Pointer to the biases tensor.
293 * @param[in] output_workingspace Pointer to working space for the output tensor in the Winograd domain.
294 * @param[in] matrix_stride Output matrix stride, can be computed with winograd::WinogradGEMM<2, 2, 3, 3>::Convolution<float, float>::get_output_matrix_stride()
295 * @param[out] output Pointer to NHWC ordered output tensor, in the spatial domain.
296 * @param[in] n_batches Number of batches in the input tensor.
297 * @param[in] n_rows Number of rows in output tensor.
298 * @param[in] n_cols Number of columns in output tensor.
299 * @param[in] n_channels Number of feature maps in the output tensor.
300 */
301 void configure(
Pablo Tellof6c572c2018-02-14 12:47:30 +0000302 const ITensor *biases,
303 const T *const output_workingspace,
304 const int matrix_stride,
305 T *const output,
306 const int n_batches,
307 const int n_rows,
308 const int n_cols,
309 const int n_channels) override;
Pablo Tellod6ca4782018-01-23 09:36:04 +0000310
Pablo Tellod6ca4782018-01-23 09:36:04 +0000311 void run(const Window &window, const ThreadInfo &info) override;
312 bool is_parallelisable() const override;
313
Vidhya Sudhan Loganathan3ca97862018-04-23 08:20:04 +0100314 /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformOutputKernel
315 *
Vidhya Sudhan Loganathan84ce1f92018-04-25 13:00:09 +0100316 * @param[in] input Source tensor with shape [C, N, 16, batches] or [C, N, 36, batches]. Data types supported: F32.
317 * @param[in] bias Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input
318 * @param[out] output Destination tensor with shape [output_convolved_dims.width, output_convolved_dims.height, C, batches]. Data type supported: same as @p input
319 * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
Vidhya Sudhan Loganathan3ca97862018-04-23 08:20:04 +0100320 *
321 * @return a status
322 */
Vidhya Sudhan Loganathan84ce1f92018-04-25 13:00:09 +0100323 static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const WinogradInfo &winograd_info);
Vidhya Sudhan Loganathan3ca97862018-04-23 08:20:04 +0100324
Pablo Tellod6ca4782018-01-23 09:36:04 +0000325private:
Pablo Tello52140b42018-01-30 14:48:11 +0000326 using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
Pablo Tellof6c572c2018-02-14 12:47:30 +0000327 using WinogradConv = typename WinogradBase::template Convolution<T, T>;
328 using OutputTransform = typename WinogradBase::template OutputTransform<T>;
Pablo Tello52140b42018-01-30 14:48:11 +0000329
Pablo Tellod6ca4782018-01-23 09:36:04 +0000330 const ITensor *_biases;
Pablo Tellof6c572c2018-02-14 12:47:30 +0000331 const T *_output_workspace;
Pablo Tellod6ca4782018-01-23 09:36:04 +0000332 int _matrix_stride;
333 int _matrix_row_stride;
Pablo Tellof6c572c2018-02-14 12:47:30 +0000334 T *_output;
Pablo Tellod6ca4782018-01-23 09:36:04 +0000335 int _n_batches;
336 int _n_rows;
337 int _n_cols;
338 int _n_channels;
339};
340
Alex Gildayc357c472018-03-21 13:54:09 +0000341/** Interface for the NEON kernel to perform Winograd weights transform. */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000342template <typename T>
343class INEWinogradLayerTransformWeightsKernel : public INEKernel
Pablo Tellod6ca4782018-01-23 09:36:04 +0000344{
345public:
Pablo Tellof6c572c2018-02-14 12:47:30 +0000346 /** Determine how much memory (in units of T) to allocate for the
Pablo Tello52140b42018-01-30 14:48:11 +0000347 * transformed weights.
348 *
349 * @param[in] n_output_channels Number of output feature maps.
350 * @param[in] n_input_channels Number of input feature maps.
Alex Gildayc357c472018-03-21 13:54:09 +0000351 *
352 * @return Storage size (in units of T) required.
Pablo Tello52140b42018-01-30 14:48:11 +0000353 */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000354 virtual unsigned int get_weight_storage_size(int n_output_channels, int n_input_channels) const = 0;
355 /** Gets the stride between matrices in the kernel worspace
356 *
357 * @param[in] kernel_shape The shape of the weights tensor.
358 *
359 * @return Stride expressed in bytes.
360 */
361 virtual int get_matrix_stride(const KernelShape &kernel_shape) const = 0;
Pablo Tello52140b42018-01-30 14:48:11 +0000362
Pablo Tellof6c572c2018-02-14 12:47:30 +0000363 /** Configure the weights transform kernel.
Pablo Tello52140b42018-01-30 14:48:11 +0000364 *
365 * @param[in] weights_hwio Pointer to the weights tensor
366 * @param[in] output Pointer to working space for the output tensor in the Winograd domain.
367 * @param[in] matrix_stride Stride across matrices in the output workspace.
368 * @param[in] n_output_channels Number of filters.
369 * @param[in] n_input_channels Number of channels in each filter.
370 */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000371 virtual void configure(const ITensor *weights_hwio, T *const output, const int matrix_stride, const int n_output_channels, const int n_input_channels) = 0;
372
373 virtual ~INEWinogradLayerTransformWeightsKernel()
374 {
375 }
376};
377
Alex Gildayc357c472018-03-21 13:54:09 +0000378/** NEON kernel to perform Winograd weights transform. */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000379template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
380class NEWinogradLayerTransformWeightsKernel final : public INEWinogradLayerTransformWeightsKernel<T>
381{
382public:
Alex Gildayc357c472018-03-21 13:54:09 +0000383 /** Default constructor. */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000384 NEWinogradLayerTransformWeightsKernel();
385 const char *name() const override
386 {
387 return "NEWinogradLayerTransformWeightsKernel";
388 }
Pablo Tello52140b42018-01-30 14:48:11 +0000389
Vidhya Sudhan Loganathan3ca97862018-04-23 08:20:04 +0100390 /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformWeightsKernel
391 *
Vidhya Sudhan Loganathan84ce1f92018-04-25 13:00:09 +0100392 * @param[in] input Source tensor info. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout).
393 * kernel_x must be 3 and equal to kernel_y. Data types supported: F32.
394 * @param[in] output Destination tensor info. The output is a 3D tensor with dimensions [OFM, IFM, 16] or [OFM, IFM, 36]. Data type supported: same as @p input
395 * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
Vidhya Sudhan Loganathan3ca97862018-04-23 08:20:04 +0100396 *
397 * @return a status
398 */
Vidhya Sudhan Loganathan84ce1f92018-04-25 13:00:09 +0100399 static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info);
Vidhya Sudhan Loganathan3ca97862018-04-23 08:20:04 +0100400
Pablo Tellod6ca4782018-01-23 09:36:04 +0000401 // Inherited methods overridden:
Pablo Tellof6c572c2018-02-14 12:47:30 +0000402 void configure(const ITensor *weights_hwio, T *const output, const int matrix_stride, const int n_output_channels, const int n_input_channels) override;
403 unsigned int get_weight_storage_size(int n_output_channels, int n_input_channels) const override;
404 int get_matrix_stride(const KernelShape &kernel_shape) const override;
Pablo Tellod6ca4782018-01-23 09:36:04 +0000405 void run(const Window &window, const ThreadInfo &info) override;
406 bool is_parallelisable() const override;
Pablo Tello52140b42018-01-30 14:48:11 +0000407
408private:
409 using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
Pablo Tellof6c572c2018-02-14 12:47:30 +0000410 using WinogradConv = typename WinogradBase::template Convolution<T, T>;
411 using WeightsTransform = typename WinogradBase::template WeightsTransform<T>;
Pablo Tello52140b42018-01-30 14:48:11 +0000412 std::unique_ptr<WeightsTransform> _transform;
Pablo Tellod6ca4782018-01-23 09:36:04 +0000413};
414
Alex Gildayc357c472018-03-21 13:54:09 +0000415/** Interface for the NEON kernel to perform Winograd. */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000416template <typename TIn, typename TOut>
417class INEWinogradLayerBatchedGEMMKernel : public INEKernel
418{
419public:
420 /** Get the number of GEMMs to compute
421 */
422 virtual unsigned int get_number_gemms() const = 0;
423 /** Initialise the kernel
424 *
425 * @param[in] n_gemms Number of GEMMs to compute.
426 * @param[in] M in_shape.n_batches * tile_rows * tile_cols.
427 * @param[in] K Number of channels in the input tensor.
428 * @param[in] N Number of channels in the output tensor.
429 * @param[in] a_matrix_stride Stride between input matrices.
430 * @param[in] a_row_stride Row stride inside input matrix.
431 * @param[in] b_matrix_stride Stride between weights matrices.
432 * @param[in] b_row_stride Row stride inside the weights matrix.
433 * @param[in] c_matrix_stride Stride between output matrices.
434 * @param[in] c_row_stride Row stride inside the output matrix.
435 * @param[out] a_ptr Input workspace.
436 * @param[out] b_ptr Kernel workspace.
437 * @param[out] c_ptr Output workspace.
438 */
439 virtual void configure(
440 const unsigned int n_gemms,
441 const int M, const int K, const int N,
442 const int a_matrix_stride,
443 const int a_row_stride,
444 const int b_matrix_stride,
445 const int b_row_stride,
446 const int c_matrix_stride,
447 const int c_row_stride,
448 const TIn *const a_ptr,
449 const TIn *const b_ptr,
450 TOut *const c_ptr) = 0;
451
452 /** Get the number of tiles per row
453 */
454 virtual int get_output_tile_rows() const = 0;
455 /** Get the number of tiles per columns
456 */
457 virtual int get_output_tile_cols() const = 0;
458 /** Get the number of blocks
459 */
460 virtual int get_number_blocks() const = 0;
461};
462
Alex Gildayc357c472018-03-21 13:54:09 +0000463/** NEON kernel to perform Winograd. */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000464template <typename TIn, typename TOut, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
465class NEWinogradLayerBatchedGEMMKernel : public INEWinogradLayerBatchedGEMMKernel<TIn, TOut>
Pablo Tello89519332017-11-17 11:52:36 +0000466{
467public:
Alex Gildayc357c472018-03-21 13:54:09 +0000468 /** Winograd base kernel */
Pablo Tello52140b42018-01-30 14:48:11 +0000469 using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
Alex Gildayc357c472018-03-21 13:54:09 +0000470 /** Winograd convolution kernel */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000471 using WinogradConv = typename WinogradBase::template Convolution<TIn, TOut>;
Alex Gildayc357c472018-03-21 13:54:09 +0000472 /** Winograd batched blocked GEMM operator */
473 using MultiGEMM = winograd::BatchedBlockedGemm<WinogradConv::M_BLOCK, WinogradConv::N_BLOCK, TIn, TOut>;
Pablo Tello52140b42018-01-30 14:48:11 +0000474
Anthony Barbiere8a49832018-01-18 10:04:05 +0000475 const char *name() const override
476 {
Pablo Tellof6c572c2018-02-14 12:47:30 +0000477 return "NEWinogradLayerBatchedGEMMKernel";
Anthony Barbiere8a49832018-01-18 10:04:05 +0000478 }
Pablo Tello89519332017-11-17 11:52:36 +0000479 /** Constructor */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000480 NEWinogradLayerBatchedGEMMKernel();
Pablo Tello89519332017-11-17 11:52:36 +0000481
482 /** Prevent instances of this class from being copied (As this class contains pointers) */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000483 NEWinogradLayerBatchedGEMMKernel(const NEWinogradLayerBatchedGEMMKernel &) = delete;
Pablo Tello89519332017-11-17 11:52:36 +0000484 /** Prevent instances of this class from being copied (As this class contains pointers) */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000485 NEWinogradLayerBatchedGEMMKernel &operator=(const NEWinogradLayerBatchedGEMMKernel &) = delete;
Pablo Tello89519332017-11-17 11:52:36 +0000486 /** Allow instances of this class to be moved */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000487 NEWinogradLayerBatchedGEMMKernel(NEWinogradLayerBatchedGEMMKernel &&) = default;
Pablo Tello89519332017-11-17 11:52:36 +0000488 /** Allow instances of this class to be moved */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000489 NEWinogradLayerBatchedGEMMKernel &operator=(NEWinogradLayerBatchedGEMMKernel &&) = default;
Alex Gildayc357c472018-03-21 13:54:09 +0000490 /** Default destructor. */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000491 ~NEWinogradLayerBatchedGEMMKernel() = default;
492
493 // Inherited methods overridden:
494
495 unsigned int get_number_gemms() const override;
496 int get_output_tile_rows() const override;
497 int get_output_tile_cols() const override;
498 int get_number_blocks() const override;
Pablo Tello89519332017-11-17 11:52:36 +0000499
500 /** Initialise the kernel
501 *
Pablo Tello52140b42018-01-30 14:48:11 +0000502 * @param[in] n_gemms Number of GEMMs to compute.
503 * @param[in] M in_shape.n_batches * tile_rows * tile_cols.
504 * @param[in] K Number of channels in the input tensor.
505 * @param[in] N Number of channels in the output tensor.
506 * @param[in] a_matrix_stride Stride between input matrices.
507 * @param[in] a_row_stride Row stride inside input matrix.
508 * @param[in] b_matrix_stride Stride between weights matrices.
509 * @param[in] b_row_stride Row stride inside the weights matrix.
510 * @param[in] c_matrix_stride Stride between output matrices.
511 * @param[in] c_row_stride Row stride inside the output matrix.
512 * @param[out] a_ptr Input workspace.
513 * @param[out] b_ptr Kernel workspace.
514 * @param[out] c_ptr Output workspace.
Pablo Tello89519332017-11-17 11:52:36 +0000515 */
Pablo Tello52140b42018-01-30 14:48:11 +0000516 void configure(
517 const unsigned int n_gemms,
518 const int M, const int K, const int N,
Pablo Tellof6c572c2018-02-14 12:47:30 +0000519 const int a_matrix_stride,
520 const int a_row_stride,
521 const int b_matrix_stride,
522 const int b_row_stride,
523 const int c_matrix_stride,
524 const int c_row_stride,
525 const TIn *const a_ptr,
526 const TIn *const b_ptr,
527 TOut *const c_ptr) override;
Pablo Tello89519332017-11-17 11:52:36 +0000528
Pablo Tello89519332017-11-17 11:52:36 +0000529 void run(const Window &window, const ThreadInfo &info) override;
530
Vidhya Sudhan Loganathan3ca97862018-04-23 08:20:04 +0100531 /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerBatchedGEMMKernel.
532 *
533 * @param[in] a First input tensor (Matrix or Vector A). Data types supported: F32
534 * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a.
535 * @param[in] c Third input tensor (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a.
536 * @param[out] output Output tensor. Data type supported: same as @p a
537 * @param[in] alpha Weight of the matrix product
538 * @param[in] beta Weight of matrix C
539 * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and
540 * if the reshape of matrix B should happen only for the first run
541 *
542 * @return a status
543 */
544 static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensor *c, const ITensorInfo *output, const float alpha, const float beta, const GEMMInfo &gemm_info = GEMMInfo());
545
Pablo Tello52140b42018-01-30 14:48:11 +0000546private:
Alex Gildayc357c472018-03-21 13:54:09 +0000547 static const int _output_tile_rows = OutputTileRows;
548 static const int _output_tile_cols = OutputTileCols;
Pablo Tello52140b42018-01-30 14:48:11 +0000549 std::unique_ptr<MultiGEMM> _gemms;
Pablo Tello89519332017-11-17 11:52:36 +0000550};
551
552} // namespace arm_compute
553#endif /*__ARM_COMPUTE_NEGEMMWINOGRADLAYERKERNEL_H__*/