Blame - arm_compute/core/NEON/kernels/NEWinogradLayerKernel.h - ml/ComputeLibrary

blob: 2f44d19b4fb1f57ac9031ff3762c560988b4b7cb [file] [log] [blame]

Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	1	/*
Pablo Tello	9ceebbe	2018-01-10 16:44:13 +0000	[diff] [blame]	2	* Copyright (c) 2017-2018 ARM Limited.
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#ifndef __ARM_COMPUTE_NEGEMMWINOGRADLAYERKERNEL_H__
				25	#define __ARM_COMPUTE_NEGEMMWINOGRADLAYERKERNEL_H__
				26
				27	#include "arm_compute/core/NEON/INEKernel.h"
Georgios Pinitas	4074c99	2018-01-30 18:13:46 +0000	[diff] [blame]	28	#include "arm_compute/core/NEON/kernels/convolution/common/convolution.hpp"
				29	#include "arm_compute/core/NEON/kernels/convolution/common/tensor.hpp"
				30	#include "arm_compute/core/NEON/kernels/convolution/winograd/batched_blocked_gemm.hpp"
				31	#include "arm_compute/core/NEON/kernels/convolution/winograd/winograd_gemm.hpp"
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	32
				33	namespace arm_compute
				34	{
				35	class ITensor;
Pablo Tello	02541fb	2017-12-15 09:48:59 +0000	[diff] [blame]	36
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame^]	37	/** Interface for the NEON kernel to perform Winograd input transform. */
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	38	template <typename T>
				39	class INEWinogradLayerTransformInputKernel : public INEKernel
Pablo Tello	3d4968a	2017-12-04 15:03:35 +0000	[diff] [blame]	40	{
				41	public:
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	42	/** Determine how much memory (in units of TIn) to allocate for the
				43	* transformed input.
Pablo Tello	6c6e77a	2018-01-23 10:03:27 +0000	[diff] [blame]	44	*
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	45	* @param[in] n_batches Number of batches in the input tensor.
				46	* @param[in] n_channels Number of feature maps in the input tensor.
				47	* @param[in] n_rows Number of rows in each feature map.
				48	* @param[in] n_cols Number of columns in each feature map.
				49	* @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame^]	50	*
				51	* @return Storage size (in units of TIn) required.
Pablo Tello	6c6e77a	2018-01-23 10:03:27 +0000	[diff] [blame]	52	*/
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	53	virtual unsigned int get_input_storage_size(int n_batches, int n_channels, int n_rows, int n_cols, bool same_padding) const = 0;
				54
				55	/** Gets the stride between matrices in the input worspace
				56	*
				57	* @param[in] kernel_shape The shape of the weights tensor.
				58	* @param[in] input_shape The shape of the input tensor.
				59	* @param[in] padding_type The type of padding to be used.
				60	*
				61	* @return Stride expressed in bytes.
				62	*/
				63	virtual int get_matrix_stride(const KernelShape &kernel_shape, const Tensor4DShape &input_shape, const PaddingType padding_type) const = 0;
				64
				65	/** Configure the output transform kernel.
				66	*
				67	* @param[in] input Input tensor data
				68	* @param[in] n_batches Number of batches in input tensor.
				69	* @param[in] n_rows Number of rows in input tensor.
				70	* @param[in] n_cols Number of columns in input tensor.
				71	* @param[in] n_channels Number of channels in input tensor.
				72	* @param[in] padding Padding type.
				73	* @param[out] output Base of output matrices.
				74	* @param[in] matrix_stride Stride between output matrices.
				75	*/
				76	virtual void configure(const T const input, const int n_batches, const int n_rows, const int n_cols, const int n_channels, const PaddingType padding, T const output, const int matrix_stride) = 0;
				77
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame^]	78	/** Destructor */
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	79	virtual ~INEWinogradLayerTransformInputKernel()
				80	{
				81	}
				82	};
				83
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame^]	84	/** NEON kernel to perform Winograd input transform. */
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	85	template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
				86	class NEWinogradLayerTransformInputKernel : public INEWinogradLayerTransformInputKernel<T>
				87	{
				88	public:
				89	/** Determine how much memory (in units of TIn) to allocate for the
				90	* transformed input.
				91	*
				92	* @param[in] n_batches Number of batches in the input tensor.
				93	* @param[in] n_channels Number of feature maps in the input tensor.
				94	* @param[in] n_rows Number of rows in each feature map.
				95	* @param[in] n_cols Number of columns in each feature map.
				96	* @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame^]	97	*
				98	* @return Storage size (in units of TIn) required.
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	99	*/
				100	unsigned int get_input_storage_size(
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	101	int n_batches,
				102	int n_channels,
				103	int n_rows,
				104	int n_cols,
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	105	bool same_padding) const override;
				106
				107	/** Gets the stride between matrices in the input worspace
				108	*
				109	* @param[in] kernel_shape The shape of the weights tensor.
				110	* @param[in] input_shape The shape of the input tensor.
				111	* @param[in] padding_type The type of padding to be used.
				112	*
				113	* @return Stride expressed in bytes.
				114	*/
				115	int get_matrix_stride(const KernelShape &kernel_shape, const Tensor4DShape &input_shape, const PaddingType padding_type) const override;
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	116
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame^]	117	/** Default constructor */
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	118	NEWinogradLayerTransformInputKernel();
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	119
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	120	const char *name() const override
				121	{
				122	return "NEWinogradLayerTransformInputKernel";
				123	}
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	124
				125	/** Configure the output transform kernel.
				126	*
				127	* @param[in] input Input tensor data
				128	* @param[in] n_batches Number of batches in input tensor.
				129	* @param[in] n_rows Number of rows in input tensor.
				130	* @param[in] n_cols Number of columns in input tensor.
				131	* @param[in] n_channels Number of channels in input tensor.
				132	* @param[in] padding Padding type.
				133	* @param[out] output Base of output matrices.
				134	* @param[in] matrix_stride Stride between output matrices.
				135	*/
				136	void configure(
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	137	const T *const input,
				138	const int n_batches,
				139	const int n_rows,
				140	const int n_cols,
				141	const int n_channels,
				142	const PaddingType padding,
				143	T *const output,
				144	const int matrix_stride) override;
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	145
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	146	// Inherited methods overridden:
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	147	void run(const Window &window, const ThreadInfo &info) override;
				148	bool is_parallelisable() const override;
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	149
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame^]	150	/** Winograd base kernel */
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	151	using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelCols, KernelCols>;
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame^]	152	/** Winograd convolution kernel */
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	153	using WinogradConv = typename WinogradBase::template Convolution<T, T>;
				154
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	155	private:
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	156	using InputTransform = typename WinogradBase::template InputTransform<T>;
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	157	std::unique_ptr<InputTransform> _transform;
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	158	};
				159
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame^]	160	/** Interface for the NEON kernel to perform Winograd output transform. */
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	161	template <typename T>
				162	class INEWinogradLayerTransformOutputKernel : public INEKernel
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	163	{
				164	public:
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	165	/** Determine how much memory (in units of TOut) to allocate for the
				166	* (Winograd domain) output.
				167	*
				168	* @param[in] n_batches Number of batches in the output tensor.
				169	* @param[in] n_rows Number of rows in each feature map of the input tensor.
				170	* @param[in] n_cols Number of columns in each feature map of the input tensor.
				171	* @param[in] n_output_channels Number of feature maps in the output tensor.
				172	* @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame^]	173	*
				174	* @return Storage size (in units of TOut) required.
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	175	*/
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	176	virtual unsigned int get_output_storage_size(int n_batches, int n_rows, int n_cols, int n_output_channels, bool same_padding) const = 0;
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	177
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	178	/** Gets the stride between matrices in the output worspace
				179	*
				180	* @param[in] kernel_shape The shape of the weights tensor.
				181	* @param[in] input_shape The shape of the input tensor.
				182	* @param[in] padding_type The type of padding to be used.
				183	*
				184	* @return Stride expressed in bytes.
				185	*/
				186	virtual int get_matrix_stride(const KernelShape &kernel_shape, const Tensor4DShape &input_shape, const PaddingType padding_type) const = 0;
				187
				188	/** Get the output shape of a convolution.
				189	*
				190	* @param[in] kernel_shape The shape of the weights tensor.
				191	* @param[in] in_shape The shape of the input tensor.
				192	* @param[in] padding The type of padding to be used.
				193	*
				194	* @return Stride expressed in bytes.
				195	*/
				196	virtual Tensor4DShape get_output_shape(const KernelShape &kernel_shape, const Tensor4DShape &in_shape, const PaddingType padding) const = 0;
				197
				198	/** Configure the output transform kernel.
				199	*
				200	* @param[in] biases Pointer to the biases tensor.
				201	* @param[in] output_workingspace Pointer to working space for the output tensor in the Winograd domain.
				202	* @param[in] matrix_stride Output matrix stride, can be computed with winograd::WinogradGEMM<2, 2, 3, 3>::Convolution<float, float>::get_output_matrix_stride()
				203	* @param[out] output Pointer to NHWC ordered output tensor, in the spatial domain.
				204	* @param[in] n_batches Number of batches in the input tensor.
				205	* @param[in] n_rows Number of rows in output tensor.
				206	* @param[in] n_cols Number of columns in output tensor.
				207	* @param[in] n_channels Number of feature maps in the output tensor.
				208	*/
				209	virtual void configure(
				210	const ITensor *biases,
				211	const T *const output_workingspace,
				212	const int matrix_stride,
				213	T *const output,
				214	const int n_batches,
				215	const int n_rows,
				216	const int n_cols,
				217	const int n_channels) = 0;
				218
				219	virtual ~INEWinogradLayerTransformOutputKernel()
				220	{
				221	}
				222	};
				223
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame^]	224	/** NEON kernel to perform Winograd output transform. */
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	225	template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
				226	class NEWinogradLayerTransformOutputKernel : public INEWinogradLayerTransformOutputKernel<T>
				227	{
				228	public:
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	229	const char *name() const override
				230	{
				231	return "NEWinogradLayerTransformOutputKernel";
				232	}
				233	/** Constructor */
				234	NEWinogradLayerTransformOutputKernel();
				235
				236	/** Prevent instances of this class from being copied (As this class contains pointers) */
				237	NEWinogradLayerTransformOutputKernel(const NEWinogradLayerTransformOutputKernel &) = delete;
				238	/** Prevent instances of this class from being copied (As this class contains pointers) */
				239	NEWinogradLayerTransformOutputKernel &operator=(const NEWinogradLayerTransformOutputKernel &) = delete;
				240	/** Allow instances of this class to be moved */
				241	NEWinogradLayerTransformOutputKernel(NEWinogradLayerTransformOutputKernel &&) = default;
				242	/** Allow instances of this class to be moved */
				243	NEWinogradLayerTransformOutputKernel &operator=(NEWinogradLayerTransformOutputKernel &&) = default;
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame^]	244	/** Default destructor */
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	245	~NEWinogradLayerTransformOutputKernel() = default;
				246
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	247	// Inherited methods overridden:
				248	/** Determine how much memory (in units of TOut) to allocate for the
				249	* (Winograd domain) output.
				250	*
				251	* @param[in] n_batches Number of batches in the output tensor.
				252	* @param[in] n_rows Number of rows in each feature map of the input tensor.
				253	* @param[in] n_cols Number of columns in each feature map of the input tensor.
				254	* @param[in] n_output_channels Number of feature maps in the output tensor.
				255	* @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame^]	256	*
				257	* @return Storage size (in units of TOut) required.
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	258	*/
				259	unsigned int get_output_storage_size(int n_batches, int n_rows, int n_cols, int n_output_channels, bool same_padding) const override;
				260
				261	/** Gets the stride between matrices in the output worspace
				262	*
				263	* @param[in] kernel_shape The shape of the weights tensor.
				264	* @param[in] input_shape The shape of the input tensor.
				265	* @param[in] padding_type The type of padding to be used.
				266	*
				267	* @return Stride expressed in bytes.
				268	*/
				269	int get_matrix_stride(const KernelShape &kernel_shape, const Tensor4DShape &input_shape, const PaddingType padding_type) const override;
				270	/** Get the output shape of a convolution.
				271	*
				272	* @param[in] kernel_shape The shape of the weights tensor.
				273	* @param[in] in_shape The shape of the input tensor.
				274	* @param[in] padding The type of padding to be used.
				275	*
				276	* @return Stride expressed in bytes.
				277	*/
				278	Tensor4DShape get_output_shape(const KernelShape &kernel_shape, const Tensor4DShape &in_shape, const PaddingType padding) const override;
				279
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	280	/** Configure the output transform kernel.
				281	*
				282	* @param[in] biases Pointer to the biases tensor.
				283	* @param[in] output_workingspace Pointer to working space for the output tensor in the Winograd domain.
				284	* @param[in] matrix_stride Output matrix stride, can be computed with winograd::WinogradGEMM<2, 2, 3, 3>::Convolution<float, float>::get_output_matrix_stride()
				285	* @param[out] output Pointer to NHWC ordered output tensor, in the spatial domain.
				286	* @param[in] n_batches Number of batches in the input tensor.
				287	* @param[in] n_rows Number of rows in output tensor.
				288	* @param[in] n_cols Number of columns in output tensor.
				289	* @param[in] n_channels Number of feature maps in the output tensor.
				290	*/
				291	void configure(
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	292	const ITensor *biases,
				293	const T *const output_workingspace,
				294	const int matrix_stride,
				295	T *const output,
				296	const int n_batches,
				297	const int n_rows,
				298	const int n_cols,
				299	const int n_channels) override;
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	300
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	301	void run(const Window &window, const ThreadInfo &info) override;
				302	bool is_parallelisable() const override;
				303
				304	private:
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	305	using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	306	using WinogradConv = typename WinogradBase::template Convolution<T, T>;
				307	using OutputTransform = typename WinogradBase::template OutputTransform<T>;
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	308
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	309	const ITensor *_biases;
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	310	const T *_output_workspace;
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	311	int _matrix_stride;
				312	int _matrix_row_stride;
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	313	T *_output;
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	314	int _n_batches;
				315	int _n_rows;
				316	int _n_cols;
				317	int _n_channels;
				318	};
				319
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame^]	320	/** Interface for the NEON kernel to perform Winograd weights transform. */
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	321	template <typename T>
				322	class INEWinogradLayerTransformWeightsKernel : public INEKernel
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	323	{
				324	public:
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	325	/** Determine how much memory (in units of T) to allocate for the
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	326	* transformed weights.
				327	*
				328	* @param[in] n_output_channels Number of output feature maps.
				329	* @param[in] n_input_channels Number of input feature maps.
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame^]	330	*
				331	* @return Storage size (in units of T) required.
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	332	*/
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	333	virtual unsigned int get_weight_storage_size(int n_output_channels, int n_input_channels) const = 0;
				334	/** Gets the stride between matrices in the kernel worspace
				335	*
				336	* @param[in] kernel_shape The shape of the weights tensor.
				337	*
				338	* @return Stride expressed in bytes.
				339	*/
				340	virtual int get_matrix_stride(const KernelShape &kernel_shape) const = 0;
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	341
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	342	/** Configure the weights transform kernel.
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	343	*
				344	* @param[in] weights_hwio Pointer to the weights tensor
				345	* @param[in] output Pointer to working space for the output tensor in the Winograd domain.
				346	* @param[in] matrix_stride Stride across matrices in the output workspace.
				347	* @param[in] n_output_channels Number of filters.
				348	* @param[in] n_input_channels Number of channels in each filter.
				349	*/
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	350	virtual void configure(const ITensor weights_hwio, T const output, const int matrix_stride, const int n_output_channels, const int n_input_channels) = 0;
				351
				352	virtual ~INEWinogradLayerTransformWeightsKernel()
				353	{
				354	}
				355	};
				356
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame^]	357	/** NEON kernel to perform Winograd weights transform. */
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	358	template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
				359	class NEWinogradLayerTransformWeightsKernel final : public INEWinogradLayerTransformWeightsKernel<T>
				360	{
				361	public:
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame^]	362	/** Default constructor. */
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	363	NEWinogradLayerTransformWeightsKernel();
				364	const char *name() const override
				365	{
				366	return "NEWinogradLayerTransformWeightsKernel";
				367	}
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	368
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	369	// Inherited methods overridden:
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	370	void configure(const ITensor weights_hwio, T const output, const int matrix_stride, const int n_output_channels, const int n_input_channels) override;
				371	unsigned int get_weight_storage_size(int n_output_channels, int n_input_channels) const override;
				372	int get_matrix_stride(const KernelShape &kernel_shape) const override;
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	373	void run(const Window &window, const ThreadInfo &info) override;
				374	bool is_parallelisable() const override;
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	375
				376	private:
				377	using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	378	using WinogradConv = typename WinogradBase::template Convolution<T, T>;
				379	using WeightsTransform = typename WinogradBase::template WeightsTransform<T>;
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	380	std::unique_ptr<WeightsTransform> _transform;
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame]	381	};
				382
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame^]	383	/** Interface for the NEON kernel to perform Winograd. */
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	384	template <typename TIn, typename TOut>
				385	class INEWinogradLayerBatchedGEMMKernel : public INEKernel
				386	{
				387	public:
				388	/** Get the number of GEMMs to compute
				389	*/
				390	virtual unsigned int get_number_gemms() const = 0;
				391	/** Initialise the kernel
				392	*
				393	* @param[in] n_gemms Number of GEMMs to compute.
				394	* @param[in] M in_shape.n_batches * tile_rows * tile_cols.
				395	* @param[in] K Number of channels in the input tensor.
				396	* @param[in] N Number of channels in the output tensor.
				397	* @param[in] a_matrix_stride Stride between input matrices.
				398	* @param[in] a_row_stride Row stride inside input matrix.
				399	* @param[in] b_matrix_stride Stride between weights matrices.
				400	* @param[in] b_row_stride Row stride inside the weights matrix.
				401	* @param[in] c_matrix_stride Stride between output matrices.
				402	* @param[in] c_row_stride Row stride inside the output matrix.
				403	* @param[out] a_ptr Input workspace.
				404	* @param[out] b_ptr Kernel workspace.
				405	* @param[out] c_ptr Output workspace.
				406	*/
				407	virtual void configure(
				408	const unsigned int n_gemms,
				409	const int M, const int K, const int N,
				410	const int a_matrix_stride,
				411	const int a_row_stride,
				412	const int b_matrix_stride,
				413	const int b_row_stride,
				414	const int c_matrix_stride,
				415	const int c_row_stride,
				416	const TIn *const a_ptr,
				417	const TIn *const b_ptr,
				418	TOut *const c_ptr) = 0;
				419
				420	/** Get the number of tiles per row
				421	*/
				422	virtual int get_output_tile_rows() const = 0;
				423	/** Get the number of tiles per columns
				424	*/
				425	virtual int get_output_tile_cols() const = 0;
				426	/** Get the number of blocks
				427	*/
				428	virtual int get_number_blocks() const = 0;
				429	};
				430
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame^]	431	/** NEON kernel to perform Winograd. */
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	432	template <typename TIn, typename TOut, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
				433	class NEWinogradLayerBatchedGEMMKernel : public INEWinogradLayerBatchedGEMMKernel<TIn, TOut>
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	434	{
				435	public:
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame^]	436	/** Winograd base kernel */
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	437	using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame^]	438	/** Winograd convolution kernel */
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	439	using WinogradConv = typename WinogradBase::template Convolution<TIn, TOut>;
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame^]	440	/** Winograd batched blocked GEMM operator */
				441	using MultiGEMM = winograd::BatchedBlockedGemm<WinogradConv::M_BLOCK, WinogradConv::N_BLOCK, TIn, TOut>;
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	442
Anthony Barbier	e8a4983	2018-01-18 10:04:05 +0000	[diff] [blame]	443	const char *name() const override
				444	{
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	445	return "NEWinogradLayerBatchedGEMMKernel";
Anthony Barbier	e8a4983	2018-01-18 10:04:05 +0000	[diff] [blame]	446	}
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	447	/** Constructor */
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	448	NEWinogradLayerBatchedGEMMKernel();
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	449
				450	/** Prevent instances of this class from being copied (As this class contains pointers) */
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	451	NEWinogradLayerBatchedGEMMKernel(const NEWinogradLayerBatchedGEMMKernel &) = delete;
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	452	/** Prevent instances of this class from being copied (As this class contains pointers) */
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	453	NEWinogradLayerBatchedGEMMKernel &operator=(const NEWinogradLayerBatchedGEMMKernel &) = delete;
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	454	/** Allow instances of this class to be moved */
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	455	NEWinogradLayerBatchedGEMMKernel(NEWinogradLayerBatchedGEMMKernel &&) = default;
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	456	/** Allow instances of this class to be moved */
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	457	NEWinogradLayerBatchedGEMMKernel &operator=(NEWinogradLayerBatchedGEMMKernel &&) = default;
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame^]	458	/** Default destructor. */
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	459	~NEWinogradLayerBatchedGEMMKernel() = default;
				460
				461	// Inherited methods overridden:
				462
				463	unsigned int get_number_gemms() const override;
				464	int get_output_tile_rows() const override;
				465	int get_output_tile_cols() const override;
				466	int get_number_blocks() const override;
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	467
				468	/** Initialise the kernel
				469	*
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	470	* @param[in] n_gemms Number of GEMMs to compute.
				471	* @param[in] M in_shape.n_batches * tile_rows * tile_cols.
				472	* @param[in] K Number of channels in the input tensor.
				473	* @param[in] N Number of channels in the output tensor.
				474	* @param[in] a_matrix_stride Stride between input matrices.
				475	* @param[in] a_row_stride Row stride inside input matrix.
				476	* @param[in] b_matrix_stride Stride between weights matrices.
				477	* @param[in] b_row_stride Row stride inside the weights matrix.
				478	* @param[in] c_matrix_stride Stride between output matrices.
				479	* @param[in] c_row_stride Row stride inside the output matrix.
				480	* @param[out] a_ptr Input workspace.
				481	* @param[out] b_ptr Kernel workspace.
				482	* @param[out] c_ptr Output workspace.
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	483	*/
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	484	void configure(
				485	const unsigned int n_gemms,
				486	const int M, const int K, const int N,
Pablo Tello	f6c572c	2018-02-14 12:47:30 +0000	[diff] [blame]	487	const int a_matrix_stride,
				488	const int a_row_stride,
				489	const int b_matrix_stride,
				490	const int b_row_stride,
				491	const int c_matrix_stride,
				492	const int c_row_stride,
				493	const TIn *const a_ptr,
				494	const TIn *const b_ptr,
				495	TOut *const c_ptr) override;
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	496
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	497	void run(const Window &window, const ThreadInfo &info) override;
				498
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	499	private:
Alex Gilday	c357c47	2018-03-21 13:54:09 +0000	[diff] [blame^]	500	static const int _output_tile_rows = OutputTileRows;
				501	static const int _output_tile_cols = OutputTileCols;
Pablo Tello	52140b4	2018-01-30 14:48:11 +0000	[diff] [blame]	502	std::unique_ptr<MultiGEMM> _gemms;
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	503	};
				504
				505	} // namespace arm_compute
				506	#endif /* __ARM_COMPUTE_NEGEMMWINOGRADLAYERKERNEL_H__ */