Blame - arm_compute/core/NEON/kernels/NEWinogradLayerKernel.h - ml/ComputeLibrary

blob: ea6c8d813d8617db886663336c5da43dd9196622 [file] [log] [blame]

Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	1	/*
Pablo Tello	9ceebbe	2018-01-10 16:44:13 +0000	[diff] [blame]	2	* Copyright (c) 2017-2018 ARM Limited.
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24	#ifndef __ARM_COMPUTE_NEGEMMWINOGRADLAYERKERNEL_H__
				25	#define __ARM_COMPUTE_NEGEMMWINOGRADLAYERKERNEL_H__
				26
				27	#include "arm_compute/core/NEON/INEKernel.h"
Pablo Tello	9ceebbe	2018-01-10 16:44:13 +0000	[diff] [blame]	28	#include "arm_compute/core/NEON/kernels/winograd/convolution.hpp"
Pablo Tello	3d4968a	2017-12-04 15:03:35 +0000	[diff] [blame]	29	#include "arm_compute/core/NEON/kernels/winograd/tensor.hpp"
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	30
				31	namespace arm_compute
				32	{
				33	class ITensor;
Pablo Tello	3d4968a	2017-12-04 15:03:35 +0000	[diff] [blame]	34	class NEWinogradLayerKernel;
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame^]	35	class NEWinogradLayerTransformInputKernel;
				36	class NEWinogradLayerTransformWeightsKernel;
Pablo Tello	02541fb	2017-12-15 09:48:59 +0000	[diff] [blame]	37
/** Winograd convolver (by its name: 3x3 kernels on F32 data).
 *
 * The implementation is hidden behind a private implementation object
 * (@ref Private) that is only forward-declared in this header. The Winograd
 * kernels listed as friends are granted access to that private state.
 */
class Winograd3x3F32 final
{
public:
    // Kernels that need access to the private implementation object.
    friend class NEWinogradLayerKernel;
    friend class NEWinogradLayerTransformInputKernel;
    friend class NEWinogradLayerTransformOutputKernel;
    friend class NEWinogradLayerTransformWeightsKernel;

    /** Create a new Winograd convolution layer.
     *
     * @param[in]  n_batches         Number of batches in the input and output tensors.
     * @param[in]  n_input_channels  Number of feature maps in a batch of the input tensor.
     * @param[in]  n_input_rows      Number of rows in a feature map of the input tensor.
     * @param[in]  n_input_cols      Number of columns in a feature map of the input tensor.
     * @param[in]  n_output_channels Number of feature maps in the output tensor.
     * @param[in]  same_padding      Use "SAME" padding, otherwise use "VALID".
     * @param[in]  weights           Pointer to weight tensor in spatial domain. Must be ordered as "Height x Rows x Input Feature Maps x Output Feature Maps".
     * @param[out] weights_storage   Pointer to storage for weight tensor in the Winograd domain. Must be at least the size returned by `get_weight_storage_size`.
     * @param[in]  input             Pointer to NHWC ordered input tensor, in the spatial domain.
     * @param[out] winograd_input    Pointer to working space for the input tensor in the Winograd domain. Must be at least the size returned by `get_input_storage_size`.
     * @param[out] output            Pointer to NHWC ordered output tensor, in the spatial domain.
     * @param[out] winograd_output   Pointer to working space for the output tensor in the Winograd domain. Must be at least the size returned by `get_output_storage_size`.
     */
    Winograd3x3F32(
        const int          n_batches,
        const int          n_input_channels,
        const int          n_input_rows,
        const int          n_input_cols,
        const int          n_output_channels,
        const bool         same_padding,
        const float *const weights,
        float *const       weights_storage,
        const float *const input,
        float *const       winograd_input,
        float *const       output,
        float *const       winograd_output);

    /** Destructor.
     *
     * Only declared here: Private is an incomplete type in this header, so
     * the definition has to live where Private is complete.
     */
    ~Winograd3x3F32();

private:
    class Private;                   // Implementation type, defined outside this header.
    std::unique_ptr<Private> _pimpl; // Owning handle to the implementation object.
};
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	82
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame^]	83	class INEWinogradLayerTransformKernel : public INEKernel
				84	{
				85	public:
				86	/** Constructor */
				87	INEWinogradLayerTransformKernel();
				88
				89	/** Prevent instances of this class from being copied (As this class contains pointers) */
				90	INEWinogradLayerTransformKernel(const INEWinogradLayerTransformKernel &) = delete;
				91	/** Prevent instances of this class from being copied (As this class contains pointers) */
				92	INEWinogradLayerTransformKernel &operator=(const INEWinogradLayerTransformKernel &) = delete;
				93	/** Allow instances of this class to be moved */
				94	INEWinogradLayerTransformKernel(INEWinogradLayerTransformKernel &&) = default;
				95	/** Allow instances of this class to be moved */
				96	INEWinogradLayerTransformKernel &operator=(INEWinogradLayerTransformKernel &&) = default;
				97
				98	virtual ~INEWinogradLayerTransformKernel() = default;
				99
				100	/** Initialise the kernel
				101	*
				102	* @param[in] convolver A pointer to the winograd convolver, this object must have been configured and is ready to execute 16 GEMMS .
				103	*/
				104	virtual void configure(Winograd3x3F32 *convolver);
				105
				106	protected:
				107	Winograd3x3F32 *_convolver;
				108	};
				109
				110	class NEWinogradLayerTransformInputKernel final : public INEWinogradLayerTransformKernel
				111	{
				112	public:
				113	const char *name() const override
				114	{
				115	return "NEWinogradLayerTransformInputKernel";
				116	}
				117	// Inherited methods overridden:
				118	void configure(Winograd3x3F32 *convolver) override;
				119	void run(const Window &window, const ThreadInfo &info) override;
				120	bool is_parallelisable() const override;
				121	};
				122
				123	class NEWinogradLayerTransformOutputKernel final : public INEKernel
				124	{
				125	public:
				126	const char *name() const override
				127	{
				128	return "NEWinogradLayerTransformOutputKernel";
				129	}
				130	/** Constructor */
				131	NEWinogradLayerTransformOutputKernel();
				132
				133	/** Prevent instances of this class from being copied (As this class contains pointers) */
				134	NEWinogradLayerTransformOutputKernel(const NEWinogradLayerTransformOutputKernel &) = delete;
				135	/** Prevent instances of this class from being copied (As this class contains pointers) */
				136	NEWinogradLayerTransformOutputKernel &operator=(const NEWinogradLayerTransformOutputKernel &) = delete;
				137	/** Allow instances of this class to be moved */
				138	NEWinogradLayerTransformOutputKernel(NEWinogradLayerTransformOutputKernel &&) = default;
				139	/** Allow instances of this class to be moved */
				140	NEWinogradLayerTransformOutputKernel &operator=(NEWinogradLayerTransformOutputKernel &&) = default;
				141
				142	~NEWinogradLayerTransformOutputKernel() = default;
				143
				144	/** Configure the output transform kernel.
				145	*
				146	* @param[in] biases Pointer to the biases tensor.
				147	* @param[in] output_workingspace Pointer to working space for the output tensor in the Winograd domain.
				148	* @param[in] matrix_stride Output matrix stride, can be computed with winograd::WinogradGEMM<2, 2, 3, 3>::Convolution<float, float>::get_output_matrix_stride()
				149	* @param[out] output Pointer to NHWC ordered output tensor, in the spatial domain.
				150	* @param[in] n_batches Number of batches in the input tensor.
				151	* @param[in] n_rows Number of rows in output tensor.
				152	* @param[in] n_cols Number of columns in output tensor.
				153	* @param[in] n_channels Number of feature maps in the output tensor.
				154	*/
				155	void configure(
				156	const ITensor *biases,
				157	const float *const output_workingspace,
				158	const int matrix_stride,
				159	float *const output,
				160	const int n_batches,
				161	const int n_rows,
				162	const int n_cols,
				163	const int n_channels);
				164
				165	// Inherited methods overridden:
				166	void run(const Window &window, const ThreadInfo &info) override;
				167	bool is_parallelisable() const override;
				168
				169	private:
				170	const ITensor *_biases;
				171	const float *_output_workspace;
				172	int _matrix_stride;
				173	int _matrix_row_stride;
				174	float *_output;
				175	int _n_batches;
				176	int _n_rows;
				177	int _n_cols;
				178	int _n_channels;
				179	};
				180
				181	class NEWinogradLayerTransformWeightsKernel final : public INEWinogradLayerTransformKernel
				182	{
				183	public:
				184	const char *name() const override
				185	{
				186	return "NEWinogradLayerTransformWeightsKernel";
				187	}
				188	// Inherited methods overridden:
				189	void configure(Winograd3x3F32 *convolver) override;
				190	void run(const Window &window, const ThreadInfo &info) override;
				191	bool is_parallelisable() const override;
				192	};
				193
				194	class NEWinogradLayerKernel final : public INEKernel
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	195	{
				196	public:
Anthony Barbier	e8a4983	2018-01-18 10:04:05 +0000	[diff] [blame]	197	const char *name() const override
				198	{
				199	return "NEWinogradLayerKernel";
				200	}
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	201	/** Constructor */
				202	NEWinogradLayerKernel();
				203
				204	/** Prevent instances of this class from being copied (As this class contains pointers) */
				205	NEWinogradLayerKernel(const NEWinogradLayerKernel &) = delete;
				206	/** Prevent instances of this class from being copied (As this class contains pointers) */
				207	NEWinogradLayerKernel &operator=(const NEWinogradLayerKernel &) = delete;
				208	/** Allow instances of this class to be moved */
				209	NEWinogradLayerKernel(NEWinogradLayerKernel &&) = default;
				210	/** Allow instances of this class to be moved */
				211	NEWinogradLayerKernel &operator=(NEWinogradLayerKernel &&) = default;
				212
Pablo Tello	d6ca478	2018-01-23 09:36:04 +0000	[diff] [blame^]	213	~NEWinogradLayerKernel() = default;
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	214
				215	/** Initialise the kernel
				216	*
Pablo Tello	02541fb	2017-12-15 09:48:59 +0000	[diff] [blame]	217	* @param[in] convolver A pointer to the winograd convolver, this object must have been configured and is ready to execute 16 GEMMS .
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	218	*/
Pablo Tello	02541fb	2017-12-15 09:48:59 +0000	[diff] [blame]	219	void configure(Winograd3x3F32 *convolver);
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	220
				221	// Inherited methods overridden:
				222	void run(const Window &window, const ThreadInfo &info) override;
				223
Pablo Tello	6c6e77a	2018-01-23 10:03:27 +0000	[diff] [blame]	224	/** Determine how much memory (in units of TIn) to allocate for the
				225	* transformed weights.
				226	*
				227	* @param[in] n_output_channels Number of output feature maps.
				228	* @param[in] n_input_channels Number of input feature maps.
				229	*/
				230	static unsigned int get_weight_storage_size(
				231	const int n_output_channels,
				232	const int n_input_channels);
Pablo Tello	3d4968a	2017-12-04 15:03:35 +0000	[diff] [blame]	233
Pablo Tello	6c6e77a	2018-01-23 10:03:27 +0000	[diff] [blame]	234	/** Determine how much memory (in units of TIn) to allocate for the
				235	* transformed input.
				236	*
				237	* @param[in] n_batches Number of batches in the input tensor.
				238	* @param[in] n_channels Number of feature maps in the input tensor.
				239	* @param[in] n_rows Number of rows in each feature map.
				240	* @param[in] n_cols Number of columns in each feature map.
				241	* @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
				242	*/
Pablo Tello	9ceebbe	2018-01-10 16:44:13 +0000	[diff] [blame]	243	static unsigned int get_input_storage_size(
Pablo Tello	6c6e77a	2018-01-23 10:03:27 +0000	[diff] [blame]	244	const int n_batches,
				245	const int n_channels,
				246	const int n_rows,
				247	const int n_cols,
				248	const bool same_padding);
Pablo Tello	3d4968a	2017-12-04 15:03:35 +0000	[diff] [blame]	249
Pablo Tello	9ceebbe	2018-01-10 16:44:13 +0000	[diff] [blame]	250	/** Determine how much memory (in units of TOut) to allocate for the
				251	* (Winograd domain) output.
Pablo Tello	6c6e77a	2018-01-23 10:03:27 +0000	[diff] [blame]	252	*
				253	* @param[in] n_batches Number of batches in the output tensor.
				254	* @param[in] n_rows Number of rows in each feature map of the input tensor.
				255	* @param[in] n_cols Number of columns in each feature map of the input tensor.
				256	* @param[in] n_output_channels Number of feature maps in the output tensor.
				257	* @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
Pablo Tello	9ceebbe	2018-01-10 16:44:13 +0000	[diff] [blame]	258	*/
				259	static unsigned int get_output_storage_size(
Pablo Tello	6c6e77a	2018-01-23 10:03:27 +0000	[diff] [blame]	260	const int n_batches,
				261	const int n_rows,
				262	const int n_cols,
				263	const int n_output_channels,
				264	const bool same_padding);
Pablo Tello	3d4968a	2017-12-04 15:03:35 +0000	[diff] [blame]	265
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	266	protected:
				267	Winograd3x3F32 *_convolver;
Pablo Tello	8951933	2017-11-17 11:52:36 +0000	[diff] [blame]	268	};
				269
				270	} // namespace arm_compute
				271	#endif /* __ARM_COMPUTE_NEGEMMWINOGRADLAYERKERNEL_H__ */