Blame - src/core/NEON/kernels/convolution/winograd/winograd_layer.hpp - ml/ComputeLibrary

blob: 3cfb6e6646f9d06c75ee1addec82d5f1bfc2f2c7 [file] [log] [blame]

Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	1	/*
Michele Di Giorgio	d9eaf61	2020-07-08 11:12:57 +0100	[diff] [blame^]	2	* Copyright (c) 2017-2019 Arm Limited.
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24
				25	#pragma once
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	26	#include "arm_gemm_local.hpp"
				27	#include "arm_gemm.hpp"
				28	#include "winograd.hpp"
				29
				30	namespace winograd
				31	{
				32
				33
				34	class IWinogradConvolutionLayer
				35	{
				36	public:
				37	virtual ~IWinogradConvolutionLayer() = default;
				38
				39	virtual unsigned int weight_transform_get_window(void) const = 0;
				40	virtual void weight_transform_run(unsigned int start, unsigned int stop) = 0;
				41
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	42	virtual IInputTransform& input_transform(void) = 0; // Expose the input transform
				43	virtual IOutputTransform& output_transform(void) = 0; // Expose the output transform
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	44	virtual arm_gemm::IGemmCommon *gemm(void) = 0; // Expose the underlying GEMM
				45	};
				46
				47	/** Example of how to construct an ACL-like interface.
				48	*
				49	* Use `get_weight_storage_size`, `get_input_storage_size` and
				50	* `get_output_storage_size` to allocate memory for the convolution engine.
				51	* Then create a `WinogradConvolutionLayer`.
				52	*
				53	* Initialise the weights using `weights_transform.run(...)`.
				54	*
				55	* For each inference:
				56	* 1. Transform the inputs to the Winograd domain using `input_transform.run(...)`
				57	* 2. Perform a number of GEMMs using `gemms.run(...)`
				58	* 3. Transform the output to the spatial domain using `output_transform.run(...)`
				59	*/
				60	template <int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols,
				61	typename TIn, typename TInGEMM, typename TOutGEMM, typename TOut,
				62	WinogradRoots Roots>
				63	class WinogradConvolutionLayer : public IWinogradConvolutionLayer
				64	{
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	65	public:
				66	using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, Roots>;
				67	using WeightsTransform = typename WinogradBase::template WeightsTransform<TIn, TInGEMM>;
				68	using InputTransform = typename WinogradBase::template InputTransform<TIn, TInGEMM>;
				69	using WinogradConv = typename WinogradBase::template Convolution<TOut, TIn, TInGEMM, TOutGEMM>;
				70	using OutputTransform = typename WinogradBase::template OutputTransform<TOutGEMM, TOut>;
				71
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	72	private:
				73	static constexpr int InnerTileRows = OutputTileRows + KernelRows - 1;
				74	static constexpr int InnerTileCols = OutputTileCols + KernelCols - 1;
				75	static constexpr int N_GEMMS = InnerTileRows * InnerTileCols;
				76
				77	const int _n_output_rows, _n_output_cols;
				78	const int _kernel_matrix_stride, _kernel_matrix_row_stride;
				79	const int _input_matrix_stride, _input_matrix_row_stride;
				80	const int _output_matrix_stride, _output_matrix_row_stride;
				81	const int _tile_rows, _tile_cols;
				82	const int _m, _k, _n;
				83
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	84	WeightsTransform weights_transform; /** Operator to transform weights to Winograd domain. */
				85	InputTransform _input_transform; /** Operator to transform input to Winograd domain. */
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	86	const arm_gemm::GemmArgs gemm_args;
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	87	arm_gemm::UniqueGemmCommon<TInGEMM, TOutGEMM> gemms; /** Operator to perform multiple GEMMs. */
				88	OutputTransform _output_transform; /** Operator to transform output from Winograd domain. */
				89
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	90	public:
				91
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	92	/** Determine how much memory (in units of TIn) to allocate for the
				93	* transformed weights.
				94	*/
				95	static unsigned int get_weight_storage_size(
				96	const int n_output_channels, /** Number of output feature maps. */
				97	const int n_input_channels /** Number of input feature maps. */
				98	);
				99
				100	static unsigned int get_weight_stride(
				101	const int n_output_channels, /** Number of output feature maps. */
				102	const int n_input_channels /** Number of input feature maps. */
				103	);
				104
				105	static unsigned int get_weight_multi_stride(
				106	const int n_output_channels, /** Number of output feature maps. */
				107	const int n_input_channels /** Number of input feature maps. */
				108	);
				109
				110	/** Determine how much memory (in units of TIn) to allocate for the
				111	* transformed input.
				112	*/
				113	static unsigned int get_input_storage_size(
				114	const int n_batches, /** Number of batches in the input tensor. */
				115	const int n_channels, /** Number of feature maps in the input tensor. */
				116	const int n_rows, /** Number of rows in each feature map. */
				117	const int n_cols, /** Number of columns in each feature map. */
				118	const bool same_padding /** Use "SAME" padding, otherwise use "VALID". */
				119	);
				120
				121	/** Get the row stride for the A matrix in the Winograd domain. */
				122	static unsigned int get_input_stride(
				123	const int n_batches, /** Number of batches in the input tensor. */
				124	const int n_channels, /** Number of feature maps in the input tensor. */
				125	const int n_rows, /** Number of rows in each feature map. */
				126	const int n_cols, /** Number of columns in each feature map. */
				127	const bool same_padding /** Use "SAME" padding, otherwise use "VALID". */
				128	);
				129
				130	/** Get the stride between A matrices in the Winograd domain. */
				131	static unsigned int get_input_multi_stride(
				132	const int n_batches, /** Number of batches in the input tensor. */
				133	const int n_channels, /** Number of feature maps in the input tensor. */
				134	const int n_rows, /** Number of rows in each feature map. */
				135	const int n_cols, /** Number of columns in each feature map. */
				136	const bool same_padding /** Use "SAME" padding, otherwise use "VALID". */
				137	);
				138
				139	/** Determine how much memory (in units of TOut) to allocate for the
				140	* (Winograd domain) output.
				141	*/
				142	static unsigned int get_output_storage_size(
				143	const int n_batches, /** Number of batches in the output tensor. */
				144	const int n_rows, /** Number of rows in each feature map of the input tensor. */
				145	const int n_cols, /** Number of columns in each feature map of the input tensor. */
				146	const int n_output_channels, /** Number of feature maps in the output tensor. */
				147	const bool same_padding /** Use "SAME" padding, otherwise use "VALID". */
				148	);
				149
				150	static unsigned int get_output_stride(
				151	const int n_batches, /** Number of batches in the output tensor. */
				152	const int n_rows, /** Number of rows in each feature map of the input tensor. */
				153	const int n_cols, /** Number of columns in each feature map of the input tensor. */
				154	const int n_output_channels, /** Number of feature maps in the output tensor. */
				155	const bool same_padding /** Use "SAME" padding, otherwise use "VALID". */
				156	);
				157
				158	static unsigned int get_output_multi_stride(
				159	const int n_batches, /** Number of batches in the output tensor. */
				160	const int n_rows, /** Number of rows in each feature map of the input tensor. */
				161	const int n_cols, /** Number of columns in each feature map of the input tensor. */
				162	const int n_output_channels, /** Number of feature maps in the output tensor. */
				163	const bool same_padding /** Use "SAME" padding, otherwise use "VALID". */
				164	);
				165
				166	/** Get the shape (rows, cols) of a feature map of the output tensor. */
				167	static std::pair<int, int> get_output_feature_map_shape(
				168	const int n_input_rows, /** Number of rows in the input feature map. */
				169	const int n_input_cols, /** Number of columns in the input feature map. */
				170	const bool same_padding /** Use "SAME" padding, otherwise use "VALID". */
				171	);
				172
				173	/** Create a new Winograd convolution layer.
				174	*/
				175	WinogradConvolutionLayer(
				176	const arm_gemm::CPUInfo &cpuinfo, /** Describes CPU properties. */
				177	const int n_threads, /** Maximum number of threads used to execute the convolution. */
				178	const int n_batches, /** Number of batches in the input and output tensors. */
				179	const int n_input_channels, /** Number of feature maps in a batch of the input tensor. */
				180	const int n_input_rows, /** Number of rows in a feature map of the input tensor. */
				181	const int n_input_cols, /** Number of columns in a feature map of the input tensor. */
				182	const int n_output_channels, /** Number of feature maps in the output tensor. */
				183	const bool same_padding, /** Use "SAME" padding, otherwise use "VALID". */
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	184	const arm_gemm::Activation &activation,
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	185	const TIn* const weights, /** Pointer to weight tensor in spatial domain. Must be ordered as "Height x Rows x Input Feature Maps x Output Feature Maps. */
				186	TInGEMM* const weights_storage, /** Pointer to storage for weight tensor in the Winograd domain. Must be at least the size returned by `get_weight_storage_size`. */
				187	const TIn* const input, /** Pointer to NHWC ordered input tensor, in the spatial domain. */
				188	TInGEMM* const winograd_input, /** Pointer to working space for the input tensor in the Winograd domain. Must be at least the size returned by `get_input_storage_size`. */
				189	const TOut* const biases, /** Pointer to biases vector. Pass nullptr if no bias is provided. */
				190	TOut* const output, /** Pointer to NHWC ordered output tensor, in the spatial domain. */
				191	TOutGEMM* const winograd_output, /** Pointer to working space for the output tensor in the Winograd domain. Must be at least the size returned by `get_output_storage_size`. */
				192	const bool pretranspose_B=true, /** Hint that the B matrix can be pretransposed. */
				193	arm_gemm::GemmConfig gemm_cfg=nullptr /* Pointer to GEMM configuration. */
				194	);
				195
				196	/* Utility methods for interacting with the layer. */
				197	unsigned int weight_transform_get_window(void) const;
				198	void weight_transform_run(const unsigned int start, const unsigned int stop);
				199
Pablo Tello	5264b7d	2019-10-21 14:25:41 +0100	[diff] [blame]	200	IInputTransform& input_transform(void);
				201	IOutputTransform& output_transform(void);
Pablo Tello	8f43d74	2019-03-27 09:28:32 +0000	[diff] [blame]	202
				203	/* Get a pointer to the GEMM underlying the Winograd transform. */
				204	arm_gemm::IGemmCommon *gemm(void);
				205	};
				206
				207	}