Blame - src/core/NEON/kernels/assembly/winograd.hpp - ml/ComputeLibrary

blob: 836402e83d62bd949ffdac2efdd2799c221096f4 [file] [log] [blame]

ramelg01	a1f7851	2022-06-29 16:28:10 +0100	[diff] [blame^]	1	/*
				2	* Copyright (c) 2022 Arm Limited.
				3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24
				25	#pragma once
				26
#include "src/cpu/kernels/assembly/arm_gemm.hpp"

#include <cstddef>
#include <memory>
#include <string>
				29
				30	namespace arm_conv
				31	{
				32	struct Shape2D
				33	{
				34	unsigned int rows, cols;
				35	};
				36
				37	struct ConvolutionArgs
				38	{
				39	unsigned int n_batches;
				40	Shape2D input_shape;
				41	unsigned int n_input_channels;
				42	unsigned int pad_top, pad_left;
				43	Shape2D output_shape;
				44	unsigned int n_output_channels;
				45	Shape2D kernel_shape;
				46	arm_gemm::Activation activation;
				47
				48	ConvolutionArgs(
				49	unsigned int n_batches,
				50	const Shape2D &input_shape,
				51	unsigned int n_input_channels,
				52	unsigned int pad_top, unsigned int pad_left,
				53	const Shape2D &output_shape,
				54	unsigned int n_output_channels,
				55	const Shape2D kernel_shape,
				56	const arm_gemm::Activation &activation = {})
				57	: n_batches(n_batches), input_shape(input_shape), n_input_channels(n_input_channels), pad_top(pad_top), pad_left(pad_left), output_shape(output_shape), n_output_channels(n_output_channels),
				58	kernel_shape(kernel_shape), activation(activation)
				59	{
				60	}
				61	};
				62
				63	namespace winograd
				64	{
				65	/* Constrain the selected Winograd implementation.
				66	*/
				67	struct WinogradConfig
				68	{
				69	unsigned int output_rows = 0, output_cols = 0;
				70	std::string input_transform_filter = "";
				71	std::string output_transform_filter = "";
				72	std::string weight_transform_filter = "";
				73	};
				74
				75	/* Struct describing (suggested) memory layout within the Winograd domain.
				76	*/
				77	struct WinogradDomainSpec
				78	{
				79	size_t weight_matrix_size_bytes, input_matrix_size_bytes, output_matrix_size_bytes;
				80
				81	size_t weight_ld_matrix, weight_ld_row;
				82	size_t input_ld_batch, input_ld_matrix, input_ld_row;
				83	size_t output_ld_batch, output_ld_matrix, output_ld_row;
				84	};
				85
				86	class ITransformCommon
				87	{
				88	public:
				89	virtual ~ITransformCommon() = default;
				90
				91	// Get the name of the transform
				92	virtual const std::string &get_name(void) const = 0;
				93	};
				94
				95	namespace weight_transform
				96	{
				97	class ITransform : public ITransformCommon
				98	{
				99	public:
				100	~ITransform() = default;
				101
				102	virtual unsigned int get_kernel_rows(void) const = 0;
				103	virtual unsigned int get_kernel_cols(void) const = 0;
				104
				105	virtual unsigned int get_transformed_tile_rows(void) const = 0;
				106	virtual unsigned int get_transformed_tile_cols(void) const = 0;
				107
				108	void execute(
				109	const ConvolutionArgs &args,
				110	const void *inptr, size_t ld_in_row, size_t ld_in_col, size_t ld_input_channel,
				111	void *outptr, const WinogradDomainSpec &wds,
				112	unsigned int thread_id, unsigned int n_threads) const
				113	{
				114	this->execute(
				115	args, inptr, ld_in_row, ld_in_col, ld_input_channel,
				116	outptr, wds.weight_ld_matrix, wds.weight_ld_row,
				117	thread_id, n_threads);
				118	}
				119
				120	virtual void execute(
				121	const ConvolutionArgs &args,
				122	const void *inptr, size_t ld_in_row, size_t ld_in_col, size_t ld_input_channel,
				123	void *outptr, size_t ld_out_matrix, size_t ld_out_row,
				124	unsigned int thread_id, unsigned int n_threads) const = 0;
				125	};
				126
				127	} // namespace weight_transform
				128
				129	namespace input_transform
				130	{
				131	class ITransform : public ITransformCommon
				132	{
				133	public:
				134	~ITransform() = default;
				135
				136	virtual unsigned int get_input_rows(void) const = 0;
				137	virtual unsigned int get_input_cols(void) const = 0;
				138
				139	virtual size_t get_working_space_size(
				140	const ConvolutionArgs &args,
				141	unsigned int n_threads) const = 0;
				142
				143	void execute(
				144	const ConvolutionArgs &args,
				145	const void *inptr, size_t ld_in_batch, size_t ld_in_row, size_t ld_in_col,
				146	void *outptr, const WinogradDomainSpec &wds,
				147	void *working_space, unsigned int thread_id, unsigned int n_threads) const
				148	{
				149	this->execute(
				150	args, inptr, ld_in_batch, ld_in_row, ld_in_col,
				151	outptr, wds.input_ld_batch, wds.input_ld_matrix, wds.input_ld_row,
				152	working_space, thread_id, n_threads);
				153	}
				154
				155	virtual void execute(
				156	const ConvolutionArgs &args,
				157	const void *inptr, size_t ld_in_batch, size_t ld_in_row, size_t ld_in_col,
				158	void *outptr, size_t ld_out_batch, size_t ld_out_matrix, size_t ld_out_row,
				159	void *working_space, unsigned int thread_id, unsigned int n_threads) const = 0;
				160	};
				161
				162	} // namespace input_transform
				163
				164	namespace output_transform
				165	{
				166	class ITransform : public ITransformCommon
				167	{
				168	public:
				169	~ITransform() = default;
				170
				171	virtual unsigned int get_input_rows(void) const = 0;
				172	virtual unsigned int get_input_cols(void) const = 0;
				173
				174	virtual unsigned int get_output_rows(void) const = 0;
				175	virtual unsigned int get_output_cols(void) const = 0;
				176
				177	virtual unsigned int get_kernel_rows(void) const = 0;
				178	virtual unsigned int get_kernel_cols(void) const = 0;
				179
				180	virtual size_t get_working_space_size(
				181	const ConvolutionArgs &args,
				182	unsigned int n_threads) const = 0;
				183
				184	void execute(
				185	const ConvolutionArgs &args,
				186	const void *inptr, const WinogradDomainSpec &wds,
				187	const void *bias,
				188	void *outptr, size_t ld_out_batch, size_t ld_out_row, size_t ld_out_col,
				189	void *working_space, unsigned int thread_id, unsigned int n_threads) const
				190	{
				191	this->execute(
				192	args,
				193	inptr, wds.output_ld_batch, wds.output_ld_matrix, wds.output_ld_row,
				194	bias,
				195	outptr, ld_out_batch, ld_out_row, ld_out_col,
				196	working_space, thread_id, n_threads);
				197	}
				198
				199	virtual void execute(
				200	const ConvolutionArgs &args,
				201	const void *inptr, size_t ld_in_batch, size_t ld_in_matrix, size_t ld_in_row,
				202	const void *bias,
				203	void *outptr, size_t ld_out_batch, size_t ld_out_row, size_t ld_out_col,
				204	void *working_space, unsigned int thread_id, unsigned int n_threads) const = 0;
				205	};
				206
				207	} // namespace output_transform
				208
				209	struct WinogradImpl
				210	{
				211	const output_transform::ITransform *output_transform = nullptr;
				212	const weight_transform::ITransform *weight_transform = nullptr;
				213	const input_transform::ITransform *input_transform = nullptr;
				214	std::unique_ptr<arm_gemm::GemmArgs> gemm_args;
				215	WinogradDomainSpec winograd_spec;
				216	};
				217
				218	/* Get pointers to Winograd transforms for the given convolution problem.
				219	*
				220	* Assigns to the pointers in the `dest` struct and returns true or false to
				221	* indicate whether the given problem can be executed or not.
				222	*/
				223	template <typename TIn, typename TWeight = TIn, typename TOut = TIn, typename TWinogradIn = TIn, typename TWinogradOut = TOut>
				224	bool get_implementation(
				225	WinogradImpl &dest, // Destination for the selected implementation
				226	const CPUInfo *,
				227	const ConvolutionArgs &,
				228	int max_threads,
				229	bool fast_mode,
				230	const WinogradConfig *,
				231	const arm_gemm::GemmConfig *);
				232
				233	} // namespace winograd
				234	} // namespace arm_conv