Blame - src/core/NEON/kernels/assembly/depthwise_common.hpp - ml/ComputeLibrary

blob: fea6326897db1ef437f0d15c033cb47e7f36be35 [file] [log] [blame]

Michele Di Giorgio	d02d5ed	2021-01-22 09:47:04 +0000	[diff] [blame]	1	/*
Pablo Marquez Tello	4e2bbbb	2023-01-09 17:21:01 +0000	[diff] [blame]	2	* Copyright (c) 2021-2023 Arm Limited.
Michele Di Giorgio	d02d5ed	2021-01-22 09:47:04 +0000	[diff] [blame]	3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24
				25	#pragma once
				26
#include "arm_gemm.hpp"
#include "common.hpp"
#include <cstddef>
#include <cstdint>
#include <string>
#include <tuple>
#include <utility>
Michele Di Giorgio	d02d5ed	2021-01-22 09:47:04 +0000	[diff] [blame]	31
				32	namespace arm_conv
				33	{
				34	namespace depthwise
				35	{
				36	using arm_gemm::Nothing;
				37
				38	enum class DepthwiseMethod
				39	{
				40	DEFAULT,
				41	DEPTHFIRST,
				42	PLANAR,
				43	};
				44
				45	struct KernelDescription
				46	{
				47	DepthwiseMethod method = DepthwiseMethod::DEFAULT;
				48	std::string name = "";
				49	bool is_default = false;
				50	uint64_t cycle_estimate = 0;
				51
				52	KernelDescription(
				53	DepthwiseMethod method,
				54	std::string name,
				55	bool is_default,
				56	uint64_t cycle_estimate)
				57	: method(method), name(name), is_default(is_default), cycle_estimate(cycle_estimate)
				58	{
				59	}
				60
				61	KernelDescription() noexcept {};
				62	};
				63
				64	class IDepthwiseCommon
				65	{
				66	public:
				67	virtual ~IDepthwiseCommon() = default;
				68
Pablo Marquez Tello	4e2bbbb	2023-01-09 17:21:01 +0000	[diff] [blame]	69	// Get the name of the depthwise implementation
				70	virtual std::string name() const = 0;
				71
Michele Di Giorgio	d02d5ed	2021-01-22 09:47:04 +0000	[diff] [blame]	72	// Determine the amount of storage space required for the rearranged weights
				73	// and bias.
				74	virtual size_t get_storage_size(void) const = 0;
				75
				76	// Rearrange the weights and biases into a storage buffer.
				77	// Accepts a pointer to a buffer into which to store the packed parameters, a
				78	// pointer the bias vector (which may be nullptr in the case of no bias) and
				79	// a pointer to the array of weights (stored in HWIO order).
				80	virtual void pack_parameters(
				81	void *buffer,
				82	const void *biases,
				83	const void *weights,
				84	size_t ld_weight_col = 0,
				85	size_t ld_weight_row = 0) = 0;
				86
				87	// Determine the amount of working space required
				88	virtual size_t get_working_size(unsigned int n_threads, unsigned int n_input_channels) const = 0;
				89
				90	// Execute the convolution over the specified area of memory.
				91	virtual void execute(
				92	const void *input, // Pointer to input tensor
				93	const void *parameters, // Packed parameters buffer
				94	void *output,
				95	void *working_space,
				96	unsigned int thread_id,
				97	unsigned int n_threads) const = 0;
				98
				99	virtual void execute(
				100	const void *input,
				101	size_t ld_input_col,
				102	size_t ld_input_row,
				103	size_t ld_input_batch,
				104	const void *parameters,
				105	void *output,
				106	size_t ld_output_col,
				107	size_t ld_output_row,
				108	size_t ld_output_batch,
				109	void *working_space,
				110	unsigned int thread_id,
				111	unsigned int n_threads) const = 0;
				112
				113	virtual void execute(
				114	unsigned int batches,
				115	unsigned int input_height,
				116	unsigned int input_width,
				117	unsigned int channels,
				118	const PaddingValues &,
				119	const void *input,
				120	size_t ld_input_col,
				121	size_t ld_input_row,
				122	size_t ld_input_batch,
				123	const void *parameters,
				124	unsigned int output_height,
				125	unsigned int output_width,
				126	void *output,
				127	size_t ld_output_col,
				128	size_t ld_output_row,
				129	size_t ld_output_batch,
				130	void *working_space,
				131	unsigned int thread_id,
				132	unsigned int n_threads) const = 0;
				133	};
				134
// To handle a dilation factor of D, execute the kernel once for each d in
// [0..D). Each `d` corresponds to a portion or "view" of the input and output
// tensors. The output view corresponds to every Dth pixel starting from `d`;
// this function computes how many pixels are covered. The input view consists
// of an amount of before padding, every Dth pixel starting from an offset, and
// some after padding. This function computes the start padding, input offset,
// number of valid input pixels, and the after padding.
//
// Returns a tuple of five values:
// - Number of valid output pixels corresponding to `d`
// - Number of valid input pixels corresponding to `d`
// - Offset of the first pixel corresponding to `d`
// - Amount of padding before the view for `d`
// - Amount of padding after the view for `d`
// NOTE(review): the ordering of the two padding entries (before, then after)
// is presumed from the description above - confirm against the definition.
std::tuple<size_t, size_t, size_t, size_t, size_t> get_reduced_view_for_dilation(
    size_t out_size,
    size_t in_size,
    size_t d,
    size_t dilation_factor,
    size_t kernel_size,
    size_t stride,
    size_t pad_before);
				154
Michele Di Giorgio	d02d5ed	2021-01-22 09:47:04 +0000	[diff] [blame]	155	} // namespace depthwise
				156	} // namespace arm_conv