Blame - src/core/NEON/kernels/convolution/depthwise/depthwise_dilated.hpp - ml/ComputeLibrary

blob: 1bae815613bef9069b367c26c16fce905d1a9b14 [file] [log] [blame]

Georgios Pinitas	30271c7	2019-06-24 14:56:34 +0100	[diff] [blame]	1	/*
Michele Di Giorgio	d9eaf61	2020-07-08 11:12:57 +0100	[diff] [blame]	2	* Copyright (c) 2019 Arm Limited.
Georgios Pinitas	30271c7	2019-06-24 14:56:34 +0100	[diff] [blame]	3	*
				4	* SPDX-License-Identifier: MIT
				5	*
				6	* Permission is hereby granted, free of charge, to any person obtaining a copy
				7	* of this software and associated documentation files (the "Software"), to
				8	* deal in the Software without restriction, including without limitation the
				9	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
				10	* sell copies of the Software, and to permit persons to whom the Software is
				11	* furnished to do so, subject to the following conditions:
				12	*
				13	* The above copyright notice and this permission notice shall be included in all
				14	* copies or substantial portions of the Software.
				15	*
				16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
				19	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				21	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				22	* SOFTWARE.
				23	*/
				24
				25	#pragma once
				26
				27	#include <deque>
				28	#include <functional>
				29	#include <memory>
				30
				31	#include "depthwise.hpp"
				32
				33	namespace depthwise
				34	{
				35
				36	template <
				37	unsigned int OutputTileRows, unsigned int OutputTileCols,
				38	unsigned int KernelRows, unsigned int KernelCols,
				39	unsigned int StrideRows, unsigned int StrideCols,
				40	typename TIn, typename TBias, typename TOut
				41	>
				42	class DilatedDepthwiseConvolution : public IDepthwiseConvolution
				43	{
				44	public:
				45	/** Create a new dilated depthwise convolution engine.
				46	*/
				47	DilatedDepthwiseConvolution(
				48	int n_batches, int n_input_rows, int n_input_cols, int n_channels,
				49	int dilation_factor,
				50	nck::ActivationFunction activation,
				51	unsigned int padding_top,
				52	unsigned int padding_left,
				53	unsigned int padding_bottom,
				54	unsigned int padding_right
				55	);
				56
				57	/** Create a new dilated depthwise convolution engine.
				58	*/
				59	DilatedDepthwiseConvolution(
				60	int n_batches, int n_input_rows, int n_input_cols, int n_channels,
				61	int dilation_factor, int n_output_rows, int n_output_cols,
				62	nck::ActivationFunction activation,
				63	unsigned int padding_top,
				64	unsigned int padding_left,
				65	unsigned int padding_bottom,
				66	unsigned int padding_right
				67	);
				68
				69	// Cannot copy or move a DilatedDepthwiseConvolution.
				70	DilatedDepthwiseConvolution(DilatedDepthwiseConvolution&) = delete;
				71	DilatedDepthwiseConvolution operator=(DilatedDepthwiseConvolution&) = delete;
				72
				73	/* Set input tensor and stride. */
				74	void set_input(const void *inptr) override;
				75	void set_input(const void *inptr, int column_stride) override;
				76	void set_input(const void *inptr, int row_stride, int column_stride) override;
				77	void set_input(const void *inptr, int batch_stride, int row_stride, int column_stride) override;
				78
				79	/* Set output tensor and stride. */
				80	void set_output(void *outptr) override;
				81	void set_output(void *outptr, int column_stride) override;
				82	void set_output(void *outptr, int row_stride, int column_stride) override;
				83	void set_output(void *outptr, int batch_stride, int row_stride, int column_stride) override;
				84
				85	static int get_output_size(
				86	int dim_size,
				87	unsigned int padding_before,
				88	unsigned int padding_after,
				89	int dilation_factor
				90	);
				91
				92	int output_size(
				93	int dim_size, unsigned int padding_before, unsigned int padding_after
				94	) const override;
				95
				96	/* Weights and biases are re-ordered to improve memory access patterns. Use
				97	* these methods to determine the size of the re-pack buffer and to set the
				98	* address (and implicitly reorder the weights and biases into) the buffer.
				99	*/
				100	size_t get_packed_params_size(void) const override;
				101	void set_packed_params_buffer(void *) override;
				102
				103	void pack_params(const void weights, const void biases=nullptr) const override;
				104	void pack_params(void buffer, const void weights, const void *biases=nullptr) const override;
				105	void pack_params(
				106	void *buffer,
				107	const void* weights,
				108	unsigned int weight_row_stride,
				109	unsigned int weight_col_stride,
				110	const void *biases=nullptr
				111	) const override;
				112
				113	/* Working space is used to pad tensors on the fly. Before running any
				114	* inference check the amount of space required, allocate and provide a
				115	* pointer to the convolution engine.
				116	*/
				117	size_t get_working_space_size(unsigned int nthreads=1) const override;
				118	void set_working_space(void *) override;
				119
				120	unsigned int get_window(void) const override;
				121	void run(unsigned int start, unsigned int stop, unsigned int threadid=0) override;
				122
				123	protected:
				124	/** Protected constructor which also accepts a function to construct a new
				125	* subconvolution
				126	*/
				127	DilatedDepthwiseConvolution(
				128	int n_batches, int n_input_rows, int n_input_cols, int n_channels,
				129	int dilation_factor, int n_output_rows, int n_output_cols,
				130	nck::ActivationFunction activation,
				131	unsigned int padding_top,
				132	unsigned int padding_left,
				133	unsigned int padding_bottom,
				134	unsigned int padding_right,
				135	std::function<IDepthwiseConvolution *(int, int, int, int, int, int, nck::ActivationFunction, unsigned int, unsigned int, unsigned int, unsigned int)> subconvfn
				136	);
				137
				138	const int _dilation_factor;
				139	const int _n_input_rows, _n_input_cols, _n_channels;
				140	const int _padding_top, _padding_left;
				141	const int _n_output_rows, _n_output_cols;
				142
				143	/* Dilated depthwise convolution is performed through repeated calls to
				144	* non-dilated convolutions. If the dilation factor is $n$, then we perform
				145	* $(n + 1)^2$ depthwise convolutions.
				146	*/
				147	using BaseDepthwise = DepthwiseConvolution<
				148	OutputTileRows, OutputTileCols,
				149	KernelRows, KernelCols,
				150	StrideRows, StrideCols,
				151	TIn, TBias, TOut
				152	>;
				153	std::deque<std::deque<std::unique_ptr<IDepthwiseConvolution>>> _convs;
				154	};
				155
				156	} // namespace depthwise