// arm_compute/core/NEON/kernels/convolution/depthwise/depthwise.hpp - ml/ComputeLibrary - Gitiles

 /*
  * Copyright (c) 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to
  * deal in the Software without restriction, including without limitation the
  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  * sell copies of the Software, and to permit persons to whom the Software is
  * furnished to do so, subject to the following conditions:
  *
  * The above copyright notice and this permission notice shall be included in all
  * copies or substantial portions of the Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */

 #pragma once

 namespace depthwise
 {

 /** Abstract, type-erased interface to a depthwise convolution engine.
  *
  * Lets callers query and drive an engine without knowing the template
  * parameters of the concrete implementation.
  */
 class IDepthwiseConvolution
 {
 public:
     /** Virtual so that engines can be destroyed through the interface. */
     virtual ~IDepthwiseConvolution() = default;

     /** Get the number of output rows/columns.
      *
      * @param[in] dim_size     Number of elements in the dimension (rows/columns).
      * @param[in] padding_same True if the padding is SAME, otherwise false.
      */
     virtual int output_size(const int dim_size, const bool padding_same) const = 0;

     /** Get the window of work to be performed by an instance of the operator. */
     virtual unsigned int get_window() const = 0;

     /** Perform the portion of work described by the half-open range [start, stop).
      *
      * @param[in] start Start of the window of work to perform.
      * @param[in] stop  End of the work to perform.
      */
     virtual void run(const unsigned int start, const unsigned int stop) = 0;
 };

 /** Depthwise convolution engine whose tile, kernel and stride sizes are fixed
  * at compile time.
  *
  * @tparam OutputTileRows Rows in one output tile.
  * @tparam OutputTileCols Columns in one output tile.
  * @tparam KernelRows     Rows in the kernel.
  * @tparam KernelCols     Columns in the kernel.
  * @tparam StrideRows     Stride along the row dimension.
  * @tparam StrideCols     Stride along the column dimension.
  * @tparam TIn            Element type of the weights and of the input tensor.
  * @tparam TOut           Element type of the output tensor.
  */
 template <
   int OutputTileRows,
   int OutputTileCols,
   int KernelRows,
   int KernelCols,
   int StrideRows,
   int StrideCols,
   typename TIn,
   typename TOut
 >
 class DepthwiseConvolution : public IDepthwiseConvolution
 {
   public:
     using InputType = TIn;
     using OutputType = TOut;

     // Information about the specific convolution instance
     static constexpr int output_tile_rows = OutputTileRows;
     static constexpr int output_tile_cols = OutputTileCols;
     static constexpr int kernel_rows = KernelRows;
     static constexpr int kernel_cols = KernelCols;
     static constexpr int stride_rows = StrideRows;
     static constexpr int stride_cols = StrideCols;
     // Derived tile extent: stride * output_tile + kernel - 1 per dimension
     // (presumably the input footprint of one output tile - confirm against
     // the implementation).
     static constexpr int inner_tile_rows = stride_rows * output_tile_rows + kernel_rows - 1;
     static constexpr int inner_tile_cols = stride_cols * output_tile_cols + kernel_cols - 1;

     /** Create a new depthwise convolution engine.
      *
      * @param[in]  n_batches    Number of batches in the tensors.
      * @param[in]  n_input_rows Number of rows in input tensor.
      * @param[in]  n_input_cols Number of columns in input tensor.
      * @param[in]  n_channels   Number of channels in input and output tensors.
      * @param[in]  padding_same True if padding is SAME, else VALID.
      * @param[in]  weights      Pointer to Height x Width x Channel ordered weights.
      * @param[in]  input        Pointer to NHWC ordered input tensor.
      * @param[out] output       Pointer to NHWC ordered output tensor.
      */
     DepthwiseConvolution(
       const int n_batches, const int n_input_rows, const int n_input_cols,
       const int n_channels, const bool padding_same,
       const TIn* const weights,
       const TIn* const input,
       TOut* const output
     );

     // Cannot copy or move a DepthwiseConvolution. The copy operations are
     // deleted using their canonical signatures; because they are
     // user-declared, no implicit move operations are generated either.
     DepthwiseConvolution(const DepthwiseConvolution&) = delete;
     DepthwiseConvolution& operator=(const DepthwiseConvolution&) = delete;

     /** Get the number of output rows/columns.
      *
      * @param[in] dim_size     Number of elements in the dimension (rows/columns)
      * @param[in] padding_same True if the padding is SAME, otherwise false.
      *
      * @return Number of output rows/columns.
      */
     static int get_output_size(const int dim_size, const bool padding_same);

     /** Get the number of output rows/columns.
      *
      * Virtual entry point which forwards to the static get_output_size().
      *
      * @param[in] dim_size     Number of elements in the dimension (rows/columns)
      * @param[in] padding_same True if the padding is SAME, otherwise false.
      *
      * @return Number of output rows/columns.
      */
     int output_size(const int dim_size, const bool padding_same) const override
     {
         return get_output_size(dim_size, padding_same);
     }

     /** Get the window of work to be performed by an instance of the operator.
      *
      * NOTE(review): presumably the upper bound for the `stop` argument of
      * run() - confirm against the implementation.
      */
     unsigned int get_window(void) const override;

     /** Perform a portion of the work associated with the operator.
      *
      * Will perform the window of work described by the half-open range
      * [start, stop).
      *
      * @param[in] start Start of the window of work to perform.
      * @param[in] stop  End of the work to perform.
      */
     void run(const unsigned int start, const unsigned int stop) override;

   protected:
     /** Process a tile-row of the tensors.
      *
      * @param[in]  n_channels         Number of channels.
      * @param[in]  weights            Pointer to Height x Width x Channels ordered weights.
      * @param[in]  inptr              Pointer into the input tensor for this tile-row.
      * @param[in]  in_row_stride      Stride between rows of the input tensor.
      * @param[in]  in_col_stride      Stride between columns of the input tensor.
      * @param[out] outptr             Pointer into the output tensor for this tile-row.
      * @param[in]  out_row_stride     Stride between rows of the output tensor.
      * @param[in]  out_col_stride     Stride between columns of the output tensor.
      * @param[in]  row_pad_in_top     Presumably the top padding of the input tile-row - TODO confirm.
      * @param[in]  row_pad_in_left    Presumably the left padding of the input tile-row - TODO confirm.
      * @param[in]  row_pad_in_bottom  Presumably the bottom padding of the input tile-row - TODO confirm.
      * @param[in]  row_pad_out_bottom Presumably the bottom padding of the output tile-row - TODO confirm.
      * @param[in]  n_tiles            Number of tiles in the tile-row.
      * @param[in]  n_input_cols       Number of columns in the input tensor.
      * @param[in]  n_output_cols      Number of columns in the output tensor.
      */
     static void process_tile_row(
       const int n_channels,
       const TIn* const weights,
       const TIn* const inptr,
       const int in_row_stride,
       const int in_col_stride,
       TOut* const outptr,
       const int out_row_stride,
       const int out_col_stride,
       const int row_pad_in_top,
       const int row_pad_in_left,
       const int row_pad_in_bottom,
       const int row_pad_out_bottom,
       const int n_tiles,
       const int n_input_cols,
       const int n_output_cols
     );

     /** Process a single tile of the tensors.
      *
      * The template arguments select the padding configuration the
      * instantiation is specialised for.
      *
      * @param[in]  n_channels     Number of channels.
      * @param[in]  weights        Pointer to Height x Width x Channels ordered weights.
      * @param[in]  inptr          Pointer to the top-left unpadded value of the tile.
      * @param[in]  in_row_stride  Stride between rows of the input tensor.
      * @param[in]  in_col_stride  Stride between columns of the input tensor.
      * @param[out] outptr         Pointer to the top-left output value for the tile.
      * @param[in]  out_row_stride Stride between rows of the output tensor.
      * @param[in]  out_col_stride Stride between columns of the output tensor.
      */
     template <
       int in_pad_top, int in_pad_left, int in_pad_bottom, int in_pad_right,
       int out_pad_bottom, int out_pad_right
     >
     static void process_tile(
       const int n_channels,
       const TIn* const weights,
       const TIn* const inptr,
       const int in_row_stride,
       const int in_col_stride,
       TOut* const outptr,
       const int out_row_stride,
       const int out_col_stride
     );

     // Type of a pointer to a `process_tile` instance
     using TileFn = void (*)(
       const int,
       const TIn* const,
       const TIn* const, const int, const int,
       TOut* const, const int, const int
     );

     // Determine the maximum padding values which can be applied to tiles of
     // the tensors involved in this class of convolution. These values are the
     // extents of `tile_fns`, so valid indices lie in [0, max).
     static constexpr int max_in_pad_top = 2;
     static constexpr int max_in_pad_left = 2;
     static constexpr int max_in_pad_bottom = inner_tile_rows - 1;
     static constexpr int max_in_pad_right = inner_tile_cols - 1;
     static constexpr int max_out_pad_bottom = output_tile_rows;
     static constexpr int max_out_pad_right = output_tile_cols;

     /** Array of methods to process tensor tiles.
      *
      * Allows dynamic dispatch to specialized implementations based on
      * different padding configurations. Indexed as
      * [in_pad_top][in_pad_left][in_pad_bottom][in_pad_right][out_pad_bottom][out_pad_right].
      */
     static const TileFn tile_fns[
       max_in_pad_top][max_in_pad_left][max_in_pad_bottom][max_in_pad_right][
       max_out_pad_bottom][max_out_pad_right
     ];

   private:
     // Member variables of instances of a convolution engine.
     const TIn* const _weights;  // Weights pointer (not owned as far as this header shows).
     const TIn* const _input;    // Input tensor pointer.
     TOut* const _output;        // Output tensor pointer.
     const int _n_batches, _n_input_rows, _n_input_cols, _n_channels,
               _n_output_rows, _n_output_cols, _n_tile_rows, _n_tile_cols;
     const bool _padding_same;   // True if padding is SAME, else VALID.
 };

 }  // namespace depthwise
	/*
	* Copyright (c) 2018 ARM Limited.
	*
	* SPDX-License-Identifier: MIT
	*
	* Permission is hereby granted, free of charge, to any person obtaining a copy
	* of this software and associated documentation files (the "Software"), to
	* deal in the Software without restriction, including without limitation the
	* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
	* sell copies of the Software, and to permit persons to whom the Software is
	* furnished to do so, subject to the following conditions:
	*
	* The above copyright notice and this permission notice shall be included in all
	* copies or substantial portions of the Software.
	*
	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
	* SOFTWARE.
	*/

	#pragma once

	namespace depthwise
	{

	/** Abstract, type-erased interface to a depthwise convolution engine.
	 *
	 * Lets callers query and drive an engine without knowing the template
	 * parameters of the concrete implementation.
	 */
	class IDepthwiseConvolution
	{
	public:
	    /** Virtual so that engines can be destroyed through the interface. */
	    virtual ~IDepthwiseConvolution() = default;

	    /** Get the number of output rows/columns.
	     *
	     * @param[in] dim_size     Number of elements in the dimension (rows/columns).
	     * @param[in] padding_same True if the padding is SAME, otherwise false.
	     */
	    virtual int output_size(const int dim_size, const bool padding_same) const = 0;

	    /** Get the window of work to be performed by an instance of the operator. */
	    virtual unsigned int get_window() const = 0;

	    /** Perform the portion of work described by the half-open range [start, stop).
	     *
	     * @param[in] start Start of the window of work to perform.
	     * @param[in] stop  End of the work to perform.
	     */
	    virtual void run(const unsigned int start, const unsigned int stop) = 0;
	};

	/** Depthwise convolution engine whose tile, kernel and stride sizes are fixed
	 * at compile time.
	 *
	 * @tparam OutputTileRows Rows in one output tile.
	 * @tparam OutputTileCols Columns in one output tile.
	 * @tparam KernelRows     Rows in the kernel.
	 * @tparam KernelCols     Columns in the kernel.
	 * @tparam StrideRows     Stride along the row dimension.
	 * @tparam StrideCols     Stride along the column dimension.
	 * @tparam TIn            Element type of the weights and of the input tensor.
	 * @tparam TOut           Element type of the output tensor.
	 */
	template <
	  int OutputTileRows,
	  int OutputTileCols,
	  int KernelRows,
	  int KernelCols,
	  int StrideRows,
	  int StrideCols,
	  typename TIn,
	  typename TOut
	>
	class DepthwiseConvolution : public IDepthwiseConvolution
	{
	  public:
	    using InputType = TIn;
	    using OutputType = TOut;

	    // Information about the specific convolution instance
	    static constexpr int output_tile_rows = OutputTileRows;
	    static constexpr int output_tile_cols = OutputTileCols;
	    static constexpr int kernel_rows = KernelRows;
	    static constexpr int kernel_cols = KernelCols;
	    static constexpr int stride_rows = StrideRows;
	    static constexpr int stride_cols = StrideCols;
	    // Derived tile extent: stride * output_tile + kernel - 1 per dimension
	    // (presumably the input footprint of one output tile - confirm against
	    // the implementation).
	    static constexpr int inner_tile_rows = stride_rows * output_tile_rows + kernel_rows - 1;
	    static constexpr int inner_tile_cols = stride_cols * output_tile_cols + kernel_cols - 1;

	    /** Create a new depthwise convolution engine.
	     *
	     * @param[in]  n_batches    Number of batches in the tensors.
	     * @param[in]  n_input_rows Number of rows in input tensor.
	     * @param[in]  n_input_cols Number of columns in input tensor.
	     * @param[in]  n_channels   Number of channels in input and output tensors.
	     * @param[in]  padding_same True if padding is SAME, else VALID.
	     * @param[in]  weights      Pointer to Height x Width x Channel ordered weights.
	     * @param[in]  input        Pointer to NHWC ordered input tensor.
	     * @param[out] output       Pointer to NHWC ordered output tensor.
	     */
	    DepthwiseConvolution(
	      const int n_batches, const int n_input_rows, const int n_input_cols,
	      const int n_channels, const bool padding_same,
	      const TIn* const weights,
	      const TIn* const input,
	      TOut* const output
	    );

	    // Cannot copy or move a DepthwiseConvolution. The copy operations are
	    // deleted using their canonical signatures; because they are
	    // user-declared, no implicit move operations are generated either.
	    DepthwiseConvolution(const DepthwiseConvolution&) = delete;
	    DepthwiseConvolution& operator=(const DepthwiseConvolution&) = delete;

	    /** Get the number of output rows/columns.
	     *
	     * @param[in] dim_size     Number of elements in the dimension (rows/columns)
	     * @param[in] padding_same True if the padding is SAME, otherwise false.
	     *
	     * @return Number of output rows/columns.
	     */
	    static int get_output_size(const int dim_size, const bool padding_same);

	    /** Get the number of output rows/columns.
	     *
	     * Virtual entry point which forwards to the static get_output_size().
	     *
	     * @param[in] dim_size     Number of elements in the dimension (rows/columns)
	     * @param[in] padding_same True if the padding is SAME, otherwise false.
	     *
	     * @return Number of output rows/columns.
	     */
	    int output_size(const int dim_size, const bool padding_same) const override
	    {
	        return get_output_size(dim_size, padding_same);
	    }

	    /** Get the window of work to be performed by an instance of the operator.
	     *
	     * NOTE(review): presumably the upper bound for the `stop` argument of
	     * run() - confirm against the implementation.
	     */
	    unsigned int get_window(void) const override;

	    /** Perform a portion of the work associated with the operator.
	     *
	     * Will perform the window of work described by the half-open range
	     * [start, stop).
	     *
	     * @param[in] start Start of the window of work to perform.
	     * @param[in] stop  End of the work to perform.
	     */
	    void run(const unsigned int start, const unsigned int stop) override;

	  protected:
	    /** Process a tile-row of the tensors.
	     *
	     * @param[in]  n_channels         Number of channels.
	     * @param[in]  weights            Pointer to Height x Width x Channels ordered weights.
	     * @param[in]  inptr              Pointer into the input tensor for this tile-row.
	     * @param[in]  in_row_stride      Stride between rows of the input tensor.
	     * @param[in]  in_col_stride      Stride between columns of the input tensor.
	     * @param[out] outptr             Pointer into the output tensor for this tile-row.
	     * @param[in]  out_row_stride     Stride between rows of the output tensor.
	     * @param[in]  out_col_stride     Stride between columns of the output tensor.
	     * @param[in]  row_pad_in_top     Presumably the top padding of the input tile-row - TODO confirm.
	     * @param[in]  row_pad_in_left    Presumably the left padding of the input tile-row - TODO confirm.
	     * @param[in]  row_pad_in_bottom  Presumably the bottom padding of the input tile-row - TODO confirm.
	     * @param[in]  row_pad_out_bottom Presumably the bottom padding of the output tile-row - TODO confirm.
	     * @param[in]  n_tiles            Number of tiles in the tile-row.
	     * @param[in]  n_input_cols       Number of columns in the input tensor.
	     * @param[in]  n_output_cols      Number of columns in the output tensor.
	     */
	    static void process_tile_row(
	      const int n_channels,
	      const TIn* const weights,
	      const TIn* const inptr,
	      const int in_row_stride,
	      const int in_col_stride,
	      TOut* const outptr,
	      const int out_row_stride,
	      const int out_col_stride,
	      const int row_pad_in_top,
	      const int row_pad_in_left,
	      const int row_pad_in_bottom,
	      const int row_pad_out_bottom,
	      const int n_tiles,
	      const int n_input_cols,
	      const int n_output_cols
	    );

	    /** Process a single tile of the tensors.
	     *
	     * The template arguments select the padding configuration the
	     * instantiation is specialised for.
	     *
	     * @param[in]  n_channels     Number of channels.
	     * @param[in]  weights        Pointer to Height x Width x Channels ordered weights.
	     * @param[in]  inptr          Pointer to the top-left unpadded value of the tile.
	     * @param[in]  in_row_stride  Stride between rows of the input tensor.
	     * @param[in]  in_col_stride  Stride between columns of the input tensor.
	     * @param[out] outptr         Pointer to the top-left output value for the tile.
	     * @param[in]  out_row_stride Stride between rows of the output tensor.
	     * @param[in]  out_col_stride Stride between columns of the output tensor.
	     */
	    template <
	      int in_pad_top, int in_pad_left, int in_pad_bottom, int in_pad_right,
	      int out_pad_bottom, int out_pad_right
	    >
	    static void process_tile(
	      const int n_channels,
	      const TIn* const weights,
	      const TIn* const inptr,
	      const int in_row_stride,
	      const int in_col_stride,
	      TOut* const outptr,
	      const int out_row_stride,
	      const int out_col_stride
	    );

	    // Type of a pointer to a `process_tile` instance
	    using TileFn = void (*)(
	      const int,
	      const TIn* const,
	      const TIn* const, const int, const int,
	      TOut* const, const int, const int
	    );

	    // Determine the maximum padding values which can be applied to tiles of
	    // the tensors involved in this class of convolution. These values are the
	    // extents of `tile_fns`, so valid indices lie in [0, max).
	    static constexpr int max_in_pad_top = 2;
	    static constexpr int max_in_pad_left = 2;
	    static constexpr int max_in_pad_bottom = inner_tile_rows - 1;
	    static constexpr int max_in_pad_right = inner_tile_cols - 1;
	    static constexpr int max_out_pad_bottom = output_tile_rows;
	    static constexpr int max_out_pad_right = output_tile_cols;

	    /** Array of methods to process tensor tiles.
	     *
	     * Allows dynamic dispatch to specialized implementations based on
	     * different padding configurations. Indexed as
	     * [in_pad_top][in_pad_left][in_pad_bottom][in_pad_right][out_pad_bottom][out_pad_right].
	     */
	    static const TileFn tile_fns[
	      max_in_pad_top][max_in_pad_left][max_in_pad_bottom][max_in_pad_right][
	      max_out_pad_bottom][max_out_pad_right
	    ];

	  private:
	    // Member variables of instances of a convolution engine.
	    const TIn* const _weights;  // Weights pointer (not owned as far as this header shows).
	    const TIn* const _input;    // Input tensor pointer.
	    TOut* const _output;        // Output tensor pointer.
	    const int _n_batches, _n_input_rows, _n_input_cols, _n_channels,
	              _n_output_rows, _n_output_cols, _n_tile_rows, _n_tile_cols;
	    const bool _padding_same;   // True if padding is SAME, else VALID.
	};

	} // namespace depthwise