// blob: 80b061401553f9ff59651e1402d40e3bcafb62bc [file] [log] [blame]
/*
* Copyright (c) 2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#pragma once
namespace depthwise
{
/** Abstract interface shared by all depthwise convolution engines.
 *
 * Lets callers query output sizes and drive the computation without knowing
 * the concrete engine type.
 */
class IDepthwiseConvolution
{
  public:
    /** Virtual so that engines can be destroyed through this interface. */
    virtual ~IDepthwiseConvolution() = default;

    /** Get the number of output rows/columns for one dimension.
     *
     * @param[in] dim_size     Number of elements in the dimension (rows/columns).
     * @param[in] padding_same True if the padding is SAME, otherwise false.
     *
     * @return Size of the corresponding output dimension.
     */
    virtual int output_size(int dim_size, bool padding_same) const = 0;

    /** Get the window of work to be performed by an instance of the operator.
     *
     * @return Size of the window; sub-ranges of it are passed to run().
     */
    virtual unsigned int get_window() const = 0;

    /** Perform the portion of the work described by [start, stop).
     *
     * @param[in] start Start of the window of work to perform.
     * @param[in] stop  End of the work to perform.
     */
    virtual void run(unsigned int start, unsigned int stop) = 0;
};
/** Tiled depthwise convolution engine.
 *
 * The output tile shape, kernel shape and strides are fixed at compile time
 * through the template arguments; tensor dimensions and data pointers are
 * passed to the constructor.
 *
 * @tparam OutputTileRows Number of rows in an output tile.
 * @tparam OutputTileCols Number of columns in an output tile.
 * @tparam KernelRows     Number of rows in the kernel.
 * @tparam KernelCols     Number of columns in the kernel.
 * @tparam StrideRows     Stride over rows.
 * @tparam StrideCols     Stride over columns.
 * @tparam TIn            Element type of the weights and input tensor.
 * @tparam TOut           Element type of the output tensor.
 */
template <
  int OutputTileRows,
  int OutputTileCols,
  int KernelRows,
  int KernelCols,
  int StrideRows,
  int StrideCols,
  typename TIn,
  typename TOut
>
class DepthwiseConvolution : public IDepthwiseConvolution
{
  public:
    using InputType = TIn;
    using OutputType = TOut;

    // Information about the specific convolution instance
    static constexpr int output_tile_rows = OutputTileRows;
    static constexpr int output_tile_cols = OutputTileCols;
    static constexpr int kernel_rows = KernelRows;
    static constexpr int kernel_cols = KernelCols;
    static constexpr int stride_rows = StrideRows;
    static constexpr int stride_cols = StrideCols;

    // Size of the tile these constants associate with one output tile.
    // NOTE(review): for strides greater than one this is one larger than
    // stride * (output_tile - 1) + kernel; confirm that this is intended. The
    // values feed the extents of `tile_fns`, so they cannot be changed in
    // isolation.
    static constexpr int inner_tile_rows = stride_rows * output_tile_rows + kernel_rows - 1;
    static constexpr int inner_tile_cols = stride_cols * output_tile_cols + kernel_cols - 1;

    /** Create a new depthwise convolution engine.
     *
     * @param[in]  n_batches    Number of batches in the tensors.
     * @param[in]  n_input_rows Number of rows in input tensor.
     * @param[in]  n_input_cols Number of columns in input tensor.
     * @param[in]  n_channels   Number of channels in input and output tensors.
     * @param[in]  padding_same True if padding is SAME, else VALID.
     * @param[in]  weights      Pointer to Height x Width x Channel ordered weights.
     * @param[in]  input        Pointer to NHWC ordered input tensor.
     * @param[out] output       Pointer to NHWC ordered output tensor.
     */
    DepthwiseConvolution(
      const int n_batches, const int n_input_rows, const int n_input_cols,
      const int n_channels, const bool padding_same,
      const TIn* const weights,
      const TIn* const input,
      TOut* const output
    );

    // Cannot copy or move a DepthwiseConvolution. Declaring the copy
    // operations (here as deleted, in their canonical form) also suppresses
    // the implicitly generated move operations.
    DepthwiseConvolution(const DepthwiseConvolution&) = delete;
    DepthwiseConvolution& operator=(const DepthwiseConvolution&) = delete;

    /** Get the number of output rows/columns.
     *
     * @param[in] dim_size     Number of elements in the dimension (rows/columns)
     * @param[in] padding_same True if the padding is SAME, otherwise false.
     *
     * @return Number of elements in the corresponding output dimension.
     */
    static int get_output_size(const int dim_size, const bool padding_same);

    /** Get the number of output rows/columns.
     *
     * Virtual entry point which forwards to the static get_output_size().
     *
     * @param[in] dim_size     Number of elements in the dimension (rows/columns)
     * @param[in] padding_same True if the padding is SAME, otherwise false.
     *
     * @return Number of elements in the corresponding output dimension.
     */
    int output_size(const int dim_size, const bool padding_same) const override
    {
      return get_output_size(dim_size, padding_same);
    }

    /** Get the window of work to be performed by an instance of the operator.
     */
    unsigned int get_window() const override;

    /** Perform a portion of the work associated with the operator.
     *
     * Will perform the window of work described by $[start, stop)$.
     *
     * @param[in] start Start of the window of work to perform.
     * @param[in] stop  End of the work to perform.
     */
    void run(const unsigned int start, const unsigned int stop) override;

  protected:
    /** Process a tile-row of the tensors.
     *
     * NOTE(review): the meanings of the padding and count parameters below are
     * inferred from their names; confirm against the implementation.
     *
     * @param[in]  n_channels         Number of channels.
     * @param[in]  weights            Pointer to the weights.
     * @param[in]  inptr              Pointer into the input tensor for this row of tiles.
     * @param[in]  in_row_stride      Stride between rows of the input tensor.
     * @param[in]  in_col_stride      Stride between columns of the input tensor.
     * @param[out] outptr             Pointer into the output tensor for this row of tiles.
     * @param[in]  out_row_stride     Stride between rows of the output tensor.
     * @param[in]  out_col_stride     Stride between columns of the output tensor.
     * @param[in]  row_pad_in_top     Presumably the top input padding of the tile-row.
     * @param[in]  row_pad_in_left    Presumably the left input padding of the tile-row.
     * @param[in]  row_pad_in_bottom  Presumably the bottom input padding of the tile-row.
     * @param[in]  row_pad_out_bottom Presumably the bottom output padding of the tile-row.
     * @param[in]  n_tiles            Presumably the number of tiles in the tile-row.
     * @param[in]  n_input_cols       Presumably the number of columns in the input tensor.
     * @param[in]  n_output_cols      Presumably the number of columns in the output tensor.
     */
    static void process_tile_row(
      const int n_channels,
      const TIn* const weights,
      const TIn* const inptr,
      const int in_row_stride,
      const int in_col_stride,
      TOut* const outptr,
      const int out_row_stride,
      const int out_col_stride,
      const int row_pad_in_top,
      const int row_pad_in_left,
      const int row_pad_in_bottom,
      const int row_pad_out_bottom,
      const int n_tiles,
      const int n_input_cols,
      const int n_output_cols
    );

    /** Process a single tile of the tensors.
     *
     * The template arguments select the padding configuration which the
     * instantiation is specialised for.
     *
     * @param[in]  n_channels     Number of channels.
     * @param[in]  weights        Pointer to Height x Width x Channels ordered weights.
     * @param[in]  inptr          Pointer to the top-left unpadded value of the tile.
     * @param[in]  in_row_stride  Stride between rows of the input tensor.
     * @param[in]  in_col_stride  Stride between columns of the input tensor.
     * @param[out] outptr         Pointer to the top-left output value for the tile.
     * @param[in]  out_row_stride Stride between rows of the output tensor.
     * @param[in]  out_col_stride Stride between columns of the output tensor.
     */
    template <
      int in_pad_top, int in_pad_left, int in_pad_bottom, int in_pad_right,
      int out_pad_bottom, int out_pad_right
    >
    static void process_tile(
      const int n_channels,
      const TIn* const weights,
      const TIn* const inptr,
      const int in_row_stride,
      const int in_col_stride,
      TOut* const outptr,
      const int out_row_stride,
      const int out_col_stride
    );

    // Type of a pointer to a `process_tile` instance
    using TileFn = void (*)(
      const int,
      const TIn* const,
      const TIn* const, const int, const int,
      TOut* const, const int, const int
    );

    // Determine the maximum padding values which can be applied to tiles of
    // the tensors involved in this class of convolution. These are used as the
    // extents of `tile_fns`, so valid indices run from 0 to (value - 1).
    // NOTE(review): the top/left limits are hard-coded to 2 rather than being
    // derived from the kernel size; confirm this holds for every kernel shape
    // that is instantiated.
    static constexpr int max_in_pad_top = 2;
    static constexpr int max_in_pad_left = 2;
    static constexpr int max_in_pad_bottom = inner_tile_rows - 1;
    static constexpr int max_in_pad_right = inner_tile_cols - 1;
    static constexpr int max_out_pad_bottom = output_tile_rows;
    static constexpr int max_out_pad_right = output_tile_cols;

    /** Array of methods to process tensor tiles.
     *
     * Allows dynamic dispatch to specialized implementations based on
     * different padding configurations. The dimensions are, in order: input
     * pad top, left, bottom, right, then output pad bottom, right.
     */
    static const TileFn tile_fns[
      max_in_pad_top][max_in_pad_left][max_in_pad_bottom][max_in_pad_right][
      max_out_pad_bottom][max_out_pad_right
    ];

  private:
    // Member variables of instances of a convolution engine. The declaration
    // order is significant: it fixes the order in which the constructor
    // initialises the members.
    const TIn* const _weights;
    const TIn* const _input;
    TOut* const _output;
    const int _n_batches;
    const int _n_input_rows;
    const int _n_input_cols;
    const int _n_channels;
    const int _n_output_rows;
    const int _n_output_cols;
    const int _n_tile_rows;
    const int _n_tile_cols;
    const bool _padding_same;
};
} // namespace depthwise