Georgios Pinitas | 4074c99 | 2018-01-30 18:13:46 +0000 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 2018 ARM Limited. |
| 3 | * |
| 4 | * SPDX-License-Identifier: MIT |
| 5 | * |
| 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
| 7 | * of this software and associated documentation files (the "Software"), to |
| 8 | * deal in the Software without restriction, including without limitation the |
| 9 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or |
| 10 | * sell copies of the Software, and to permit persons to whom the Software is |
| 11 | * furnished to do so, subject to the following conditions: |
| 12 | * |
| 13 | * The above copyright notice and this permission notice shall be included in all |
| 14 | * copies or substantial portions of the Software. |
| 15 | * |
| 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| 22 | * SOFTWARE. |
| 23 | */ |
| 24 | |
| 25 | #pragma once |
| 26 | |
| 27 | namespace depthwise |
| 28 | { |
| 29 | |
/** Abstract interface implemented by depthwise convolution engines.
 *
 * Exposes the output-size query and the windowed execution entry points
 * through virtual functions, so a caller can hold an engine without knowing
 * its concrete type.
 */
class IDepthwiseConvolution
{
  public:
    /** Virtual destructor so that engines may be destroyed via this interface. */
    virtual ~IDepthwiseConvolution() = default;

    /** Get the number of output rows/columns.
     *
     * @param[in] dim_size     Number of elements in the dimension (rows/columns).
     * @param[in] padding_same True if the padding is SAME, otherwise false.
     *
     * @return Size of the corresponding output dimension.
     */
    virtual int output_size(int dim_size, bool padding_same) const = 0;

    /** Get the window of work to be performed by the operator. */
    virtual unsigned int get_window() const = 0;

    /** Perform the portion of work described by [start, stop).
     *
     * @param[in] start Start of the window of work to perform.
     * @param[in] stop  End of the work to perform.
     */
    virtual void run(unsigned int start, unsigned int stop) = 0;
};
| 38 | |
| 39 | template < |
| 40 | int OutputTileRows, |
| 41 | int OutputTileCols, |
| 42 | int KernelRows, |
| 43 | int KernelCols, |
| 44 | int StrideRows, |
| 45 | int StrideCols, |
| 46 | typename TIn, |
| 47 | typename TOut |
| 48 | > |
| 49 | class DepthwiseConvolution : public IDepthwiseConvolution |
| 50 | { |
| 51 | public: |
| 52 | typedef TIn InputType; |
| 53 | typedef TOut OutputType; |
| 54 | |
| 55 | // Information about the specific convolution instance |
| 56 | static constexpr int output_tile_rows = OutputTileRows; |
| 57 | static constexpr int output_tile_cols = OutputTileCols; |
| 58 | static constexpr int kernel_rows = KernelRows; |
| 59 | static constexpr int kernel_cols = KernelCols; |
| 60 | static constexpr int stride_rows = StrideRows; |
| 61 | static constexpr int stride_cols = StrideCols; |
| 62 | static constexpr int inner_tile_rows = stride_rows * output_tile_rows + kernel_rows - 1; |
| 63 | static constexpr int inner_tile_cols = stride_cols * output_tile_cols + kernel_cols - 1; |
| 64 | |
| 65 | /** Create a new depthwise convolution engine. |
| 66 | * |
| 67 | * @param[in] n_batches Number of batches tensors. |
| 68 | * @param[in] n_input_rows Number of rows in input tensor. |
| 69 | * @param[in] n_input_cols Number of columns in input tensor. |
| 70 | * @param[in] n_channels Number of channels in input and output tensors. |
| 71 | * @param[in] padding_same True if padding is SAME, else VALID. |
| 72 | * @param[in] weights Pointer to Height x Width x Channel ordered weights. |
| 73 | * @param[in] input Pointer to NHWC ordered input tensor. |
| 74 | * @param[output] output Pointer to NHWC ordered output tensor. |
| 75 | */ |
| 76 | DepthwiseConvolution( |
| 77 | const int n_batches, const int n_input_rows, const int n_input_cols, |
| 78 | const int n_channels, const bool padding_same, |
| 79 | const TIn* const weights, |
| 80 | const TIn* const input, |
| 81 | TOut* const output |
| 82 | ); |
| 83 | |
| 84 | // Cannot copy or move a DepthwiseConvolution. |
| 85 | DepthwiseConvolution(DepthwiseConvolution&) = delete; |
| 86 | DepthwiseConvolution operator=(DepthwiseConvolution&) = delete; |
| 87 | |
| 88 | /** Get the number of output rows/columns. |
| 89 | * |
| 90 | * @param[in] dim_size Number of elements in the dimension (rows/columns) |
| 91 | * @param[in] same_padding True if the padding is SAME, otherwise false. |
| 92 | */ |
| 93 | static int get_output_size(const int dim_size, const bool padding_same); |
| 94 | |
| 95 | /** Get the number of output rows/columns. |
| 96 | * |
| 97 | * @param[in] dim_size Number of elements in the dimension (rows/columns) |
| 98 | * @param[in] same_padding True if the padding is SAME, otherwise false. |
| 99 | */ |
| 100 | int output_size(const int dim_size, const bool padding_same) const override |
| 101 | { |
| 102 | return DepthwiseConvolution<OutputTileRows, |
| 103 | OutputTileCols, |
| 104 | KernelRows, |
| 105 | KernelCols, |
| 106 | StrideRows, |
| 107 | StrideCols, |
| 108 | TIn, |
| 109 | TOut>::get_output_size(dim_size, padding_same); |
| 110 | } |
| 111 | |
| 112 | /** Get the window of work to be performed by an instance of the operator. |
| 113 | */ |
| 114 | unsigned int get_window(void) const override; |
| 115 | |
| 116 | /** Perform a portion of the work associated with the operator. |
| 117 | * |
| 118 | * Will perform the window of work described by $[start, stop)$. |
| 119 | * |
| 120 | * @param[in] start Start of the window of work to perform. |
| 121 | * @param[in] stop End of the work to perform. |
| 122 | */ |
| 123 | void run(const unsigned int start, const unsigned int stop) override; |
| 124 | |
| 125 | protected: |
| 126 | /** Process a tile-row of the tensors. |
| 127 | */ |
| 128 | static void process_tile_row( |
| 129 | const int n_channels, |
| 130 | const TIn* const weights, |
| 131 | const TIn* const inptr, |
| 132 | const int in_row_stride, |
| 133 | const int in_col_stride, |
| 134 | TOut* const outptr, |
| 135 | const int out_row_stride, |
| 136 | const int out_col_stride, |
| 137 | const int row_pad_in_top, |
| 138 | const int row_pad_in_left, |
| 139 | const int row_pad_in_bottom, |
| 140 | const int row_pad_out_bottom, |
| 141 | const int n_tiles, |
| 142 | const int n_input_cols, |
| 143 | const int n_output_cols |
| 144 | ); |
| 145 | |
| 146 | /** Process a single tile of the tensors. |
| 147 | * |
| 148 | * @param[in] n_channels Number of channels. |
| 149 | * @param[in] weights Pointer to Height x Width x Channels ordered weights. |
| 150 | * @param[in] inptr Pointer to the top-left unpadded value of the tile. |
| 151 | * @param[in] in_row_stride Stride between rows of the input tensor. |
| 152 | * @param[in] in_col_stride Stride between columns of the input tensor. |
| 153 | * @param[out] outptr Pointer to the top-left output value for the tile. |
| 154 | * @param[in] out_row_stride Stride between rows of the output tensor. |
| 155 | * @param[in] out_col_stride Stride between columns of the output tensor. |
| 156 | */ |
| 157 | template < |
| 158 | int in_pad_top, int in_pad_left, int in_pad_bottom, int in_pad_right, |
| 159 | int out_pad_bottom, int out_pad_right |
| 160 | > |
| 161 | static void process_tile( |
| 162 | const int n_channels, |
| 163 | const TIn* const weights, |
| 164 | const TIn* const inptr, |
| 165 | const int in_row_stride, |
| 166 | const int in_col_stride, |
| 167 | TOut* const outptr, |
| 168 | const int out_row_stride, |
| 169 | const int out_col_stride |
| 170 | ); |
| 171 | |
| 172 | // Type of a pointer to a `process_tile` instance |
| 173 | typedef void (*TileFn)( |
| 174 | const int, |
| 175 | const TIn* const, |
| 176 | const TIn* const, const int, const int, |
| 177 | TOut* const, const int, const int |
| 178 | ); |
| 179 | |
| 180 | // Determine the maximum padding values which can be applied to tiles of |
| 181 | // the tensors involved in this class of convolution. |
| 182 | static constexpr int max_in_pad_top = 2; |
| 183 | static constexpr int max_in_pad_left = 2; |
| 184 | static constexpr int max_in_pad_bottom = inner_tile_rows - 1; |
| 185 | static constexpr int max_in_pad_right = inner_tile_cols - 1; |
| 186 | static constexpr int max_out_pad_bottom = output_tile_rows; |
| 187 | static constexpr int max_out_pad_right = output_tile_cols; |
| 188 | |
| 189 | /** Array of methods to process tensor tiles. |
| 190 | * |
| 191 | * Allows dynamic dispatch to specialized implementations based on |
| 192 | * different padding configurations. |
| 193 | */ |
| 194 | static const TileFn tile_fns[ |
| 195 | max_in_pad_top][max_in_pad_left][max_in_pad_bottom][max_in_pad_right][ |
| 196 | max_out_pad_bottom][max_out_pad_right |
| 197 | ]; |
| 198 | |
| 199 | private: |
| 200 | // Member variables of instances of a convolution engine. |
| 201 | const TIn* const _weights; |
| 202 | const TIn* const _input; |
| 203 | TOut* const _output; |
| 204 | const int _n_batches, _n_input_rows, _n_input_cols, _n_channels, |
| 205 | _n_output_rows, _n_output_cols, _n_tile_rows, _n_tile_cols; |
| 206 | const bool _padding_same; |
| 207 | }; |
| 208 | |
| 209 | } // namespace depthwise |