blob: 80b061401553f9ff59651e1402d40e3bcafb62bc [file] [log] [blame]
/*
 * Copyright (c) 2018 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24
25#pragma once
26
27namespace depthwise
28{
29
/** Abstract interface implemented by the DepthwiseConvolution class template.
 *
 * Exposes the output-size query and the windowed execution entry points
 * without any of the compile-time tile/kernel/stride parameters.
 */
class IDepthwiseConvolution
{
public:
  // Virtual so that an engine can be destroyed through a pointer to this
  // interface.
  virtual ~IDepthwiseConvolution() = default;

  /** Get the number of output rows/columns.
   *
   * @param[in] dim_size     Number of elements in the dimension (rows/columns).
   * @param[in] padding_same True if the padding is SAME, otherwise false.
   *
   * @return Number of output elements along that dimension.
   */
  virtual int output_size(const int dim_size, const bool padding_same) const = 0;

  /** Get the window of work to be performed by an instance of the operator.
   */
  virtual unsigned int get_window() const = 0;

  /** Perform a portion of the work associated with the operator.
   *
   * Will perform the window of work described by [start, stop).
   *
   * @param[in] start Start of the window of work to perform.
   * @param[in] stop  End of the work to perform.
   */
  virtual void run(const unsigned int start, const unsigned int stop) = 0;
};
38
39template <
40 int OutputTileRows,
41 int OutputTileCols,
42 int KernelRows,
43 int KernelCols,
44 int StrideRows,
45 int StrideCols,
46 typename TIn,
47 typename TOut
48>
49class DepthwiseConvolution : public IDepthwiseConvolution
50{
51 public:
52 typedef TIn InputType;
53 typedef TOut OutputType;
54
55 // Information about the specific convolution instance
56 static constexpr int output_tile_rows = OutputTileRows;
57 static constexpr int output_tile_cols = OutputTileCols;
58 static constexpr int kernel_rows = KernelRows;
59 static constexpr int kernel_cols = KernelCols;
60 static constexpr int stride_rows = StrideRows;
61 static constexpr int stride_cols = StrideCols;
62 static constexpr int inner_tile_rows = stride_rows * output_tile_rows + kernel_rows - 1;
63 static constexpr int inner_tile_cols = stride_cols * output_tile_cols + kernel_cols - 1;
64
65 /** Create a new depthwise convolution engine.
66 *
67 * @param[in] n_batches Number of batches tensors.
68 * @param[in] n_input_rows Number of rows in input tensor.
69 * @param[in] n_input_cols Number of columns in input tensor.
70 * @param[in] n_channels Number of channels in input and output tensors.
71 * @param[in] padding_same True if padding is SAME, else VALID.
72 * @param[in] weights Pointer to Height x Width x Channel ordered weights.
73 * @param[in] input Pointer to NHWC ordered input tensor.
74 * @param[output] output Pointer to NHWC ordered output tensor.
75 */
76 DepthwiseConvolution(
77 const int n_batches, const int n_input_rows, const int n_input_cols,
78 const int n_channels, const bool padding_same,
79 const TIn* const weights,
80 const TIn* const input,
81 TOut* const output
82 );
83
84 // Cannot copy or move a DepthwiseConvolution.
85 DepthwiseConvolution(DepthwiseConvolution&) = delete;
86 DepthwiseConvolution operator=(DepthwiseConvolution&) = delete;
87
88 /** Get the number of output rows/columns.
89 *
90 * @param[in] dim_size Number of elements in the dimension (rows/columns)
91 * @param[in] same_padding True if the padding is SAME, otherwise false.
92 */
93 static int get_output_size(const int dim_size, const bool padding_same);
94
95 /** Get the number of output rows/columns.
96 *
97 * @param[in] dim_size Number of elements in the dimension (rows/columns)
98 * @param[in] same_padding True if the padding is SAME, otherwise false.
99 */
100 int output_size(const int dim_size, const bool padding_same) const override
101 {
102 return DepthwiseConvolution<OutputTileRows,
103 OutputTileCols,
104 KernelRows,
105 KernelCols,
106 StrideRows,
107 StrideCols,
108 TIn,
109 TOut>::get_output_size(dim_size, padding_same);
110 }
111
112 /** Get the window of work to be performed by an instance of the operator.
113 */
114 unsigned int get_window(void) const override;
115
116 /** Perform a portion of the work associated with the operator.
117 *
118 * Will perform the window of work described by $[start, stop)$.
119 *
120 * @param[in] start Start of the window of work to perform.
121 * @param[in] stop End of the work to perform.
122 */
123 void run(const unsigned int start, const unsigned int stop) override;
124
125 protected:
126 /** Process a tile-row of the tensors.
127 */
128 static void process_tile_row(
129 const int n_channels,
130 const TIn* const weights,
131 const TIn* const inptr,
132 const int in_row_stride,
133 const int in_col_stride,
134 TOut* const outptr,
135 const int out_row_stride,
136 const int out_col_stride,
137 const int row_pad_in_top,
138 const int row_pad_in_left,
139 const int row_pad_in_bottom,
140 const int row_pad_out_bottom,
141 const int n_tiles,
142 const int n_input_cols,
143 const int n_output_cols
144 );
145
146 /** Process a single tile of the tensors.
147 *
148 * @param[in] n_channels Number of channels.
149 * @param[in] weights Pointer to Height x Width x Channels ordered weights.
150 * @param[in] inptr Pointer to the top-left unpadded value of the tile.
151 * @param[in] in_row_stride Stride between rows of the input tensor.
152 * @param[in] in_col_stride Stride between columns of the input tensor.
153 * @param[out] outptr Pointer to the top-left output value for the tile.
154 * @param[in] out_row_stride Stride between rows of the output tensor.
155 * @param[in] out_col_stride Stride between columns of the output tensor.
156 */
157 template <
158 int in_pad_top, int in_pad_left, int in_pad_bottom, int in_pad_right,
159 int out_pad_bottom, int out_pad_right
160 >
161 static void process_tile(
162 const int n_channels,
163 const TIn* const weights,
164 const TIn* const inptr,
165 const int in_row_stride,
166 const int in_col_stride,
167 TOut* const outptr,
168 const int out_row_stride,
169 const int out_col_stride
170 );
171
172 // Type of a pointer to a `process_tile` instance
173 typedef void (*TileFn)(
174 const int,
175 const TIn* const,
176 const TIn* const, const int, const int,
177 TOut* const, const int, const int
178 );
179
180 // Determine the maximum padding values which can be applied to tiles of
181 // the tensors involved in this class of convolution.
182 static constexpr int max_in_pad_top = 2;
183 static constexpr int max_in_pad_left = 2;
184 static constexpr int max_in_pad_bottom = inner_tile_rows - 1;
185 static constexpr int max_in_pad_right = inner_tile_cols - 1;
186 static constexpr int max_out_pad_bottom = output_tile_rows;
187 static constexpr int max_out_pad_right = output_tile_cols;
188
189 /** Array of methods to process tensor tiles.
190 *
191 * Allows dynamic dispatch to specialized implementations based on
192 * different padding configurations.
193 */
194 static const TileFn tile_fns[
195 max_in_pad_top][max_in_pad_left][max_in_pad_bottom][max_in_pad_right][
196 max_out_pad_bottom][max_out_pad_right
197 ];
198
199 private:
200 // Member variables of instances of a convolution engine.
201 const TIn* const _weights;
202 const TIn* const _input;
203 TOut* const _output;
204 const int _n_batches, _n_input_rows, _n_input_cols, _n_channels,
205 _n_output_rows, _n_output_cols, _n_tile_rows, _n_tile_cols;
206 const bool _padding_same;
207};
208
209} // namespace depthwise