/*
 * Copyright (c) 2017-2020 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef ARM_COMPUTE_NEGEMMWINOGRADCONVOLUTIONLAYERKERNEL_H
#define ARM_COMPUTE_NEGEMMWINOGRADCONVOLUTIONLAYERKERNEL_H

#include "arm_compute/core/NEON/INEKernel.h"
#include "arm_compute/core/NEON/kernels/convolution/common/convolution.hpp"
#include "arm_compute/core/NEON/kernels/convolution/common/tensor.hpp"
#include "arm_compute/core/NEON/kernels/convolution/winograd/winograd_layer.hpp"

namespace arm_compute
{
// Forward declarations
class ITensor;

/** Interface for the NEON kernel to perform Winograd input transform. */
class INEWinogradLayerTransformInputKernel : public INEKernel
{
public:
    /** Get the working space required to perform the transformation.
     *
     * Note that the working space is only required when performing the
     * transformation - hence it can be reused whenever the transformation is
     * not running.
     *
     * @param[in] num_threads The greatest number of threads that will be used to execute the transform.
     *
     * @return Size of working space required in bytes.
     */
    virtual unsigned int get_working_space_size(unsigned int num_threads) const = 0;

    /** Determine how much memory (in units of TIn) to allocate for the
     * transformed input.
     *
     * @param[in] num_batches  Number of batches in the input tensor.
     * @param[in] num_channels Number of feature maps in the input tensor.
     * @param[in] num_rows     Number of rows in each feature map.
     * @param[in] num_cols     Number of columns in each feature map.
     * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
     *
     * @return Storage size (in units of TIn) required.
     */
    virtual unsigned int get_input_storage_size(int num_batches, int num_channels, int num_rows, int num_cols, bool same_padding) const = 0;

    /** Gets the stride between matrices in the input workspace
     *
     * @param[in] num_batches  Number of batches in the input tensor.
     * @param[in] num_channels Number of feature maps in the input tensor.
     * @param[in] num_rows     Number of rows in each feature map.
     * @param[in] num_cols     Number of columns in each feature map.
     * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
     *
     * @return Stride expressed in bytes.
     */
    virtual int get_matrix_stride(int num_batches, int num_channels, int num_rows, int num_cols, bool same_padding) const = 0;

    /** Configure the input transform kernel.
     *
     * @param[in]  input_nhwc    Input tensor in NHWC data layout format.
     * @param[in]  num_batches   Number of batches in input tensor.
     * @param[in]  num_rows      Number of rows in input tensor.
     * @param[in]  num_cols      Number of columns in input tensor.
     * @param[in]  num_channels  Number of channels in input tensor.
     * @param[in]  padding       Padding type.
     * @param[out] output        Base of output matrices.
     * @param[in]  matrix_stride Stride between output matrices.
     * @param[in]  workspace     Tensor to be used as the working space during the computation.
     */
    virtual void configure(const ITensor *input_nhwc, const int num_batches, const int num_rows, const int num_cols, const int num_channels,
                           const PaddingType padding, ITensor *output, const int matrix_stride, ITensor *workspace) = 0;

    /** Destructor */
    virtual ~INEWinogradLayerTransformInputKernel()
    {
    }
};

/** NEON kernel to perform Winograd input transform. */
template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerTransformInputKernel : public INEWinogradLayerTransformInputKernel
{
public:
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformInputKernel(const NEWinogradLayerTransformInputKernel &) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformInputKernel &operator=(const NEWinogradLayerTransformInputKernel &) = delete;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformInputKernel(NEWinogradLayerTransformInputKernel &&) = default;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformInputKernel &operator=(NEWinogradLayerTransformInputKernel &&) = default;
    /** Default destructor */
    ~NEWinogradLayerTransformInputKernel() = default;

    /** Determine how much memory (in units of TIn) to allocate for the
     * transformed input.
     *
     * @param[in] num_batches  Number of batches in the input tensor.
     * @param[in] num_channels Number of feature maps in the input tensor.
     * @param[in] num_rows     Number of rows in each feature map.
     * @param[in] num_cols     Number of columns in each feature map.
     * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
     *
     * @return Storage size (in units of TIn) required.
     */
    unsigned int get_input_storage_size(
        int  num_batches,
        int  num_channels,
        int  num_rows,
        int  num_cols,
        bool same_padding) const override;

    /** Get the working space required to perform the transformation.
     *
     * Note that the working space is only required when performing the
     * transformation - hence it can be reused whenever the transformation is
     * not running.
     *
     * @param[in] num_threads The greatest number of threads that will be used to execute the transform.
     *
     * @return Size of working space required in bytes.
     */
    unsigned int get_working_space_size(unsigned int num_threads) const override;

    /** Gets the stride between matrices in the input workspace
     *
     * @param[in] num_batches  Number of batches in the input tensor.
     * @param[in] num_channels Number of feature maps in the input tensor.
     * @param[in] num_rows     Number of rows in each feature map.
     * @param[in] num_cols     Number of columns in each feature map.
     * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
     *
     * @return Stride expressed in bytes.
     */
    int get_matrix_stride(
        int  num_batches,
        int  num_channels,
        int  num_rows,
        int  num_cols,
        bool same_padding) const override;

    /** Default constructor */
    NEWinogradLayerTransformInputKernel();

    const char *name() const override
    {
        return "NEWinogradLayerTransformInputKernel";
    }

    /** Configure the input transform kernel.
     *
     * @param[in]  input_nhwc    Input tensor. Data types supported: F16/F32. Layout supported: NHWC.
     * @param[in]  num_batches   Number of batches in input tensor.
     * @param[in]  num_rows      Number of rows in input tensor.
     * @param[in]  num_cols      Number of columns in input tensor.
     * @param[in]  num_channels  Number of channels in input tensor.
     * @param[in]  padding       Padding type.
     * @param[out] output        Base of output matrices.
     * @param[in]  matrix_stride Stride between output matrices.
     * @param[in]  workspace     Tensor to be used as the working space during the computation.
     */
    void configure(
        const ITensor    *input_nhwc,
        const int         num_batches,
        const int         num_rows,
        const int         num_cols,
        const int         num_channels,
        const PaddingType padding,
        ITensor          *output,
        const int         matrix_stride,
        ITensor          *workspace) override;

    // Inherited methods overridden:
    void run(const Window &window, const ThreadInfo &info) override;

    /** Winograd base kernel */
    using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, winograd::WinogradRoots::Integers>;
    /** Winograd convolution kernel */
    using WinogradConv = typename WinogradBase::template Convolution<T, T>;

    /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformInputKernel
     *
     * @param[in] input         First tensor input info. Data types supported: F16/F32.
     * @param[in] output        Output tensor info. Data types supported: same as @p input.
     * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info);

private:
    using InputTransform = typename WinogradBase::template InputTransform<T, T>;

    std::unique_ptr<InputTransform> _transform{ nullptr };
    const ITensor                  *_input_nhwc;
    int                             _num_batches;    /**< Number of batches in input tensor. */
    int                             _num_rows;       /**< Number of rows in input tensor. */
    int                             _num_cols;       /**< Number of columns in input tensor. */
    int                             _num_channels;   /**< Number of channels in input tensor. */
    PaddingType                     _padding;        /**< Padding type. */
    ITensor                        *_output;         /**< Base of output matrices. */
    int                             _matrix_stride;  /**< Stride between output matrices. */
    int                             _padding_top;    /**< Padding to apply to the top of the image. */
    int                             _padding_left;   /**< Padding to apply to the left of the image. */
    int                             _padding_right;  /**< Padding to apply to the right of the image. */
    int                             _padding_bottom; /**< Padding to apply to the bottom of the image. */
    ITensor                        *_workspace;
};
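
// A minimal usage sketch for the input transform (illustrative only, assuming an F32 3x3
// convolution with a 2x2 output tile; "input_nhwc", "transformed_input", "workspace", "padding"
// and the shape variables are caller-provided, and the scheduling call is just one possible way
// to run the kernel):
//
//     NEWinogradLayerTransformInputKernel<float, 2, 2, 3, 3> transform_input;
//     const unsigned int input_storage  = transform_input.get_input_storage_size(num_batches, num_channels, num_rows, num_cols, use_same_padding);
//     const int          matrix_stride  = transform_input.get_matrix_stride(num_batches, num_channels, num_rows, num_cols, use_same_padding);
//     const unsigned int workspace_size = transform_input.get_working_space_size(NEScheduler::get().num_threads());
//     // ... allocate "transformed_input" (input_storage elements of TIn) and "workspace" (workspace_size bytes) ...
//     transform_input.configure(&input_nhwc, num_batches, num_rows, num_cols, num_channels, padding, &transformed_input, matrix_stride, &workspace);
//     NEScheduler::get().schedule(&transform_input, Window::DimX);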

/** Interface for the NEON kernel to perform Winograd output transform. */
class INEWinogradLayerTransformOutputKernel : public INEKernel
{
public:
    /** Get the working space required to perform the transformation.
     *
     * Note that the working space is only required when performing the
     * transformation - hence it can be reused whenever the transformation is
     * not running.
     *
     * @param[in] num_threads The greatest number of threads that will be used to execute the transform.
     *
     * @return Size of working space required in bytes.
     */
    virtual unsigned int get_working_space_size(unsigned int num_threads) const = 0;

    /** Determine how much memory (in units of TOut) to allocate for the
     * (Winograd domain) output.
     *
     * @param[in] num_batches         Number of batches in the output tensor.
     * @param[in] num_rows            Number of rows in each feature map of the input tensor.
     * @param[in] num_cols            Number of columns in each feature map of the input tensor.
     * @param[in] num_output_channels Number of feature maps in the output tensor.
     *
     * @return Storage size (in units of TOut) required.
     */
    virtual unsigned int get_output_storage_size(int num_batches, int num_rows, int num_cols, int num_output_channels) const = 0;

    /** Gets the stride between matrices in the output workspace
     *
     * @param[in] num_batches         Number of batches in the output tensor.
     * @param[in] num_rows            Number of rows in each feature map of the input tensor.
     * @param[in] num_cols            Number of columns in each feature map of the input tensor.
     * @param[in] num_output_channels Number of feature maps in the output tensor.
     *
     * @return Stride expressed in bytes.
     */
    virtual int get_matrix_stride(int num_batches, int num_rows, int num_cols, int num_output_channels) const = 0;

    /** Get the output shape of a convolution.
     *
     * @param[in] num_rows     Number of rows in each feature map of the input tensor.
     * @param[in] num_cols     Number of columns in each feature map of the input tensor.
     * @param[in] padding_same True if padding is SAME, false otherwise.
     *
     * @return Shape of the output tensor.
     */
    virtual std::pair<unsigned int, unsigned int> get_output_shape(
        int  num_rows,    /* Number of rows in each feature map of the input tensor. */
        int  num_cols,    /* Number of columns in each feature map of the input tensor. */
        bool padding_same /* True if padding is SAME, false otherwise. */
    ) const = 0;

    /** Configure the output transform kernel.
     *
     * @param[in]  biases             Pointer to the biases tensor.
     * @param[in]  transformed_output Pointer to working space for the output tensor in the Winograd domain.
     * @param[in]  matrix_stride      Output matrix stride, can be computed with winograd::WinogradGEMM<2, 2, 3, 3>::Convolution<float, float>::get_output_matrix_stride()
     * @param[out] output_nhwc        Pointer to the output tensor in NHWC data layout, in the spatial domain.
     * @param[in]  num_batches        Number of batches in the input tensor.
     * @param[in]  num_rows           Number of rows in output tensor.
     * @param[in]  num_cols           Number of columns in output tensor.
     * @param[in]  num_channels       Number of feature maps in the output tensor.
     * @param[in]  workspace          Tensor to be used as the working space during the computation.
     * @param[in]  activation         Activation to be used.
     */
    virtual void configure(
        const ITensor              *biases,
        const ITensor              *transformed_output,
        const int                   matrix_stride,
        ITensor                    *output_nhwc,
        const int                   num_batches,
        const int                   num_rows,
        const int                   num_cols,
        const int                   num_channels,
        ITensor                    *workspace,
        const arm_gemm::Activation &activation) = 0;

    /** Destructor */
    virtual ~INEWinogradLayerTransformOutputKernel()
    {
    }
};

/** NEON kernel to perform Winograd output transform. */
template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerTransformOutputKernel : public INEWinogradLayerTransformOutputKernel
{
public:
    const char *name() const override
    {
        return "NEWinogradLayerTransformOutputKernel";
    }
    /** Constructor */
    NEWinogradLayerTransformOutputKernel();

    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformOutputKernel(const NEWinogradLayerTransformOutputKernel &) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformOutputKernel &operator=(const NEWinogradLayerTransformOutputKernel &) = delete;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformOutputKernel(NEWinogradLayerTransformOutputKernel &&) = default;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformOutputKernel &operator=(NEWinogradLayerTransformOutputKernel &&) = default;
    /** Default destructor */
    ~NEWinogradLayerTransformOutputKernel() = default;

    // Inherited methods overridden:
    /** Determine how much memory (in units of TOut) to allocate for the
     * (Winograd domain) output.
     *
     * @param[in] num_batches         Number of batches in the output tensor.
     * @param[in] num_rows            Number of rows in each feature map of the input tensor.
     * @param[in] num_cols            Number of columns in each feature map of the input tensor.
     * @param[in] num_output_channels Number of feature maps in the output tensor.
     *
     * @return Storage size (in units of TOut) required.
     */
    unsigned int get_output_storage_size(int num_batches, int num_rows, int num_cols, int num_output_channels) const override;

    /** Gets the stride between matrices in the output workspace
     *
     * @param[in] num_batches         Number of batches in the output tensor.
     * @param[in] num_rows            Number of rows in each feature map of the input tensor.
     * @param[in] num_cols            Number of columns in each feature map of the input tensor.
     * @param[in] num_output_channels Number of feature maps in the output tensor.
     *
     * @return Stride expressed in bytes.
     */
    int get_matrix_stride(int num_batches, int num_rows, int num_cols, int num_output_channels) const override;
    /** Get the output shape of a convolution.
     *
     * @param[in] num_rows     Number of rows in each feature map of the input tensor.
     * @param[in] num_cols     Number of columns in each feature map of the input tensor.
     * @param[in] padding_same True if padding is SAME, false otherwise.
     *
     * @return Shape of the output tensor.
     */
    std::pair<unsigned int, unsigned int> get_output_shape(
        int  num_rows, /* Number of rows in each feature map of the input tensor. */
        int  num_cols, /* Number of columns in each feature map of the input tensor. */
        bool padding_same) const override;

    /** Get the working space required to perform the transformation.
     *
     * Note that the working space is only required when performing the
     * transformation - hence it can be reused whenever the transformation is
     * not running.
     *
     * @param[in] num_threads The greatest number of threads that will be used to execute the transform.
     *
     * @return Size of working space required in bytes.
     */
    unsigned int get_working_space_size(unsigned int num_threads) const override;

    /** Configure the output transform kernel.
     *
     * @param[in]  biases             Pointer to the biases tensor.
     * @param[in]  transformed_output Pointer to working space for the output tensor in the Winograd domain.
     * @param[in]  matrix_stride      Output matrix stride, can be computed with winograd::WinogradGEMM<2, 2, 3, 3>::Convolution<float, float>::get_output_matrix_stride()
     * @param[out] output_nhwc        Pointer to a tensor with NHWC data layout, in the spatial domain.
     * @param[in]  num_batches        Number of batches in the input tensor.
     * @param[in]  num_rows           Number of rows in output tensor.
     * @param[in]  num_cols           Number of columns in output tensor.
     * @param[in]  num_channels       Number of feature maps in the output tensor.
     * @param[in]  workspace          Tensor to be used as the working space during the computation.
     * @param[in]  activation         Activation to be used.
     */
    void configure(
        const ITensor              *biases,
        const ITensor              *transformed_output,
        const int                   matrix_stride,
        ITensor                    *output_nhwc,
        const int                   num_batches,
        const int                   num_rows,
        const int                   num_cols,
        const int                   num_channels,
        ITensor                    *workspace,
        const arm_gemm::Activation &activation) override;

    void run(const Window &window, const ThreadInfo &info) override;

    /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformOutputKernel
     *
     * @param[in] input         Source tensor info with shape [C, N, 16, batches] or [C, N, 36, batches]. Data types supported: F16/F32.
     * @param[in] bias          Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input
     * @param[in] output        Destination tensor info with shape [output_convolved_dims.width, output_convolved_dims.height, C, batches]. Data type supported: same as @p input
     * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const WinogradInfo &winograd_info);

private:
    using WinogradBase    = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, winograd::WinogradRoots::Integers>;
    using WinogradConv    = typename WinogradBase::template Convolution<T, T>;
    using OutputTransform = typename WinogradBase::template OutputTransform<T, T>;

    std::unique_ptr<OutputTransform> _transform{ nullptr };
    const ITensor                   *_biases;
    const ITensor                   *_transformed_output;
    ITensor                         *_workspace;
    int                              _matrix_stride;
    int                              _matrix_row_stride;
    ITensor                         *_output_nhwc;
    int                              _num_batches;
    int                              _num_rows;
    int                              _num_cols;
    int                              _num_channels;
};
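
// A matching sketch for the output transform (illustrative only; "biases", "transformed_output",
// "output_nhwc" and "workspace" are caller-provided tensors, and passing a default-constructed
// arm_gemm::Activation is assumed here to mean "no activation"):
//
//     NEWinogradLayerTransformOutputKernel<float, 2, 2, 3, 3> transform_output;
//     const int matrix_stride = transform_output.get_matrix_stride(num_batches, num_rows, num_cols, num_output_channels);
//     transform_output.configure(&biases, &transformed_output, matrix_stride, &output_nhwc,
//                                num_batches, num_rows, num_cols, num_output_channels, &workspace, arm_gemm::Activation());
//     NEScheduler::get().schedule(&transform_output, Window::DimX);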

/** Interface for the NEON kernel to perform Winograd weights transform. */
class INEWinogradLayerTransformWeightsKernel : public INEKernel
{
public:
    /** Allow instances of this class to be copied */
    INEWinogradLayerTransformWeightsKernel(const INEWinogradLayerTransformWeightsKernel &) = default;
    /** Allow instances of this class to be copied */
    INEWinogradLayerTransformWeightsKernel &operator=(const INEWinogradLayerTransformWeightsKernel &) = default;
    /** Allow instances of this class to be moved */
    INEWinogradLayerTransformWeightsKernel(INEWinogradLayerTransformWeightsKernel &&) = default;
    /** Allow instances of this class to be moved */
    INEWinogradLayerTransformWeightsKernel &operator=(INEWinogradLayerTransformWeightsKernel &&) = default;

    /** Default constructor */
    INEWinogradLayerTransformWeightsKernel()
    {
    }
    /** Destructor */
    virtual ~INEWinogradLayerTransformWeightsKernel()
    {
    }
    /** Determine how much memory (in units of T) to allocate for the
     * transformed weights.
     *
     * @param[in] num_output_channels Number of output feature maps.
     * @param[in] num_input_channels  Number of input feature maps.
     *
     * @return Storage size (in units of T) required.
     */
    virtual unsigned int get_weight_storage_size(int num_output_channels, int num_input_channels) const = 0;
    /** Gets the stride between matrices in the kernel workspace
     *
     * @param[in] num_output_channels Number of output feature maps.
     * @param[in] num_input_channels  Number of input feature maps.
     *
     * @return Stride expressed in bytes.
     */
    virtual int get_matrix_stride(int num_output_channels, int num_input_channels) const = 0;

    /** Configure the weights transform kernel.
     *
     * @param[in]  weights_hwio        Pointer to the weights tensor.
     * @param[out] output              Pointer to working space for the output tensor in the Winograd domain.
     * @param[in]  matrix_stride       Stride across matrices in the output workspace.
     * @param[in]  num_output_channels Number of filters.
     * @param[in]  num_input_channels  Number of channels in each filter.
     */
    virtual void configure(const ITensor *weights_hwio, ITensor *output, const int matrix_stride, const int num_output_channels, const int num_input_channels) = 0;

    /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformWeightsKernel
     *
     * @param[in] input   First tensor input info. Data types supported: F16/F32.
     * @param[in] weights Weights tensor info. Data types supported: same as @p input.
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *weights);
};

/** NEON kernel to perform Winograd weights transform. */
template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerTransformWeightsKernel final : public INEWinogradLayerTransformWeightsKernel
{
public:
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformWeightsKernel(const NEWinogradLayerTransformWeightsKernel &) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformWeightsKernel &operator=(const NEWinogradLayerTransformWeightsKernel &) = delete;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformWeightsKernel(NEWinogradLayerTransformWeightsKernel &&) = default;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformWeightsKernel &operator=(NEWinogradLayerTransformWeightsKernel &&) = default;
    /** Default destructor */
    ~NEWinogradLayerTransformWeightsKernel() = default;

    /** Default constructor. */
    NEWinogradLayerTransformWeightsKernel();
    const char *name() const override
    {
        return "NEWinogradLayerTransformWeightsKernel";
    }

    /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformWeightsKernel
     *
     * @param[in] input         Source tensor info. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout).
     *                          kernel_x must be 3 and equal to kernel_y. Data types supported: F16/F32.
     * @param[in] output        Destination tensor info. The output is a 3D tensor with dimensions [OFM, IFM, 16] or [OFM, IFM, 36]. Data type supported: same as @p input
     * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info);

    // Inherited methods overridden:

#ifndef DOXYGEN_SKIP_THIS
    /** Configure the weights transform kernel.
     *
     * @param[in]  weights_hwio        Pointer to the weights tensor.
     * @param[out] output              Pointer to working space for the output tensor in the Winograd domain.
     * @param[in]  matrix_stride       Stride across matrices in the output workspace.
     * @param[in]  num_output_channels Number of filters.
     * @param[in]  num_input_channels  Number of channels in each filter.
     */
    void configure(const ITensor *weights_hwio, ITensor *output, const int matrix_stride, const int num_output_channels, const int num_input_channels) override;
#endif /* DOXYGEN_SKIP_THIS */

    /** Determine how much memory (in units of T) to allocate for the
     * transformed weights.
     *
     * @param[in] num_output_channels Number of output feature maps.
     * @param[in] num_input_channels  Number of input feature maps.
     *
     * @return Storage size (in units of T) required.
     */
    unsigned int get_weight_storage_size(int num_output_channels, int num_input_channels) const override;

    /** Gets the stride between matrices in the kernel workspace
     *
     * @param[in] num_output_channels Number of output feature maps.
     * @param[in] num_input_channels  Number of input feature maps.
     *
     * @return Stride expressed in bytes.
     */
    int get_matrix_stride(int num_output_channels, int num_input_channels) const override;
    void run(const Window &window, const ThreadInfo &info) override;
    bool is_parallelisable() const override;

private:
    using WinogradBase     = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, winograd::WinogradRoots::Integers>;
    using WinogradConv     = typename WinogradBase::template Convolution<T, T>;
    using WeightsTransform = typename WinogradBase::template WeightsTransform<T, T>;

    std::unique_ptr<WeightsTransform> _transform{ nullptr };
    const ITensor                    *_weights_hwio;
    ITensor                          *_output;
    int                               _matrix_stride;
    int                               _num_output_channels;
    int                               _num_input_channels;
};
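
// Corresponding sketch for the weights transform (illustrative only; "weights_hwio" and
// "transformed_weights" are caller-provided tensors):
//
//     NEWinogradLayerTransformWeightsKernel<float, 2, 2, 3, 3> transform_weights;
//     const unsigned int weight_storage = transform_weights.get_weight_storage_size(num_output_channels, num_input_channels);
//     const int          matrix_stride  = transform_weights.get_matrix_stride(num_output_channels, num_input_channels);
//     // ... allocate "transformed_weights" with weight_storage elements of T ...
//     transform_weights.configure(&weights_hwio, &transformed_weights, matrix_stride, num_output_channels, num_input_channels);
//     NEScheduler::get().schedule(&transform_weights, Window::DimX);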

/** Configuration grouping the NEON kernels used to perform a Winograd convolution. */
template <typename TIn, typename TOut, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerConfiguration
{
public:
    /** Winograd base kernel */
    using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, winograd::WinogradRoots::Integers>;
    /** Winograd convolution kernel */
    using WinogradConv = typename WinogradBase::template Convolution<TIn, TOut>;

    using TransformInputKernel   = NEWinogradLayerTransformInputKernel<TIn, OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
    using TransformWeightsKernel = NEWinogradLayerTransformWeightsKernel<TIn, OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
    using TransformOutputKernel  = NEWinogradLayerTransformOutputKernel<TOut, OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
};
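
// The configuration helper only bundles the three transform kernel types for one tile/kernel
// geometry; e.g. for an F32 3x3 convolution with a 2x2 output tile (a sketch, other geometries
// follow the same pattern):
//
//     using WinogradConfig = NEWinogradLayerConfiguration<float, float, 2, 2, 3, 3>;
//     WinogradConfig::TransformInputKernel   transform_input;
//     WinogradConfig::TransformWeightsKernel transform_weights;
//     WinogradConfig::TransformOutputKernel  transform_output;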

} // namespace arm_compute
#endif /*ARM_COMPUTE_NEGEMMWINOGRADCONVOLUTIONLAYERKERNEL_H*/