/*
 * Copyright (c) 2017-2019 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef ARM_COMPUTE_NEGEMMWINOGRADCONVOLUTIONLAYERKERNEL_H
#define ARM_COMPUTE_NEGEMMWINOGRADCONVOLUTIONLAYERKERNEL_H

#include "arm_compute/core/NEON/INEKernel.h"
#include "arm_compute/core/NEON/kernels/convolution/common/convolution.hpp"
#include "arm_compute/core/NEON/kernels/convolution/common/tensor.hpp"
#include "arm_compute/core/NEON/kernels/convolution/winograd/winograd_layer.hpp"

namespace arm_compute
{
class ITensor;

/** Interface for the NEON kernel to perform Winograd input transform. */
template <typename T>
class INEWinogradLayerTransformInputKernel : public INEKernel
{
public:
    /** Get the working space required to perform the transformation.
     *
     * Note, the working space is only required when performing the
     * transformation - hence it can be reused whenever the transformation is
     * not running.
     *
     * @param[in] num_threads The greatest number of threads that will be used to execute the transform.
     *
     * @return Size of working space required in bytes.
     */
    virtual unsigned int get_working_space_size(unsigned int num_threads) const = 0;
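
    // A minimal sketch of how a caller might size the scratch buffer (illustrative
    // only; using NEScheduler to pick num_threads is an assumption, not part of this
    // interface):
    //
    //   const unsigned int ws_bytes = kernel.get_working_space_size(NEScheduler::get().num_threads());
    //   // Allocate ws_bytes once and reuse the buffer across every run of the transform.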

    /** Determine how much memory (in units of TIn) to allocate for the
     * transformed input.
     *
     * @param[in] num_batches  Number of batches in the input tensor.
     * @param[in] num_channels Number of feature maps in the input tensor.
     * @param[in] num_rows     Number of rows in each feature map.
     * @param[in] num_cols     Number of columns in each feature map.
     * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
     *
     * @return Storage size (in units of TIn) required.
     */
    virtual unsigned int get_input_storage_size(int num_batches, int num_channels, int num_rows, int num_cols, bool same_padding) const = 0;

    /** Gets the stride between matrices in the input workspace
     *
     * @param[in] num_batches  Number of batches in the input tensor.
     * @param[in] num_channels Number of feature maps in the input tensor.
     * @param[in] num_rows     Number of rows in each feature map.
     * @param[in] num_cols     Number of columns in each feature map.
     * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
     *
     * @return Stride expressed in bytes.
     */
    virtual int get_matrix_stride(int num_batches, int num_channels, int num_rows, int num_cols, bool same_padding) const = 0;

    /** Configure the input transform kernel.
     *
     * @param[in]  input_nhwc    Input tensor in NHWC data layout format.
     * @param[in]  num_batches   Number of batches in input tensor.
     * @param[in]  num_rows      Number of rows in input tensor.
     * @param[in]  num_cols      Number of columns in input tensor.
     * @param[in]  num_channels  Number of channels in input tensor.
     * @param[in]  padding       Padding type.
     * @param[out] output        Base of output matrices.
     * @param[in]  matrix_stride Stride between output matrices.
     * @param[in]  workspace     Tensor to be used as the working space during the computation.
     */
    virtual void configure(const ITensor *input_nhwc, const int num_batches, const int num_rows, const int num_cols, const int num_channels,
                           const PaddingType padding, ITensor *output, const int matrix_stride, ITensor *workspace) = 0;

    /** Destructor */
    virtual ~INEWinogradLayerTransformInputKernel()
    {
    }
};

/** NEON kernel to perform Winograd input transform. */
template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerTransformInputKernel : public INEWinogradLayerTransformInputKernel<T>
{
public:
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformInputKernel(const NEWinogradLayerTransformInputKernel &) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformInputKernel &operator=(const NEWinogradLayerTransformInputKernel &) = delete;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformInputKernel(NEWinogradLayerTransformInputKernel &&) = default;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformInputKernel &operator=(NEWinogradLayerTransformInputKernel &&) = default;
    /** Default destructor */
    ~NEWinogradLayerTransformInputKernel() = default;

    /** Determine how much memory (in units of TIn) to allocate for the
     * transformed input.
     *
     * @param[in] num_batches  Number of batches in the input tensor.
     * @param[in] num_channels Number of feature maps in the input tensor.
     * @param[in] num_rows     Number of rows in each feature map.
     * @param[in] num_cols     Number of columns in each feature map.
     * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
     *
     * @return Storage size (in units of TIn) required.
     */
    unsigned int get_input_storage_size(
        int  num_batches,
        int  num_channels,
        int  num_rows,
        int  num_cols,
        bool same_padding) const override;

    /** Get the working space required to perform the transformation.
     *
     * Note, the working space is only required when performing the
     * transformation - hence it can be reused whenever the transformation is
     * not running.
     *
     * @param[in] num_threads The greatest number of threads that will be used to execute the transform.
     *
     * @return Size of working space required in bytes.
     */
    unsigned int get_working_space_size(unsigned int num_threads) const override;

    /** Gets the stride between matrices in the input workspace
     *
     * @param[in] num_batches  Number of batches in the input tensor.
     * @param[in] num_channels Number of feature maps in the input tensor.
     * @param[in] num_rows     Number of rows in each feature map.
     * @param[in] num_cols     Number of columns in each feature map.
     * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
     *
     * @return Stride expressed in bytes.
     */
    int get_matrix_stride(
        int  num_batches,
        int  num_channels,
        int  num_rows,
        int  num_cols,
        bool same_padding) const override;

    /** Default constructor */
    NEWinogradLayerTransformInputKernel();

    const char *name() const override
    {
        return "NEWinogradLayerTransformInputKernel";
    }

    /** Configure the input transform kernel.
     *
     * @param[in]  input_nhwc    Input tensor. Data types supported: F32. Layout supported: NHWC.
     * @param[in]  num_batches   Number of batches in input tensor.
     * @param[in]  num_rows      Number of rows in input tensor.
     * @param[in]  num_cols      Number of columns in input tensor.
     * @param[in]  num_channels  Number of channels in input tensor.
     * @param[in]  padding       Padding type.
     * @param[out] output        Base of output matrices.
     * @param[in]  matrix_stride Stride between output matrices.
     * @param[in]  workspace     Tensor to be used as the working space during the computation.
     */
    void configure(
        const ITensor    *input_nhwc,
        const int         num_batches,
        const int         num_rows,
        const int         num_cols,
        const int         num_channels,
        const PaddingType padding,
        ITensor          *output,
        const int         matrix_stride,
        ITensor          *workspace) override;

    // Inherited methods overridden:
    void run(const Window &window, const ThreadInfo &info) override;

    /** Winograd base kernel */
    using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, winograd::WinogradRoots::Integers>;
    /** Winograd convolution kernel */
    using WinogradConv = typename WinogradBase::template Convolution<T, T>;

    /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformInputKernel
     *
     * @param[in] input         First tensor input info. Data types supported: F32.
     * @param[in] output        Output tensor info. Data types supported: same as @p input.
     * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info);

private:
    using InputTransform = typename WinogradBase::template InputTransform<T, T>;

    std::unique_ptr<InputTransform> _transform{ nullptr };
    const ITensor                  *_input_nhwc;
    int                             _num_batches;    /**< Number of batches in input tensor. */
    int                             _num_rows;       /**< Number of rows in input tensor. */
    int                             _num_cols;       /**< Number of columns in input tensor. */
    int                             _num_channels;   /**< Number of channels in input tensor. */
    PaddingType                     _padding;        /**< Padding type. */
    ITensor                        *_output;         /**< Base of output matrices. */
    int                             _matrix_stride;  /**< Stride between output matrices. */
    int                             _padding_top;    /**< Padding to apply to the top of the image. */
    int                             _padding_left;   /**< Padding to apply to the left of the image. */
    int                             _padding_right;  /**< Padding to apply to the right of the image. */
    int                             _padding_bottom; /**< Padding to apply to the bottom of the image. */
    ITensor                        *_workspace;
};
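
/* Example usage (a sketch, not library code): the tensor objects, the PADDING_SAME
 * enumerator and the scheduler call below are assumptions about how a caller could
 * drive this kernel for an F32, 2x2 output tile, 3x3 kernel configuration.
 *
 *   NEWinogradLayerTransformInputKernel<float, 2, 2, 3, 3> transform_input;
 *   const int  num_batches = 1, num_rows = 224, num_cols = 224, num_channels = 64;
 *   const bool same_padding = true;
 *
 *   // Size the destination and scratch tensors before configuring.
 *   const unsigned int storage_size = transform_input.get_input_storage_size(num_batches, num_channels, num_rows, num_cols, same_padding);
 *   const int          stride       = transform_input.get_matrix_stride(num_batches, num_channels, num_rows, num_cols, same_padding);
 *   const unsigned int ws_bytes     = transform_input.get_working_space_size(NEScheduler::get().num_threads());
 *
 *   // input_nhwc, transformed_input and workspace are ITensor objects allocated by the
 *   // caller (transformed_input sized from storage_size, workspace from ws_bytes).
 *   transform_input.configure(&input_nhwc, num_batches, num_rows, num_cols, num_channels,
 *                             PADDING_SAME, &transformed_input, stride, &workspace);
 *   NEScheduler::get().schedule(&transform_input, Window::DimX);
 */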

/** Interface for the NEON kernel to perform Winograd output transform. */
template <typename T>
class INEWinogradLayerTransformOutputKernel : public INEKernel
{
public:
    /** Get the working space required to perform the transformation.
     *
     * Note, the working space is only required when performing the
     * transformation - hence it can be reused whenever the transformation is
     * not running.
     *
     * @param[in] num_threads The greatest number of threads that will be used to execute the transform.
     *
     * @return Size of working space required in bytes.
     */
    virtual unsigned int get_working_space_size(unsigned int num_threads) const = 0;

    /** Determine how much memory (in units of TOut) to allocate for the
     * (Winograd domain) output.
     *
     * @param[in] num_batches         Number of batches in the output tensor.
     * @param[in] num_rows            Number of rows in each feature map of the input tensor.
     * @param[in] num_cols            Number of columns in each feature map of the input tensor.
     * @param[in] num_output_channels Number of feature maps in the output tensor.
     *
     * @return Storage size (in units of TOut) required.
     */
    virtual unsigned int get_output_storage_size(int num_batches, int num_rows, int num_cols, int num_output_channels) const = 0;

    /** Gets the stride between matrices in the output workspace
     *
     * @param[in] num_batches         Number of batches in the output tensor.
     * @param[in] num_rows            Number of rows in each feature map of the input tensor.
     * @param[in] num_cols            Number of columns in each feature map of the input tensor.
     * @param[in] num_output_channels Number of feature maps in the output tensor.
     *
     * @return Stride expressed in bytes.
     */
    virtual int get_matrix_stride(int num_batches, int num_rows, int num_cols, int num_output_channels) const = 0;

    /** Get the output shape of a convolution.
     *
     * @param[in] num_rows     Number of rows in each feature map of the input tensor.
     * @param[in] num_cols     Number of columns in each feature map of the input tensor.
     * @param[in] padding_same True if padding is SAME, false otherwise
     *
     * @return Shape of the output tensor
     */
    virtual std::pair<unsigned int, unsigned int> get_output_shape(
        int  num_rows,    /* Number of rows in each feature map of the input tensor. */
        int  num_cols,    /* Number of columns in each feature map of the input tensor. */
        bool padding_same /* True if padding is SAME, false otherwise */
    ) const = 0;
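
    // Worked illustration (assuming the stride-1 convolution semantics used by the
    // Winograd kernels): with a 3x3 kernel on a 224x224 feature map, padding_same == true
    // keeps the output at 224x224, while padding_same == false ("VALID") yields
    // (224 - 3 + 1) x (224 - 3 + 1) = 222x222.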

    /** Configure the output transform kernel.
     *
     * @param[in]  biases             Pointer to the biases tensor.
     * @param[in]  transformed_output Pointer to working space for the output tensor in the Winograd domain.
     * @param[in]  matrix_stride      Output matrix stride, can be computed with winograd::WinogradGEMM<2, 2, 3, 3>::Convolution<float, float>::get_output_matrix_stride()
     * @param[out] output_nhwc        Pointer to a tensor with NHWC data layout, in the spatial domain.
     * @param[in]  num_batches        Number of batches in the input tensor.
     * @param[in]  num_rows           Number of rows in output tensor.
     * @param[in]  num_cols           Number of columns in output tensor.
     * @param[in]  num_channels       Number of feature maps in the output tensor.
     * @param[in]  workspace          Tensor to be used as the working space during the computation.
     * @param[in]  activation         Activation to be used
     */
    virtual void configure(
        const ITensor              *biases,
        const ITensor              *transformed_output,
        const int                   matrix_stride,
        ITensor                    *output_nhwc,
        const int                   num_batches,
        const int                   num_rows,
        const int                   num_cols,
        const int                   num_channels,
        ITensor                    *workspace,
        const arm_gemm::Activation &activation) = 0;

    virtual ~INEWinogradLayerTransformOutputKernel()
    {
    }
};
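
/* Usage sketch for the output transform (illustrative; tensor names are placeholders
 * and arm_gemm::Activation is assumed to default-construct to "no activation"):
 *
 *   // out_rows/out_cols come from get_output_shape(); in_rows/in_cols are the input feature map size.
 *   const unsigned int storage_size = output_kernel.get_output_storage_size(num_batches, in_rows, in_cols, num_out_channels);
 *   const int          stride       = output_kernel.get_matrix_stride(num_batches, in_rows, in_cols, num_out_channels);
 *
 *   arm_gemm::Activation act{};
 *   output_kernel.configure(&biases, &transformed_output, stride, &output_nhwc,
 *                           num_batches, out_rows, out_cols, num_out_channels, &workspace, act);
 *   NEScheduler::get().schedule(&output_kernel, Window::DimX);
 */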

/** NEON kernel to perform Winograd output transform. */
template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerTransformOutputKernel : public INEWinogradLayerTransformOutputKernel<T>
{
public:
    const char *name() const override
    {
        return "NEWinogradLayerTransformOutputKernel";
    }
    /** Constructor */
    NEWinogradLayerTransformOutputKernel();

    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformOutputKernel(const NEWinogradLayerTransformOutputKernel &) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformOutputKernel &operator=(const NEWinogradLayerTransformOutputKernel &) = delete;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformOutputKernel(NEWinogradLayerTransformOutputKernel &&) = default;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformOutputKernel &operator=(NEWinogradLayerTransformOutputKernel &&) = default;
    /** Default destructor */
    ~NEWinogradLayerTransformOutputKernel() = default;

    // Inherited methods overridden:
    /** Determine how much memory (in units of TOut) to allocate for the
     * (Winograd domain) output.
     *
     * @param[in] num_batches         Number of batches in the output tensor.
     * @param[in] num_rows            Number of rows in each feature map of the input tensor.
     * @param[in] num_cols            Number of columns in each feature map of the input tensor.
     * @param[in] num_output_channels Number of feature maps in the output tensor.
     *
     * @return Storage size (in units of TOut) required.
     */
    unsigned int get_output_storage_size(int num_batches, int num_rows, int num_cols, int num_output_channels) const override;

    /** Gets the stride between matrices in the output workspace
     *
     * @param[in] num_batches         Number of batches in the output tensor.
     * @param[in] num_rows            Number of rows in each feature map of the input tensor.
     * @param[in] num_cols            Number of columns in each feature map of the input tensor.
     * @param[in] num_output_channels Number of feature maps in the output tensor.
     *
     * @return Stride expressed in bytes.
     */
    int get_matrix_stride(int num_batches, int num_rows, int num_cols, int num_output_channels) const override;
    /** Get the output shape of a convolution.
     *
     * @param[in] num_rows     Number of rows in each feature map of the input tensor.
     * @param[in] num_cols     Number of columns in each feature map of the input tensor.
     * @param[in] padding_same True if padding is SAME, false otherwise
     *
     * @return Shape of the output tensor
     */
    std::pair<unsigned int, unsigned int> get_output_shape(
        int  num_rows, /* Number of rows in each feature map of the input tensor. */
        int  num_cols, /* Number of columns in each feature map of the input tensor. */
        bool padding_same) const override;

    /** Get the working space required to perform the transformation.
     *
     * Note, the working space is only required when performing the
     * transformation - hence it can be reused whenever the transformation is
     * not running.
     *
     * @param[in] num_threads The greatest number of threads that will be used to execute the transform.
     *
     * @return Size of working space required in bytes.
     */
    unsigned int get_working_space_size(unsigned int num_threads) const override;

    /** Configure the output transform kernel.
     *
     * @param[in]  biases             Pointer to the biases tensor.
     * @param[in]  transformed_output Pointer to working space for the output tensor in the Winograd domain.
     * @param[in]  matrix_stride      Output matrix stride, can be computed with winograd::WinogradGEMM<2, 2, 3, 3>::Convolution<float, float>::get_output_matrix_stride()
     * @param[out] output_nhwc        Pointer to a tensor with NHWC data layout, in the spatial domain.
     * @param[in]  num_batches        Number of batches in the input tensor.
     * @param[in]  num_rows           Number of rows in output tensor.
     * @param[in]  num_cols           Number of columns in output tensor.
     * @param[in]  num_channels       Number of feature maps in the output tensor.
     * @param[in]  workspace          Tensor to be used as the working space during the computation.
     * @param[in]  activation         Activation to be used
     */
    void configure(
        const ITensor              *biases,
        const ITensor              *transformed_output,
        const int                   matrix_stride,
        ITensor                    *output_nhwc,
        const int                   num_batches,
        const int                   num_rows,
        const int                   num_cols,
        const int                   num_channels,
        ITensor                    *workspace,
        const arm_gemm::Activation &activation) override;

    void run(const Window &window, const ThreadInfo &info) override;

    /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformOutputKernel
     *
     * @param[in] input         Source tensor info with shape [C, N, 16, batches] or [C, N, 36, batches]. Data types supported: F32.
     * @param[in] bias          Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input
     * @param[in] output        Destination tensor info with shape [output_convolved_dims.width, output_convolved_dims.height, C, batches]. Data type supported: same as @p input
     * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const WinogradInfo &winograd_info);

private:
    using WinogradBase    = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, winograd::WinogradRoots::Integers>;
    using WinogradConv    = typename WinogradBase::template Convolution<T, T>;
    using OutputTransform = typename WinogradBase::template OutputTransform<T, T>;

    std::unique_ptr<OutputTransform> _transform{ nullptr };
    const ITensor                   *_biases;
    const ITensor                   *_transformed_output;
    ITensor                         *_workspace;
    int                              _matrix_stride;
    int                              _matrix_row_stride;
    ITensor                         *_output_nhwc;
    int                              _num_batches;
    int                              _num_rows;
    int                              _num_cols;
    int                              _num_channels;
};

/** Interface for the NEON kernel to perform Winograd weights transform. */
template <typename T>
class INEWinogradLayerTransformWeightsKernel : public INEKernel
{
public:
    /** Default copy constructor */
    INEWinogradLayerTransformWeightsKernel(const INEWinogradLayerTransformWeightsKernel &) = default;
    /** Default copy assignment operator */
    INEWinogradLayerTransformWeightsKernel &operator=(const INEWinogradLayerTransformWeightsKernel &) = default;
    /** Allow instances of this class to be moved */
    INEWinogradLayerTransformWeightsKernel(INEWinogradLayerTransformWeightsKernel &&) = default;
    /** Allow instances of this class to be moved */
    INEWinogradLayerTransformWeightsKernel &operator=(INEWinogradLayerTransformWeightsKernel &&) = default;

    INEWinogradLayerTransformWeightsKernel()
    {
    }
    virtual ~INEWinogradLayerTransformWeightsKernel()
    {
    }
    /** Determine how much memory (in units of T) to allocate for the
     * transformed weights.
     *
     * @param[in] num_output_channels Number of output feature maps.
     * @param[in] num_input_channels  Number of input feature maps.
     *
     * @return Storage size (in units of T) required.
     */
    virtual unsigned int get_weight_storage_size(int num_output_channels, int num_input_channels) const = 0;
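
    // Scale illustration (derived from the shapes documented for the concrete kernel's
    // validate() below, which describes the transformed weights as [OFM, IFM, 16] or
    // [OFM, IFM, 36]): the storage is on the order of
    // 16 * num_output_channels * num_input_channels units of T for a 2x2 output tile,
    // or 36 * num_output_channels * num_input_channels for a 4x4 tile, plus whatever
    // padding or alignment the implementation adds.
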
    /** Gets the stride between matrices in the kernel workspace
     *
     * @param[in] num_output_channels Number of output feature maps.
     * @param[in] num_input_channels  Number of input feature maps.
     *
     * @return Stride expressed in bytes.
     */
    virtual int get_matrix_stride(int num_output_channels, int num_input_channels) const = 0;

    /** Configure the weights transform kernel.
     *
     * @param[in]  weights_hwio        Pointer to the weights tensor
     * @param[out] output              Pointer to working space for the output tensor in the Winograd domain.
     * @param[in]  matrix_stride       Stride across matrices in the output workspace.
     * @param[in]  num_output_channels Number of filters.
     * @param[in]  num_input_channels  Number of channels in each filter.
     */
    virtual void configure(const ITensor *weights_hwio, ITensor *output, const int matrix_stride, const int num_output_channels, const int num_input_channels) = 0;

    /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformWeightsKernel
     *
     * @param[in] input   First tensor input info. Data types supported: F32.
     * @param[in] weights Weights tensor info. Data types supported: same as @p input.
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *weights);
};

/** NEON kernel to perform Winograd weights transform. */
template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerTransformWeightsKernel final : public INEWinogradLayerTransformWeightsKernel<T>
{
public:
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformWeightsKernel(const NEWinogradLayerTransformWeightsKernel &) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformWeightsKernel &operator=(const NEWinogradLayerTransformWeightsKernel &) = delete;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformWeightsKernel(NEWinogradLayerTransformWeightsKernel &&) = default;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformWeightsKernel &operator=(NEWinogradLayerTransformWeightsKernel &&) = default;
    /** Default destructor */
    ~NEWinogradLayerTransformWeightsKernel() = default;

    /** Default constructor. */
    NEWinogradLayerTransformWeightsKernel();
    const char *name() const override
    {
        return "NEWinogradLayerTransformWeightsKernel";
    }

    /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformWeightsKernel
     *
     * @param[in] input         Source tensor info. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout).
     *                          kernel_x must be 3 and equal to kernel_y. Data types supported: F32.
     * @param[in] output        Destination tensor info. The output is a 3D tensor with dimensions [OFM, IFM, 16] or [OFM, IFM, 36]. Data type supported: same as @p input
     * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info);

    // Inherited methods overridden:

#ifndef DOXYGEN_SKIP_THIS
    /** Configure the weights transform kernel.
     *
     * @param[in]  weights_hwio        Pointer to the weights tensor
     * @param[out] output              Pointer to working space for the output tensor in the Winograd domain.
     * @param[in]  matrix_stride       Stride across matrices in the output workspace.
     * @param[in]  num_output_channels Number of filters.
     * @param[in]  num_input_channels  Number of channels in each filter.
     */
    void configure(const ITensor *weights_hwio, ITensor *output, const int matrix_stride, const int num_output_channels, const int num_input_channels) override;
#endif /* DOXYGEN_SKIP_THIS */

    /** Determine how much memory (in units of T) to allocate for the
     * transformed weights.
     *
     * @param[in] num_output_channels Number of output feature maps.
     * @param[in] num_input_channels  Number of input feature maps.
     *
     * @return Storage size (in units of T) required.
     */
    unsigned int get_weight_storage_size(int num_output_channels, int num_input_channels) const override;

    /** Gets the stride between matrices in the input workspace
     *
     * @param[in] num_output_channels Number of output feature maps.
     * @param[in] num_input_channels  Number of input feature maps.
     *
     * @return Stride expressed in bytes.
     */
    int get_matrix_stride(int num_output_channels, int num_input_channels) const override;
    void run(const Window &window, const ThreadInfo &info) override;
    bool is_parallelisable() const override;

private:
    using WinogradBase     = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, winograd::WinogradRoots::Integers>;
    using WinogradConv     = typename WinogradBase::template Convolution<T, T>;
    using WeightsTransform = typename WinogradBase::template WeightsTransform<T, T>;

    std::unique_ptr<WeightsTransform> _transform{ nullptr };
    const ITensor                    *_weights_hwio;
    ITensor                          *_output;
    int                               _matrix_stride;
    int                               _num_output_channels;
    int                               _num_input_channels;
};

/** Helper class that bundles the NEON Winograd kernel types for a given output tile and kernel shape. */
template <typename TIn, typename TOut, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerConfiguration
{
public:
    /** Winograd base kernel */
    using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, winograd::WinogradRoots::Integers>;
    /** Winograd convolution kernel */
    using WinogradConv = typename WinogradBase::template Convolution<TIn, TOut>;

    using TransformInputKernel   = NEWinogradLayerTransformInputKernel<TIn, OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
    using TransformWeightsKernel = NEWinogradLayerTransformWeightsKernel<TIn, OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
    using TransformOutputKernel  = NEWinogradLayerTransformOutputKernel<TOut, OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
};
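
/* Selection sketch (the alias names below are illustrative, not part of the library):
 * NEWinogradLayerConfiguration only bundles the three kernel types for one
 * output-tile/kernel-size combination, e.g. F32 with a 2x2 output tile and 3x3 kernels:
 *
 *   using Cfg           = NEWinogradLayerConfiguration<float, float, 2, 2, 3, 3>;
 *   using InputKernel   = Cfg::TransformInputKernel;
 *   using WeightsKernel = Cfg::TransformWeightsKernel;
 *   using OutputKernel  = Cfg::TransformOutputKernel;
 */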

} // namespace arm_compute
#endif /*ARM_COMPUTE_NEGEMMWINOGRADCONVOLUTIONLAYERKERNEL_H*/