/*
 * Copyright (c) 2017-2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef ARM_COMPUTE_NEGEMMWINOGRADCONVOLUTIONLAYERKERNEL_H
#define ARM_COMPUTE_NEGEMMWINOGRADCONVOLUTIONLAYERKERNEL_H

#include "arm_compute/core/NEON/INEKernel.h"
#include "src/core/NEON/kernels/convolution/common/convolution.hpp"
#include "src/core/NEON/kernels/convolution/common/tensor.hpp"

#include "src/core/NEON/kernels/convolution/winograd/winograd_layer.hpp"

namespace arm_compute
{
// Forward declarations
class ITensor;

/** Interface for the NEON kernel to perform Winograd input transform. */
class INEWinogradLayerTransformInputKernel : public INEKernel
{
public:
    /** Get the working space required to perform the transformation.
     *
     * Note, the working space is only required when performing the
     * transformation - hence it can be reused whenever the transformation is
     * not running.
     *
     * @param[in] num_threads The greatest number of threads that will be used to execute the transform.
     *
     * @return Size of working space required in bytes.
     */
    virtual unsigned int get_working_space_size(unsigned int num_threads) const = 0;

    /** Determine how much memory (in units of TIn) to allocate for the
     * transformed input.
     *
     * @param[in] num_batches  Number of batches in the input tensor.
     * @param[in] num_channels Number of feature maps in the input tensor.
     * @param[in] num_rows     Number of rows in each feature map.
     * @param[in] num_cols     Number of columns in each feature map.
     * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
     *
     * @return Storage size (in units of TIn) required.
     */
    virtual unsigned int get_input_storage_size(int num_batches, int num_channels, int num_rows, int num_cols, bool same_padding) const = 0;

    /** Gets the stride between matrices in the input workspace
     *
     * @param[in] num_batches  Number of batches in the input tensor.
     * @param[in] num_channels Number of feature maps in the input tensor.
     * @param[in] num_rows     Number of rows in each feature map.
     * @param[in] num_cols     Number of columns in each feature map.
     * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
     *
     * @return Stride expressed in bytes.
     */
    virtual int get_matrix_stride(int num_batches, int num_channels, int num_rows, int num_cols, bool same_padding) const = 0;

    /** Configure the input transform kernel.
     *
     * @param[in]  input_nhwc    Input tensor in NHWC data layout format.
     * @param[in]  num_batches   Number of batches in input tensor.
     * @param[in]  num_rows      Number of rows in input tensor.
     * @param[in]  num_cols      Number of columns in input tensor.
     * @param[in]  num_channels  Number of channels in input tensor.
     * @param[in]  padding       Padding type.
     * @param[out] output        Base of output matrices.
     * @param[in]  matrix_stride Stride between output matrices.
     * @param[in]  workspace     Tensor to be used as the working space during the computation.
     */
    virtual void configure(const ITensor *input_nhwc, const int num_batches, const int num_rows, const int num_cols, const int num_channels,
                           const PaddingType padding, ITensor *output, const int matrix_stride, ITensor *workspace) = 0;

    /** Destructor */
    virtual ~INEWinogradLayerTransformInputKernel()
    {
    }
};
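
/* Editorial usage sketch (not part of the library API): how a caller might size the buffers
 * described by this interface before configuring a concrete input transform. The tensor
 * dimensions and the thread count below are assumptions made up for the example.
 *
 *   void size_input_transform_buffers(const INEWinogradLayerTransformInputKernel &transform, unsigned int num_threads)
 *   {
 *       const int  num_batches = 1, num_channels = 64, num_rows = 56, num_cols = 56;
 *       const bool same_padding = true;
 *       // Storage for the Winograd-domain input, in units of TIn (multiply by the element size for bytes).
 *       const unsigned int storage_elems  = transform.get_input_storage_size(num_batches, num_channels, num_rows, num_cols, same_padding);
 *       // Byte stride between consecutive Winograd matrices inside that buffer.
 *       const int          matrix_stride  = transform.get_matrix_stride(num_batches, num_channels, num_rows, num_cols, same_padding);
 *       // Scratch needed only while the transform runs; it can be reused between runs.
 *       const unsigned int workspace_size = transform.get_working_space_size(num_threads);
 *       (void)storage_elems; (void)matrix_stride; (void)workspace_size;
 *   }
 */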

/** NEON kernel to perform Winograd input transform. */
template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerTransformInputKernel : public INEWinogradLayerTransformInputKernel
{
public:
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformInputKernel(const NEWinogradLayerTransformInputKernel &) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformInputKernel &operator=(const NEWinogradLayerTransformInputKernel &) = delete;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformInputKernel(NEWinogradLayerTransformInputKernel &&) = default;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformInputKernel &operator=(NEWinogradLayerTransformInputKernel &&) = default;
    /** Default destructor */
    ~NEWinogradLayerTransformInputKernel() = default;

    /** Determine how much memory (in units of TIn) to allocate for the
     * transformed input.
     *
     * @param[in] num_batches  Number of batches in the input tensor.
     * @param[in] num_channels Number of feature maps in the input tensor.
     * @param[in] num_rows     Number of rows in each feature map.
     * @param[in] num_cols     Number of columns in each feature map.
     * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
     *
     * @return Storage size (in units of TIn) required.
     */
    unsigned int get_input_storage_size(
        int  num_batches,
        int  num_channels,
        int  num_rows,
        int  num_cols,
        bool same_padding) const override;

    /** Get the working space required to perform the transformation.
     *
     * Note, the working space is only required when performing the
     * transformation - hence it can be reused whenever the transformation is
     * not running.
     *
     * @param[in] num_threads The greatest number of threads that will be used to execute the transform.
     *
     * @return Size of working space required in bytes.
     */
    unsigned int get_working_space_size(unsigned int num_threads) const override;

    /** Gets the stride between matrices in the input workspace
     *
     * @param[in] num_batches  Number of batches in the input tensor.
     * @param[in] num_channels Number of feature maps in the input tensor.
     * @param[in] num_rows     Number of rows in each feature map.
     * @param[in] num_cols     Number of columns in each feature map.
     * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
     *
     * @return Stride expressed in bytes.
     */
    int get_matrix_stride(
        int  num_batches,
        int  num_channels,
        int  num_rows,
        int  num_cols,
        bool same_padding) const override;

    /** Default constructor */
    NEWinogradLayerTransformInputKernel();

    const char *name() const override
    {
        return "NEWinogradLayerTransformInputKernel";
    }

    /** Configure the input transform kernel.
     *
     * @param[in]  input_nhwc    Input tensor. Data types supported: F16/F32. Layout supported: NHWC.
     * @param[in]  num_batches   Number of batches in input tensor.
     * @param[in]  num_rows      Number of rows in input tensor.
     * @param[in]  num_cols      Number of columns in input tensor.
     * @param[in]  num_channels  Number of channels in input tensor.
     * @param[in]  padding       Padding type.
     * @param[out] output        Base of output matrices.
     * @param[in]  matrix_stride Stride between output matrices.
     * @param[in]  workspace     Tensor to be used as the working space during the computation.
     */
    void configure(
        const ITensor    *input_nhwc,
        const int         num_batches,
        const int         num_rows,
        const int         num_cols,
        const int         num_channels,
        const PaddingType padding,
        ITensor          *output,
        const int         matrix_stride,
        ITensor          *workspace) override;

    // Inherited methods overridden:
    void run(const Window &window, const ThreadInfo &info) override;

    /** Winograd base kernel */
    using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, winograd::WinogradRoots::Integers>;
    /** Winograd convolution kernel */
    using WinogradConv = typename WinogradBase::template Convolution<T, T>;

    /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformInputKernel
     *
     * @param[in] input         First tensor input info. Data types supported: F16/F32.
     * @param[in] output        Output tensor info. Data types supported: same as @p input.
     * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info);

private:
    using InputTransform = typename WinogradBase::template InputTransform<T, T>;

    std::unique_ptr<InputTransform> _transform{ nullptr };
    const ITensor                  *_input_nhwc;
    int                             _num_batches;    /**< Number of batches in input tensor. */
    int                             _num_rows;       /**< Number of rows in input tensor. */
    int                             _num_cols;       /**< Number of columns in input tensor. */
    int                             _num_channels;   /**< Number of channels in input tensor. */
    PaddingType                     _padding;        /**< Padding type. */
    ITensor                        *_output;         /**< Base of output matrices. */
    int                             _matrix_stride;  /**< Stride between output matrices. */
    int                             _padding_top;    /**< Padding to apply to the top of the image. */
    int                             _padding_left;   /**< Padding to apply to the left of the image. */
    int                             _padding_right;  /**< Padding to apply to the right of the image. */
    int                             _padding_bottom; /**< Padding to apply to the bottom of the image. */
    ITensor                        *_workspace;
};
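
/* Editorial sketch (not part of the library): configuring and running the concrete input
 * transform for a float F(2x2, 3x3) convolution. The tensors are assumed to be allocated by
 * the caller with the sizes reported by the query functions above, and the kernel is assumed
 * to be dispatched through the NEON scheduler as other ACL kernels are.
 *
 *   using InputTransform = NEWinogradLayerTransformInputKernel<float, 2, 2, 3, 3>;
 *
 *   void run_input_transform(const ITensor *input_nhwc, ITensor *winograd_input, ITensor *workspace,
 *                            int num_batches, int num_rows, int num_cols, int num_channels,
 *                            PaddingType padding, int matrix_stride)
 *   {
 *       InputTransform transform;
 *       transform.configure(input_nhwc, num_batches, num_rows, num_cols, num_channels,
 *                           padding, winograd_input, matrix_stride, workspace);
 *       NEScheduler::get().schedule(&transform, Window::DimX); // executes run() over the kernel's window
 *   }
 */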

/** Interface for the NEON kernel to perform Winograd output transform. */
class INEWinogradLayerTransformOutputKernel : public INEKernel
{
public:
    /** Get the working space required to perform the transformation.
     *
     * Note, the working space is only required when performing the
     * transformation - hence it can be reused whenever the transformation is
     * not running.
     *
     * @param[in] num_threads The greatest number of threads that will be used to execute the transform.
     *
     * @return Size of working space required in bytes.
     */
    virtual unsigned int get_working_space_size(unsigned int num_threads) const = 0;

    /** Determine how much memory (in units of TOut) to allocate for the
     * (Winograd domain) output.
     *
     * @param[in] num_batches         Number of batches in the output tensor.
     * @param[in] num_rows            Number of rows in each feature map of the input tensor.
     * @param[in] num_cols            Number of columns in each feature map of the input tensor.
     * @param[in] num_output_channels Number of feature maps in the output tensor.
     *
     * @return Storage size (in units of TOut) required.
     */
    virtual unsigned int get_output_storage_size(int num_batches, int num_rows, int num_cols, int num_output_channels) const = 0;

    /** Gets the stride between matrices in the output workspace
     *
     * @param[in] num_batches         Number of batches in the output tensor.
     * @param[in] num_rows            Number of rows in each feature map of the input tensor.
     * @param[in] num_cols            Number of columns in each feature map of the input tensor.
     * @param[in] num_output_channels Number of feature maps in the output tensor.
     *
     * @return Stride expressed in bytes.
     */
    virtual int get_matrix_stride(int num_batches, int num_rows, int num_cols, int num_output_channels) const = 0;

    /** Get the output shape of a convolution.
     *
     * @param[in] num_rows     Number of rows in each feature map of the input tensor.
     * @param[in] num_cols     Number of columns in each feature map of the input tensor.
     * @param[in] padding_same True if padding is SAME, false otherwise
     *
     * @return Shape of the output tensor
     */
    virtual std::pair<unsigned int, unsigned int> get_output_shape(
        int  num_rows,    /* Number of rows in each feature map of the input tensor. */
        int  num_cols,    /* Number of columns in each feature map of the input tensor. */
        bool padding_same /* True if padding is SAME, false otherwise */
    ) const = 0;

    /** Configure the output transform kernel.
     *
     * @param[in]  biases             Pointer to the biases tensor.
     * @param[in]  transformed_output Pointer to working space for the output tensor in the Winograd domain.
     * @param[in]  matrix_stride      Output matrix stride, can be computed with winograd::WinogradGEMM<2, 2, 3, 3>::Convolution<float, float>::get_output_matrix_stride()
     * @param[out] output_nhwc        Pointer to the output tensor, in NHWC data layout, in the spatial domain.
     * @param[in]  num_batches        Number of batches in the input tensor.
     * @param[in]  num_rows           Number of rows in output tensor.
     * @param[in]  num_cols           Number of columns in output tensor.
     * @param[in]  num_channels       Number of feature maps in the output tensor.
     * @param[in]  workspace          Tensor to be used as the working space during the computation.
     * @param[in]  activation         Activation to be used
     */
    virtual void configure(
        const ITensor              *biases,
        const ITensor              *transformed_output,
        const int                   matrix_stride,
        ITensor                    *output_nhwc,
        const int                   num_batches,
        const int                   num_rows,
        const int                   num_cols,
        const int                   num_channels,
        ITensor                    *workspace,
        const arm_gemm::Activation &activation) = 0;

    virtual ~INEWinogradLayerTransformOutputKernel()
    {
    }
};

/** NEON kernel to perform Winograd output transform. */
template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerTransformOutputKernel : public INEWinogradLayerTransformOutputKernel
{
public:
    const char *name() const override
    {
        return "NEWinogradLayerTransformOutputKernel";
    }
    /** Constructor */
    NEWinogradLayerTransformOutputKernel();

    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformOutputKernel(const NEWinogradLayerTransformOutputKernel &) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformOutputKernel &operator=(const NEWinogradLayerTransformOutputKernel &) = delete;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformOutputKernel(NEWinogradLayerTransformOutputKernel &&) = default;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformOutputKernel &operator=(NEWinogradLayerTransformOutputKernel &&) = default;
    /** Default destructor */
    ~NEWinogradLayerTransformOutputKernel() = default;

    // Inherited methods overridden:
    /** Determine how much memory (in units of TOut) to allocate for the
     * (Winograd domain) output.
     *
     * @param[in] num_batches         Number of batches in the output tensor.
     * @param[in] num_rows            Number of rows in each feature map of the input tensor.
     * @param[in] num_cols            Number of columns in each feature map of the input tensor.
     * @param[in] num_output_channels Number of feature maps in the output tensor.
     *
     * @return Storage size (in units of TOut) required.
     */
    unsigned int get_output_storage_size(int num_batches, int num_rows, int num_cols, int num_output_channels) const override;

    /** Gets the stride between matrices in the output workspace
     *
     * @param[in] num_batches         Number of batches in the output tensor.
     * @param[in] num_rows            Number of rows in each feature map of the input tensor.
     * @param[in] num_cols            Number of columns in each feature map of the input tensor.
     * @param[in] num_output_channels Number of feature maps in the output tensor.
     *
     * @return Stride expressed in bytes.
     */
    int get_matrix_stride(int num_batches, int num_rows, int num_cols, int num_output_channels) const override;
    /** Get the output shape of a convolution.
     *
     * @param[in] num_rows     Number of rows in each feature map of the input tensor.
     * @param[in] num_cols     Number of columns in each feature map of the input tensor.
     * @param[in] padding_same True if padding is SAME, false otherwise
     *
     * @return Shape of the output tensor
     */
    std::pair<unsigned int, unsigned int> get_output_shape(
        int  num_rows,    /* Number of rows in each feature map of the input tensor. */
        int  num_cols,    /* Number of columns in each feature map of the input tensor. */
        bool padding_same) const override;

    /** Get the working space required to perform the transformation.
     *
     * Note, the working space is only required when performing the
     * transformation - hence it can be reused whenever the transformation is
     * not running.
     *
     * @param[in] num_threads The greatest number of threads that will be used to execute the transform.
     *
     * @return Size of working space required in bytes.
     */
    unsigned int get_working_space_size(unsigned int num_threads) const override;

    /** Configure the output transform kernel.
     *
     * @param[in]  biases             Pointer to the biases tensor.
     * @param[in]  transformed_output Pointer to working space for the output tensor in the Winograd domain.
     * @param[in]  matrix_stride      Output matrix stride, can be computed with winograd::WinogradGEMM<2, 2, 3, 3>::Convolution<float, float>::get_output_matrix_stride()
     * @param[out] output_nhwc        Pointer to a tensor with NHWC data layout, in the spatial domain.
     * @param[in]  num_batches        Number of batches in the input tensor.
     * @param[in]  num_rows           Number of rows in output tensor.
     * @param[in]  num_cols           Number of columns in output tensor.
     * @param[in]  num_channels       Number of feature maps in the output tensor.
     * @param[in]  workspace          Tensor to be used as the working space during the computation.
     * @param[in]  activation         Activation to be used
     */
    void configure(
        const ITensor              *biases,
        const ITensor              *transformed_output,
        const int                   matrix_stride,
        ITensor                    *output_nhwc,
        const int                   num_batches,
        const int                   num_rows,
        const int                   num_cols,
        const int                   num_channels,
        ITensor                    *workspace,
        const arm_gemm::Activation &activation) override;

    void run(const Window &window, const ThreadInfo &info) override;

    /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformOutputKernel
     *
     * @param[in] input         Source tensor info with shape [C, N, 16, batches] or [C, N, 36, batches]. Data types supported: F16/F32.
     * @param[in] bias          Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input
     * @param[in] output        Destination tensor info with shape [output_convolved_dims.width, output_convolved_dims.height, C, batches]. Data type supported: same as @p input
     * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const WinogradInfo &winograd_info);

private:
    using WinogradBase    = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, winograd::WinogradRoots::Integers>;
    using WinogradConv    = typename WinogradBase::template Convolution<T, T>;
    using OutputTransform = typename WinogradBase::template OutputTransform<T, T>;

    std::unique_ptr<OutputTransform> _transform{ nullptr };
    const ITensor                   *_biases;
    const ITensor                   *_transformed_output;
    ITensor                         *_workspace;
    int                              _matrix_stride;
    int                              _matrix_row_stride;
    ITensor                         *_output_nhwc;
    int                              _num_batches;
    int                              _num_rows;
    int                              _num_cols;
    int                              _num_channels;
};
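
/* Editorial sketch (not part of the library): wiring the output transform after the batched
 * GEMM. `transformed_output` holds the Winograd-domain result and `output_nhwc` the final
 * spatial tensor; both are assumed to be caller-allocated, with output_nhwc shaped according
 * to get_output_shape(). The fused activation is passed as an arm_gemm::Activation descriptor.
 *
 *   using OutputTransform = NEWinogradLayerTransformOutputKernel<float, 2, 2, 3, 3>;
 *
 *   void run_output_transform(const ITensor *biases, const ITensor *transformed_output, int matrix_stride,
 *                             ITensor *output_nhwc, int num_batches, int num_rows, int num_cols,
 *                             int num_channels, ITensor *workspace, const arm_gemm::Activation &activation)
 *   {
 *       OutputTransform transform;
 *       transform.configure(biases, transformed_output, matrix_stride, output_nhwc,
 *                           num_batches, num_rows, num_cols, num_channels, workspace, activation);
 *       NEScheduler::get().schedule(&transform, Window::DimX);
 *   }
 */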

/** Interface for the NEON kernel to perform Winograd weights transform. */
class INEWinogradLayerTransformWeightsKernel : public INEKernel
{
public:
    /** Default copy constructor */
    INEWinogradLayerTransformWeightsKernel(const INEWinogradLayerTransformWeightsKernel &) = default;
    /** Default copy assignment operator */
    INEWinogradLayerTransformWeightsKernel &operator=(const INEWinogradLayerTransformWeightsKernel &) = default;
    /** Allow instances of this class to be moved */
    INEWinogradLayerTransformWeightsKernel(INEWinogradLayerTransformWeightsKernel &&) = default;
    /** Allow instances of this class to be moved */
    INEWinogradLayerTransformWeightsKernel &operator=(INEWinogradLayerTransformWeightsKernel &&) = default;

    INEWinogradLayerTransformWeightsKernel()
    {
    }
    virtual ~INEWinogradLayerTransformWeightsKernel()
    {
    }
    /** Determine how much memory (in units of T) to allocate for the
     * transformed weights.
     *
     * @param[in] num_output_channels Number of output feature maps.
     * @param[in] num_input_channels  Number of input feature maps.
     *
     * @return Storage size (in units of T) required.
     */
    virtual unsigned int get_weight_storage_size(int num_output_channels, int num_input_channels) const = 0;
    /** Gets the stride between matrices in the kernel workspace
     *
     * @param[in] num_output_channels Number of output feature maps.
     * @param[in] num_input_channels  Number of input feature maps.
     *
     * @return Stride expressed in bytes.
     */
    virtual int get_matrix_stride(int num_output_channels, int num_input_channels) const = 0;

    /** Configure the weights transform kernel.
     *
     * @param[in]  weights_hwio        Pointer to the weights tensor
     * @param[out] output              Pointer to working space for the output tensor in the Winograd domain.
     * @param[in]  matrix_stride       Stride across matrices in the output workspace.
     * @param[in]  num_output_channels Number of filters.
     * @param[in]  num_input_channels  Number of channels in each filter.
     */
    virtual void configure(const ITensor *weights_hwio, ITensor *output, const int matrix_stride, const int num_output_channels, const int num_input_channels) = 0;

    /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformWeightsKernel
     *
     * @param[in] input   First tensor input info. Data types supported: F16/F32.
     * @param[in] weights Weights tensor info. Data types supported: same as @p input.
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *weights);
};

/** NEON kernel to perform Winograd weights transform. */
template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerTransformWeightsKernel final : public INEWinogradLayerTransformWeightsKernel
{
public:
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformWeightsKernel(const NEWinogradLayerTransformWeightsKernel &) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformWeightsKernel &operator=(const NEWinogradLayerTransformWeightsKernel &) = delete;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformWeightsKernel(NEWinogradLayerTransformWeightsKernel &&) = default;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformWeightsKernel &operator=(NEWinogradLayerTransformWeightsKernel &&) = default;
    /** Default destructor */
    ~NEWinogradLayerTransformWeightsKernel() = default;

    /** Default constructor. */
    NEWinogradLayerTransformWeightsKernel();
    const char *name() const override
    {
        return "NEWinogradLayerTransformWeightsKernel";
    }

    /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformWeightsKernel
     *
     * @param[in] input         Source tensor info. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout).
     *                          kernel_x must be 3 and equal to kernel_y. Data types supported: F16/F32.
     * @param[in] output        Destination tensor info. The output is a 3D tensor with dimensions [OFM, IFM, 16] or [OFM, IFM, 36]. Data type supported: same as @p input
     * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info);

    // Inherited methods overridden:

#ifndef DOXYGEN_SKIP_THIS
    /** Configure the weights transform kernel.
     *
     * @param[in]  weights_hwio        Pointer to the weights tensor
     * @param[out] output              Pointer to working space for the output tensor in the Winograd domain.
     * @param[in]  matrix_stride       Stride across matrices in the output workspace.
     * @param[in]  num_output_channels Number of filters.
     * @param[in]  num_input_channels  Number of channels in each filter.
     */
    void configure(const ITensor *weights_hwio, ITensor *output, const int matrix_stride, const int num_output_channels, const int num_input_channels) override;
#endif /* DOXYGEN_SKIP_THIS */

    /** Determine how much memory (in units of T) to allocate for the
     * transformed weights.
     *
     * @param[in] num_output_channels Number of output feature maps.
     * @param[in] num_input_channels  Number of input feature maps.
     *
     * @return Storage size (in units of T) required.
     */
    unsigned int get_weight_storage_size(int num_output_channels, int num_input_channels) const override;

    /** Gets the stride between matrices in the kernel workspace
     *
     * @param[in] num_output_channels Number of output feature maps.
     * @param[in] num_input_channels  Number of input feature maps.
     *
     * @return Stride expressed in bytes.
     */
    int get_matrix_stride(int num_output_channels, int num_input_channels) const override;
    void run(const Window &window, const ThreadInfo &info) override;
    bool is_parallelisable() const override;

private:
    using WinogradBase     = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, winograd::WinogradRoots::Integers>;
    using WinogradConv     = typename WinogradBase::template Convolution<T, T>;
    using WeightsTransform = typename WinogradBase::template WeightsTransform<T, T>;

    std::unique_ptr<WeightsTransform> _transform{ nullptr };
    const ITensor                    *_weights_hwio;
    ITensor                          *_output;
    int                               _matrix_stride;
    int                               _num_output_channels;
    int                               _num_input_channels;
};
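
/* Editorial sketch (not part of the library): transforming HWIO-ordered weights into the
 * Winograd domain, which only needs to happen once per set of weights. Buffer sizes come from
 * the query functions declared above; all tensors are assumed to be caller-allocated.
 *
 *   using WeightsTransform = NEWinogradLayerTransformWeightsKernel<float, 2, 2, 3, 3>;
 *
 *   void run_weights_transform(const ITensor *weights_hwio, ITensor *winograd_weights,
 *                              int matrix_stride, int num_output_channels, int num_input_channels)
 *   {
 *       WeightsTransform transform;
 *       // winograd_weights must hold at least get_weight_storage_size(num_output_channels, num_input_channels)
 *       // elements of T; matrix_stride is the value returned by get_matrix_stride() for the same dimensions.
 *       transform.configure(weights_hwio, winograd_weights, matrix_stride, num_output_channels, num_input_channels);
 *       NEScheduler::get().schedule(&transform, Window::DimX);
 *   }
 */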

/** Compile-time configuration of the NEON kernels used to perform a Winograd convolution. */
template <typename TIn, typename TOut, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerConfiguration
{
public:
    /** Winograd base kernel */
    using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, winograd::WinogradRoots::Integers>;
    /** Winograd convolution kernel */
    using WinogradConv = typename WinogradBase::template Convolution<TIn, TOut>;

    using TransformInputKernel   = NEWinogradLayerTransformInputKernel<TIn, OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
    using TransformWeightsKernel = NEWinogradLayerTransformWeightsKernel<TIn, OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
    using TransformOutputKernel  = NEWinogradLayerTransformOutputKernel<TOut, OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
};
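
/* Editorial note: NEWinogradLayerConfiguration is a compile-time bundle - given the input/output
 * data types and the F(output tile, kernel) shape, it names the three matching transform kernels
 * so a convolution function can instantiate them consistently. A minimal sketch, assuming a
 * float F(2x2, 3x3) configuration:
 *
 *   using Cfg = NEWinogradLayerConfiguration<float, float, 2, 2, 3, 3>;
 *
 *   Cfg::TransformInputKernel   input_transform;   // NEWinogradLayerTransformInputKernel<float, 2, 2, 3, 3>
 *   Cfg::TransformWeightsKernel weights_transform; // NEWinogradLayerTransformWeightsKernel<float, 2, 2, 3, 3>
 *   Cfg::TransformOutputKernel  output_transform;  // NEWinogradLayerTransformOutputKernel<float, 2, 2, 3, 3>
 */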

} // namespace arm_compute
#endif /*ARM_COMPUTE_NEGEMMWINOGRADCONVOLUTIONLAYERKERNEL_H*/