blob: db2d8acfdbdeb4bb7913dd2ee028b7594f6e040a [file] [log] [blame]
/*
 * Copyright (c) 2017-2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
Michalis Spyrou96f977e2021-07-01 12:20:56 +010024#ifndef ARM_COMPUTE_CPUWINOGRADCONV2DKERNEL_H
25#define ARM_COMPUTE_CPUWINOGRADCONV2DKERNEL_H
Pablo Tello89519332017-11-17 11:52:36 +000026
Sang-Hoon Park68dd25f2020-10-19 16:00:11 +010027#include "src/core/NEON/kernels/convolution/common/convolution.hpp"
28#include "src/core/NEON/kernels/convolution/common/tensor.hpp"
Georgios Pinitas7891a732021-08-20 21:39:25 +010029#include "src/cpu/ICpuKernel.h"
Michele Di Giorgio6ad60af2020-06-09 14:52:15 +010030
31#include "src/core/NEON/kernels/convolution/winograd/winograd_layer.hpp"
Pablo Tello89519332017-11-17 11:52:36 +000032
33namespace arm_compute
34{
Michalis Spyrou96f977e2021-07-01 12:20:56 +010035namespace cpu
36{
Michele Di Giorgio33f41fa2021-03-09 14:09:08 +000037/** Interface for the kernel to perform Winograd input transform. */
Michalis Spyrou96f977e2021-07-01 12:20:56 +010038class ICpuWinogradConv2dTransformInputKernel : public ICpuKernel
Pablo Tello3d4968a2017-12-04 15:03:35 +000039{
40public:
Pablo Tello8f43d742019-03-27 09:28:32 +000041 /** Get the working space required to perform the transformation.
42 *
43 * Note, the working space is only required when performing the
44 * transformation - hence it can be reused whenever the transformation is
45 * not running.
46 *
47 * @param num_threads The greatest number of threads that will be used to execute the transform.
48 * @return Size of working space required in bytes.
49 */
50 virtual unsigned int get_working_space_size(unsigned int num_threads) const = 0;
51
Pablo Tello52140b42018-01-30 14:48:11 +000052 /** Determine how much memory (in units of TIn) to allocate for the
53 * transformed input.
Pablo Tello6c6e77a2018-01-23 10:03:27 +000054 *
Pablo Tello7df27862018-05-30 11:44:26 +010055 * @param[in] num_batches Number of batches in the input tensor.
56 * @param[in] num_channels Number of feature maps in the input tensor.
57 * @param[in] num_rows Number of rows in each feature map.
58 * @param[in] num_cols Number of columns in each feature map.
Pablo Tello52140b42018-01-30 14:48:11 +000059 * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
Alex Gildayc357c472018-03-21 13:54:09 +000060 *
61 * @return Storage size (in units of TIn) required.
Pablo Tello6c6e77a2018-01-23 10:03:27 +000062 */
Pablo Tello7df27862018-05-30 11:44:26 +010063 virtual unsigned int get_input_storage_size(int num_batches, int num_channels, int num_rows, int num_cols, bool same_padding) const = 0;
Pablo Tellof6c572c2018-02-14 12:47:30 +000064
65 /** Gets the stride between matrices in the input worspace
66 *
Pablo Tello5264b7d2019-10-21 14:25:41 +010067 * @param[in] num_batches Number of batches in the input tensor.
68 * @param[in] num_channels Number of feature maps in the input tensor.
69 * @param[in] num_rows Number of rows in each feature map.
70 * @param[in] num_cols Number of columns in each feature map.
71 * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
Pablo Tellof6c572c2018-02-14 12:47:30 +000072 *
73 * @return Stride expressed in bytes.
74 */
Pablo Tello5264b7d2019-10-21 14:25:41 +010075 virtual int get_matrix_stride(int num_batches, int num_channels, int num_rows, int num_cols, bool same_padding) const = 0;
Pablo Tellof6c572c2018-02-14 12:47:30 +000076
77 /** Configure the output transform kernel.
78 *
Pablo Tello7df27862018-05-30 11:44:26 +010079 * @param[in] input_nhwc Input tensor in NHWC data layout format.
80 * @param[in] num_batches Number of batches in input tensor.
81 * @param[in] num_rows Number of rows in input tensor.
82 * @param[in] num_cols Number of columns in input tensor.
83 * @param[in] num_channels Number of channels in input tensor.
Pablo Tellof6c572c2018-02-14 12:47:30 +000084 * @param[in] padding Padding type.
85 * @param[out] output Base of output matrices.
86 * @param[in] matrix_stride Stride between output matrices.
Pablo Tello8f43d742019-03-27 09:28:32 +000087 * @param[in] workspace Tensor to be used as the working space during the computation.
Pablo Tellof6c572c2018-02-14 12:47:30 +000088 */
Michalis Spyrou96f977e2021-07-01 12:20:56 +010089 virtual void configure(const ITensorInfo *input_nhwc, const int num_batches, const int num_rows, const int num_cols, const int num_channels,
90 const PaddingType padding, ITensorInfo *output, const int matrix_stride, ITensorInfo *workspace) = 0;
Pablo Tellof6c572c2018-02-14 12:47:30 +000091
Alex Gildayc357c472018-03-21 13:54:09 +000092 /** Destructor */
Michalis Spyrou96f977e2021-07-01 12:20:56 +010093 virtual ~ICpuWinogradConv2dTransformInputKernel()
Pablo Tellof6c572c2018-02-14 12:47:30 +000094 {
95 }
96};
97
Michele Di Giorgio33f41fa2021-03-09 14:09:08 +000098/** Kernel to perform Winograd input transform. */
Pablo Tellof6c572c2018-02-14 12:47:30 +000099template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100100class CpuWinogradConv2dTransformInputKernel : public ICpuWinogradConv2dTransformInputKernel
Pablo Tellof6c572c2018-02-14 12:47:30 +0000101{
102public:
Pablo Tello7df27862018-05-30 11:44:26 +0100103 /** Prevent instances of this class from being copied (As this class contains pointers) */
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100104 CpuWinogradConv2dTransformInputKernel(const CpuWinogradConv2dTransformInputKernel &) = delete;
Pablo Tello7df27862018-05-30 11:44:26 +0100105 /** Prevent instances of this class from being copied (As this class contains pointers) */
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100106 CpuWinogradConv2dTransformInputKernel &operator=(const CpuWinogradConv2dTransformInputKernel &) = delete;
Pablo Tello7df27862018-05-30 11:44:26 +0100107 /** Allow instances of this class to be moved */
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100108 CpuWinogradConv2dTransformInputKernel(CpuWinogradConv2dTransformInputKernel &&) = default;
Pablo Tello7df27862018-05-30 11:44:26 +0100109 /** Allow instances of this class to be moved */
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100110 CpuWinogradConv2dTransformInputKernel &operator=(CpuWinogradConv2dTransformInputKernel &&) = default;
Pablo Tello7df27862018-05-30 11:44:26 +0100111 /** Default destructor */
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100112 ~CpuWinogradConv2dTransformInputKernel() = default;
Pablo Tello7df27862018-05-30 11:44:26 +0100113
Pablo Tellof6c572c2018-02-14 12:47:30 +0000114 /** Determine how much memory (in units of TIn) to allocate for the
115 * transformed input.
116 *
Pablo Tello7df27862018-05-30 11:44:26 +0100117 * @param[in] num_batches Number of batches in the input tensor.
118 * @param[in] num_channels Number of feature maps in the input tensor.
119 * @param[in] num_rows Number of rows in each feature map.
120 * @param[in] num_cols Number of columns in each feature map.
Pablo Tellof6c572c2018-02-14 12:47:30 +0000121 * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
Alex Gildayc357c472018-03-21 13:54:09 +0000122 *
123 * @return Storage size (in units of TIn) required.
Pablo Tellof6c572c2018-02-14 12:47:30 +0000124 */
125 unsigned int get_input_storage_size(
Pablo Tello7df27862018-05-30 11:44:26 +0100126 int num_batches,
127 int num_channels,
128 int num_rows,
129 int num_cols,
Pablo Tellof6c572c2018-02-14 12:47:30 +0000130 bool same_padding) const override;
131
Pablo Tello8f43d742019-03-27 09:28:32 +0000132 /** Get the working space required to perform the transformation.
133 *
134 * Note, the working space is only required when performing the
135 * transformation - hence it can be reused whenever the transformation is
136 * not running.
137 *
138 * @param[in] num_threads The greatest number of threads that will be used to execute the transform.
139 *
140 * @return Size of working space required in bytes.
141 */
142 unsigned int get_working_space_size(unsigned int num_threads) const override;
143
Pablo Tellof6c572c2018-02-14 12:47:30 +0000144 /** Gets the stride between matrices in the input worspace
145 *
Pablo Tello5264b7d2019-10-21 14:25:41 +0100146 * @param[in] num_batches Number of batches in the input tensor.
147 * @param[in] num_channels Number of feature maps in the input tensor.
148 * @param[in] num_rows Number of rows in each feature map.
149 * @param[in] num_cols Number of columns in each feature map.
150 * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
Pablo Tellof6c572c2018-02-14 12:47:30 +0000151 *
152 * @return Stride expressed in bytes.
153 */
Pablo Tello5264b7d2019-10-21 14:25:41 +0100154 int get_matrix_stride(
155 int num_batches,
156 int num_channels,
157 int num_rows,
158 int num_cols,
159 bool same_padding) const override;
Pablo Tellod6ca4782018-01-23 09:36:04 +0000160
Alex Gildayc357c472018-03-21 13:54:09 +0000161 /** Default constructor */
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100162 CpuWinogradConv2dTransformInputKernel();
Pablo Tellof6c572c2018-02-14 12:47:30 +0000163
Pablo Tellod6ca4782018-01-23 09:36:04 +0000164 const char *name() const override
165 {
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100166 return "CpuWinogradConv2dTransformInputKernel";
Pablo Tellod6ca4782018-01-23 09:36:04 +0000167 }
Pablo Tello52140b42018-01-30 14:48:11 +0000168
169 /** Configure the output transform kernel.
170 *
Georgios Pinitas5ce897f2020-04-29 11:44:10 +0100171 * @param[in] input_nhwc Input tensor. Data types supported: F16/F32. Layout supported NHWC.
Pablo Tello7df27862018-05-30 11:44:26 +0100172 * @param[in] num_batches Number of batches in input tensor.
173 * @param[in] num_rows Number of rows in input tensor.
174 * @param[in] num_cols Number of columns in input tensor.
175 * @param[in] num_channels Number of channels in input tensor.
Pablo Tello52140b42018-01-30 14:48:11 +0000176 * @param[in] padding Padding type.
177 * @param[out] output Base of output matrices.
178 * @param[in] matrix_stride Stride between output matrices.
Pablo Tello8f43d742019-03-27 09:28:32 +0000179 * @param[in] workspace Tensor to be used as the working space during the computation.
Pablo Tello52140b42018-01-30 14:48:11 +0000180 */
181 void configure(
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100182 const ITensorInfo *input_nhwc,
183 const int num_batches,
184 const int num_rows,
185 const int num_cols,
186 const int num_channels,
187 const PaddingType padding,
188 ITensorInfo *output,
189 const int matrix_stride,
190 ITensorInfo *workspace) override;
Pablo Tello52140b42018-01-30 14:48:11 +0000191
Pablo Tellod6ca4782018-01-23 09:36:04 +0000192 // Inherited methods overridden:
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100193 void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
Pablo Tello52140b42018-01-30 14:48:11 +0000194
Alex Gildayc357c472018-03-21 13:54:09 +0000195 /** Winograd base kernel */
Pablo Tello8f43d742019-03-27 09:28:32 +0000196 using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, winograd::WinogradRoots::Integers>;
Alex Gildayc357c472018-03-21 13:54:09 +0000197 /** Winograd convolution kernel */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000198 using WinogradConv = typename WinogradBase::template Convolution<T, T>;
199
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100200 /** Static function to check if given info will lead to a valid configuration of @ref CpuWinogradConv2dTransformInputKernel
Vidhya Sudhan Loganathan3ca97862018-04-23 08:20:04 +0100201 *
Georgios Pinitas5ce897f2020-04-29 11:44:10 +0100202 * @param[in] input First tensor input info. Data types supported: F16/F32.
Vidhya Sudhan Loganathan84ce1f92018-04-25 13:00:09 +0100203 * @param[in] output Output tensor info. Data types supported: same as @p input.
204 * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
Vidhya Sudhan Loganathan3ca97862018-04-23 08:20:04 +0100205 *
206 * @return a status
207 */
Vidhya Sudhan Loganathan84ce1f92018-04-25 13:00:09 +0100208 static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info);
Vidhya Sudhan Loganathan3ca97862018-04-23 08:20:04 +0100209
Pablo Tello52140b42018-01-30 14:48:11 +0000210private:
Pablo Tello8f43d742019-03-27 09:28:32 +0000211 using InputTransform = typename WinogradBase::template InputTransform<T, T>;
212
213 std::unique_ptr<InputTransform> _transform{ nullptr };
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100214 int _num_channels; /**< Number of channels in input tensor. */
215 int _matrix_stride; /**< Stride between output matrices. */
Pablo Tellod6ca4782018-01-23 09:36:04 +0000216};
217
Michele Di Giorgio33f41fa2021-03-09 14:09:08 +0000218/** Interface for the kernel to perform Winograd output transform. */
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100219class ICpuWinogradConv2dTransformOutputKernel : public ICpuKernel
Pablo Tellod6ca4782018-01-23 09:36:04 +0000220{
221public:
Pablo Tello8f43d742019-03-27 09:28:32 +0000222 /** Get the working space required to perform the transformation.
223 *
224 * Note, the working space is only required when performing the
225 * transformation - hence it can be reused whenever the transformation is
226 * not running.
227 *
228 * @param[in] num_threads The greatest number of threads that will be used to execute the transform.
229 *
230 * @return Size of working space required in bytes.
231 */
232 virtual unsigned int get_working_space_size(unsigned int num_threads) const = 0;
233
Pablo Tello52140b42018-01-30 14:48:11 +0000234 /** Determine how much memory (in units of TOut) to allocate for the
235 * (Winograd domain) output.
236 *
Pablo Tello7df27862018-05-30 11:44:26 +0100237 * @param[in] num_batches Number of batches in the output tensor.
238 * @param[in] num_rows Number of rows in each feature map of the input tensor.
239 * @param[in] num_cols Number of columns in each feature map of the input tensor.
240 * @param[in] num_output_channels Number of feature maps in the output tensor.
Alex Gildayc357c472018-03-21 13:54:09 +0000241 *
242 * @return Storage size (in units of TOut) required.
Pablo Tello52140b42018-01-30 14:48:11 +0000243 */
Pablo Tello5264b7d2019-10-21 14:25:41 +0100244 virtual unsigned int get_output_storage_size(int num_batches, int num_rows, int num_cols, int num_output_channels) const = 0;
Pablo Tello52140b42018-01-30 14:48:11 +0000245
Pablo Tellof6c572c2018-02-14 12:47:30 +0000246 /** Gets the stride between matrices in the output worspace
247 *
Pablo Tello5264b7d2019-10-21 14:25:41 +0100248 * @param[in] num_batches Number of batches in the output tensor.
249 * @param[in] num_rows Number of rows in each feature map of the input tensor.
250 * @param[in] num_cols Number of columns in each feature map of the input tensor.
251 * @param[in] num_output_channels Number of feature maps in the output tensor.
Pablo Tellof6c572c2018-02-14 12:47:30 +0000252 *
253 * @return Stride expressed in bytes.
254 */
Pablo Tello5264b7d2019-10-21 14:25:41 +0100255 virtual int get_matrix_stride(int num_batches, int num_rows, int num_cols, int num_output_channels) const = 0;
Pablo Tellof6c572c2018-02-14 12:47:30 +0000256
257 /** Get the output shape of a convolution.
258 *
Pablo Tello5264b7d2019-10-21 14:25:41 +0100259 * @param[in] num_rows Number of rows in each feature map of the input tensor.
260 * @param[in] num_cols Number of columns in each feature map of the input tensor.
261 * @param[in] padding_same True if padding is SAME, false otherwise
Pablo Tellof6c572c2018-02-14 12:47:30 +0000262 *
Pablo Tello5264b7d2019-10-21 14:25:41 +0100263 * @return Shape of the output tensor
Pablo Tellof6c572c2018-02-14 12:47:30 +0000264 */
Pablo Tello5264b7d2019-10-21 14:25:41 +0100265 virtual std::pair<unsigned int, unsigned int> get_output_shape(
266 int num_rows, /* Number of rows in each feature map of the input tensor. */
267 int num_cols, /* Number of columns in each feature map of the input tensor. */
268 bool padding_same /* True if padding is SAME, false otherwise */
269 ) const = 0;
Pablo Tellof6c572c2018-02-14 12:47:30 +0000270
271 /** Configure the output transform kernel.
272 *
Pablo Tello8f43d742019-03-27 09:28:32 +0000273 * @param[in] biases Pointer to the biases tensor.
274 * @param[in] transformed_output Pointer to working space for the output tensor in the Winograd domain.
275 * @param[in] matrix_stride Output matrix stride, can be computed with winograd::WinogradGEMM<2, 2, 3, 3>::Convolution<float, float>::get_output_matrix_stride()
276 * @param[out] output_nhwc Pointer to a tensor in NHWC data layout ordered output tensor, in the spatial domain.
277 * @param[in] num_batches Number of batches in the input tensor.
278 * @param[in] num_rows Number of rows in output tensor.
279 * @param[in] num_cols Number of columns in output tensor.
280 * @param[in] num_channels Number of feature maps in the output tensor.
281 * @param[in] workspace Tensor to be used as the working space during the computation.
Pablo Tello5264b7d2019-10-21 14:25:41 +0100282 * @param[in] activation Activation to be used
Pablo Tellof6c572c2018-02-14 12:47:30 +0000283 */
284 virtual void configure(
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100285 const ITensorInfo *biases,
286 const ITensorInfo *transformed_output,
Pablo Tello5264b7d2019-10-21 14:25:41 +0100287 const int matrix_stride,
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100288 ITensorInfo *output_nhwc,
Pablo Tello5264b7d2019-10-21 14:25:41 +0100289 const int num_batches,
290 const int num_rows,
291 const int num_cols,
292 const int num_channels,
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100293 ITensorInfo *workspace,
Pablo Tello5264b7d2019-10-21 14:25:41 +0100294 const arm_gemm::Activation &activation) = 0;
Pablo Tellof6c572c2018-02-14 12:47:30 +0000295
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100296 virtual ~ICpuWinogradConv2dTransformOutputKernel()
Pablo Tellof6c572c2018-02-14 12:47:30 +0000297 {
298 }
299};
300
Michele Di Giorgio33f41fa2021-03-09 14:09:08 +0000301/** Kernel to perform Winograd output transform. */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000302template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100303class CpuWinogradConv2dTransformOutputKernel : public ICpuWinogradConv2dTransformOutputKernel
Pablo Tellof6c572c2018-02-14 12:47:30 +0000304{
305public:
Pablo Tellod6ca4782018-01-23 09:36:04 +0000306 const char *name() const override
307 {
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100308 return "CpuWinogradConv2dTransformOutputKernel";
Pablo Tellod6ca4782018-01-23 09:36:04 +0000309 }
310 /** Constructor */
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100311 CpuWinogradConv2dTransformOutputKernel();
Pablo Tellod6ca4782018-01-23 09:36:04 +0000312
313 /** Prevent instances of this class from being copied (As this class contains pointers) */
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100314 CpuWinogradConv2dTransformOutputKernel(const CpuWinogradConv2dTransformOutputKernel &) = delete;
Pablo Tellod6ca4782018-01-23 09:36:04 +0000315 /** Prevent instances of this class from being copied (As this class contains pointers) */
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100316 CpuWinogradConv2dTransformOutputKernel &operator=(const CpuWinogradConv2dTransformOutputKernel &) = delete;
Pablo Tellod6ca4782018-01-23 09:36:04 +0000317 /** Allow instances of this class to be moved */
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100318 CpuWinogradConv2dTransformOutputKernel(CpuWinogradConv2dTransformOutputKernel &&) = default;
Pablo Tellod6ca4782018-01-23 09:36:04 +0000319 /** Allow instances of this class to be moved */
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100320 CpuWinogradConv2dTransformOutputKernel &operator=(CpuWinogradConv2dTransformOutputKernel &&) = default;
Alex Gildayc357c472018-03-21 13:54:09 +0000321 /** Default destructor */
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100322 ~CpuWinogradConv2dTransformOutputKernel() = default;
Pablo Tellod6ca4782018-01-23 09:36:04 +0000323
Pablo Tellof6c572c2018-02-14 12:47:30 +0000324 // Inherited methods overridden:
325 /** Determine how much memory (in units of TOut) to allocate for the
326 * (Winograd domain) output.
327 *
Pablo Tello7df27862018-05-30 11:44:26 +0100328 * @param[in] num_batches Number of batches in the output tensor.
329 * @param[in] num_rows Number of rows in each feature map of the input tensor.
330 * @param[in] num_cols Number of columns in each feature map of the input tensor.
331 * @param[in] num_output_channels Number of feature maps in the output tensor.
Alex Gildayc357c472018-03-21 13:54:09 +0000332 *
333 * @return Storage size (in units of TOut) required.
Pablo Tellof6c572c2018-02-14 12:47:30 +0000334 */
Pablo Tello5264b7d2019-10-21 14:25:41 +0100335 unsigned int get_output_storage_size(int num_batches, int num_rows, int num_cols, int num_output_channels) const override;
Pablo Tellof6c572c2018-02-14 12:47:30 +0000336
337 /** Gets the stride between matrices in the output worspace
338 *
Pablo Tello5264b7d2019-10-21 14:25:41 +0100339 * @param[in] num_batches Number of batches in the output tensor.
340 * @param[in] num_rows Number of rows in each feature map of the input tensor.
341 * @param[in] num_cols Number of columns in each feature map of the input tensor.
342 * @param[in] num_output_channels Number of feature maps in the output tensor.
Pablo Tellof6c572c2018-02-14 12:47:30 +0000343 *
344 * @return Stride expressed in bytes.
345 */
Pablo Tello5264b7d2019-10-21 14:25:41 +0100346 int get_matrix_stride(int num_batches, int num_rows, int num_cols, int num_output_channels) const override;
Pablo Tellof6c572c2018-02-14 12:47:30 +0000347 /** Get the output shape of a convolution.
348 *
Pablo Tello5264b7d2019-10-21 14:25:41 +0100349 * @param[in] num_rows Number of rows in each feature map of the input tensor.
350 * @param[in] num_cols Number of columns in each feature map of the input tensor.
351 * @param[in] padding_same True if padding is SAME, false otherwise
Pablo Tellof6c572c2018-02-14 12:47:30 +0000352 *
Pablo Tello5264b7d2019-10-21 14:25:41 +0100353 * @return Shape of the output tensor
Pablo Tellof6c572c2018-02-14 12:47:30 +0000354 */
Pablo Tello5264b7d2019-10-21 14:25:41 +0100355 std::pair<unsigned int, unsigned int> get_output_shape(
356 int num_rows, /* Number of rows in each feature map of the input tensor. */
357 int num_cols, /* Number of columns in each feature map of the input tensor. */
358 bool padding_same) const override;
Pablo Tellof6c572c2018-02-14 12:47:30 +0000359
Pablo Tello8f43d742019-03-27 09:28:32 +0000360 /** Get the working space required to perform the transformation.
361 *
362 * Note, the working space is only required when performing the
363 * transformation - hence it can be reused whenever the transformation is
364 * not running.
365 *
366 * @param[in] num_threads The greatest number of threads that will be used to execute the transform.
367 *
368 * @return Size of working space required in bytes.
369 */
370 unsigned int get_working_space_size(unsigned int num_threads) const override;
371
Pablo Tellod6ca4782018-01-23 09:36:04 +0000372 /** Configure the output transform kernel.
373 *
Pablo Tello8f43d742019-03-27 09:28:32 +0000374 * @param[in] biases Pointer to the biases tensor.
375 * @param[in] transformed_output Pointer to working space for the output tensor in the Winograd domain.
376 * @param[in] matrix_stride Output matrix stride, can be computed with winograd::WinogradGEMM<2, 2, 3, 3>::Convolution<float, float>::get_output_matrix_stride()
377 * @param[out] output_nhwc Pointer to a tensor with NHWC data layout, in the spatial domain.
378 * @param[in] num_batches Number of batches in the input tensor.
379 * @param[in] num_rows Number of rows in output tensor.
380 * @param[in] num_cols Number of columns in output tensor.
381 * @param[in] num_channels Number of feature maps in the output tensor.
382 * @param[in] workspace Tensor to be used as the working space during the computation.
Pablo Tello5264b7d2019-10-21 14:25:41 +0100383 * @param[in] activation Activation to be used
Pablo Tellod6ca4782018-01-23 09:36:04 +0000384 */
385 void configure(
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100386 const ITensorInfo *biases,
387 const ITensorInfo *transformed_output,
Pablo Tello5264b7d2019-10-21 14:25:41 +0100388 const int matrix_stride,
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100389 ITensorInfo *output_nhwc,
Pablo Tello5264b7d2019-10-21 14:25:41 +0100390 const int num_batches,
391 const int num_rows,
392 const int num_cols,
393 const int num_channels,
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100394 ITensorInfo *workspace,
Pablo Tello5264b7d2019-10-21 14:25:41 +0100395 const arm_gemm::Activation &activation) override;
Pablo Tellod6ca4782018-01-23 09:36:04 +0000396
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100397 void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
Pablo Tellod6ca4782018-01-23 09:36:04 +0000398
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100399 /** Static function to check if given info will lead to a valid configuration of @ref CpuWinogradConv2dTransformOutputKernel
Vidhya Sudhan Loganathan3ca97862018-04-23 08:20:04 +0100400 *
Georgios Pinitas5ce897f2020-04-29 11:44:10 +0100401 * @param[in] input Source tensor info with shape [C, N, 16, batches] or [C, N, 36, batches]. Data types supported: F16/F32.
Pablo Tello8f43d742019-03-27 09:28:32 +0000402 * @param[in] bias Biases tensor info. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input
403 * @param[in] output Destination tensor info with shape [output_convolved_dims.width, output_convolved_dims.height, C, batches]. Data type supported: same as @p input
404 * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
Vidhya Sudhan Loganathan3ca97862018-04-23 08:20:04 +0100405 *
406 * @return a status
407 */
Vidhya Sudhan Loganathan84ce1f92018-04-25 13:00:09 +0100408 static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const WinogradInfo &winograd_info);
Vidhya Sudhan Loganathan3ca97862018-04-23 08:20:04 +0100409
Pablo Tellod6ca4782018-01-23 09:36:04 +0000410private:
Pablo Tello8f43d742019-03-27 09:28:32 +0000411 using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, winograd::WinogradRoots::Integers>;
Pablo Tellof6c572c2018-02-14 12:47:30 +0000412 using WinogradConv = typename WinogradBase::template Convolution<T, T>;
Pablo Tello8f43d742019-03-27 09:28:32 +0000413 using OutputTransform = typename WinogradBase::template OutputTransform<T, T>;
Pablo Tello52140b42018-01-30 14:48:11 +0000414
Pablo Tello8f43d742019-03-27 09:28:32 +0000415 std::unique_ptr<OutputTransform> _transform{ nullptr };
Pablo Tello8f43d742019-03-27 09:28:32 +0000416 int _matrix_stride;
417 int _matrix_row_stride;
Pablo Tellod6ca4782018-01-23 09:36:04 +0000418};
419
Michele Di Giorgio33f41fa2021-03-09 14:09:08 +0000420/** Interface for the kernel to perform Winograd weights transform. */
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100421class ICpuWinogradConv2dTransformWeightsKernel : public ICpuKernel
Pablo Tellod6ca4782018-01-23 09:36:04 +0000422{
423public:
Pablo Tellobda6e4b2018-08-22 11:40:33 +0100424 /** Prevent instances of this class from being copied (As this class contains pointers) */
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100425 ICpuWinogradConv2dTransformWeightsKernel(const ICpuWinogradConv2dTransformWeightsKernel &) = default;
Pablo Tellobda6e4b2018-08-22 11:40:33 +0100426 /** Prevent instances of this class from being copied (As this class contains pointers) */
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100427 ICpuWinogradConv2dTransformWeightsKernel &operator=(const ICpuWinogradConv2dTransformWeightsKernel &) = default;
Pablo Tellobda6e4b2018-08-22 11:40:33 +0100428 /** Allow instances of this class to be moved */
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100429 ICpuWinogradConv2dTransformWeightsKernel(ICpuWinogradConv2dTransformWeightsKernel &&) = default;
Pablo Tellobda6e4b2018-08-22 11:40:33 +0100430 /** Allow instances of this class to be moved */
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100431 ICpuWinogradConv2dTransformWeightsKernel &operator=(ICpuWinogradConv2dTransformWeightsKernel &&) = default;
Pablo Tellobda6e4b2018-08-22 11:40:33 +0100432
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100433 ICpuWinogradConv2dTransformWeightsKernel()
Pablo Tellobda6e4b2018-08-22 11:40:33 +0100434 {
435 }
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100436 virtual ~ICpuWinogradConv2dTransformWeightsKernel()
Pablo Tellobda6e4b2018-08-22 11:40:33 +0100437 {
438 }
Pablo Tellof6c572c2018-02-14 12:47:30 +0000439 /** Determine how much memory (in units of T) to allocate for the
Pablo Tello52140b42018-01-30 14:48:11 +0000440 * transformed weights.
441 *
Pablo Tello7df27862018-05-30 11:44:26 +0100442 * @param[in] num_output_channels Number of output feature maps.
443 * @param[in] num_input_channels Number of input feature maps.
Alex Gildayc357c472018-03-21 13:54:09 +0000444 *
445 * @return Storage size (in units of T) required.
Pablo Tello52140b42018-01-30 14:48:11 +0000446 */
Pablo Tello7df27862018-05-30 11:44:26 +0100447 virtual unsigned int get_weight_storage_size(int num_output_channels, int num_input_channels) const = 0;
Pablo Tellof6c572c2018-02-14 12:47:30 +0000448 /** Gets the stride between matrices in the kernel worspace
449 *
Pablo Tello5264b7d2019-10-21 14:25:41 +0100450 * @param[in] num_output_channels Number of output feature maps.
451 * @param[in] num_input_channels Number of input feature maps.
Pablo Tellof6c572c2018-02-14 12:47:30 +0000452 *
453 * @return Stride expressed in bytes.
454 */
Pablo Tello5264b7d2019-10-21 14:25:41 +0100455 virtual int get_matrix_stride(int num_output_channels, int num_input_channels) const = 0;
Pablo Tello52140b42018-01-30 14:48:11 +0000456
Pablo Tellof6c572c2018-02-14 12:47:30 +0000457 /** Configure the weights transform kernel.
Pablo Tello52140b42018-01-30 14:48:11 +0000458 *
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100459 * @param[in] weights_hwio Pointer to the weights tensor info
Anthony Barbiere1553372018-07-16 18:53:52 +0100460 * @param[out] output Pointer to working space for the output tensor in the Winograd domain.
461 * @param[in] matrix_stride Stride across matrices in the output workspace.
462 * @param[in] num_output_channels Number of filters.
463 * @param[in] num_input_channels Number of channels in each filter.
Pablo Tello52140b42018-01-30 14:48:11 +0000464 */
Pablo Tello7df27862018-05-30 11:44:26 +0100465
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100466 virtual void configure(const ITensorInfo *weights_hwio, ITensorInfo *output, const int matrix_stride, const int num_output_channels, const int num_input_channels) = 0;
Pablo Tellof6c572c2018-02-14 12:47:30 +0000467
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100468 /** Static function to check if given info will lead to a valid configuration of @ref CpuWinogradConv2dTransformWeightsKernel
Pablo Tellobda6e4b2018-08-22 11:40:33 +0100469 *
Georgios Pinitas5ce897f2020-04-29 11:44:10 +0100470 * @param[in] input First tensor input info. Data types supported: F16/F32.
Pablo Tellobda6e4b2018-08-22 11:40:33 +0100471 * @param[in] weights Weights tensor info. Data types supported: same as @p input.
472 *
473 * @return a status
474 */
475 static Status validate(const ITensorInfo *input, const ITensorInfo *weights);
Pablo Tellof6c572c2018-02-14 12:47:30 +0000476};
477
Michele Di Giorgio33f41fa2021-03-09 14:09:08 +0000478/** Kernel to perform Winograd weights transform. */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000479template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100480class CpuWinogradConv2dTransformWeightsKernel final : public ICpuWinogradConv2dTransformWeightsKernel
Pablo Tellof6c572c2018-02-14 12:47:30 +0000481{
482public:
Pablo Tello7df27862018-05-30 11:44:26 +0100483 /** Prevent instances of this class from being copied (As this class contains pointers) */
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100484 CpuWinogradConv2dTransformWeightsKernel(const CpuWinogradConv2dTransformWeightsKernel &) = delete;
Pablo Tello7df27862018-05-30 11:44:26 +0100485 /** Prevent instances of this class from being copied (As this class contains pointers) */
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100486 CpuWinogradConv2dTransformWeightsKernel &operator=(const CpuWinogradConv2dTransformWeightsKernel &) = delete;
Pablo Tello7df27862018-05-30 11:44:26 +0100487 /** Allow instances of this class to be moved */
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100488 CpuWinogradConv2dTransformWeightsKernel(CpuWinogradConv2dTransformWeightsKernel &&) = default;
Pablo Tello7df27862018-05-30 11:44:26 +0100489 /** Allow instances of this class to be moved */
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100490 CpuWinogradConv2dTransformWeightsKernel &operator=(CpuWinogradConv2dTransformWeightsKernel &&) = default;
Pablo Tello7df27862018-05-30 11:44:26 +0100491 /** Default destructor */
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100492 ~CpuWinogradConv2dTransformWeightsKernel() = default;
Pablo Tello7df27862018-05-30 11:44:26 +0100493
Alex Gildayc357c472018-03-21 13:54:09 +0000494 /** Default constructor. */
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100495 CpuWinogradConv2dTransformWeightsKernel();
Pablo Tellof6c572c2018-02-14 12:47:30 +0000496 const char *name() const override
497 {
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100498 return "CpuWinogradConv2dTransformWeightsKernel";
Pablo Tellof6c572c2018-02-14 12:47:30 +0000499 }
Pablo Tello52140b42018-01-30 14:48:11 +0000500
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100501 /** Static function to check if given info will lead to a valid configuration of @ref CpuWinogradConv2dTransformWeightsKernel
Vidhya Sudhan Loganathan3ca97862018-04-23 08:20:04 +0100502 *
Vidhya Sudhan Loganathan84ce1f92018-04-25 13:00:09 +0100503 * @param[in] input Source tensor info. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout).
Georgios Pinitas5ce897f2020-04-29 11:44:10 +0100504 * kernel_x must be 3 and equal to kernel_y. Data types supported: F16/F32.
Vidhya Sudhan Loganathan84ce1f92018-04-25 13:00:09 +0100505 * @param[in] output Destination tensor info. The output is a 3D tensor with dimensions [OFM, IFM, 16] or [OFM, IFM, 36]. Data type supported: same as @p input
506 * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
Vidhya Sudhan Loganathan3ca97862018-04-23 08:20:04 +0100507 *
508 * @return a status
509 */
Vidhya Sudhan Loganathan84ce1f92018-04-25 13:00:09 +0100510 static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info);
Vidhya Sudhan Loganathan3ca97862018-04-23 08:20:04 +0100511
Pablo Tellod6ca4782018-01-23 09:36:04 +0000512 // Inherited methods overridden:
Vidhya Sudhan Loganathand646ae12018-11-19 15:18:20 +0000513
514#ifndef DOXYGEN_SKIP_THIS
515 /** Configure the weights transform kernel.
516 *
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100517 * @param[in] weights_hwio Pointer to the weights tensor info
Vidhya Sudhan Loganathand646ae12018-11-19 15:18:20 +0000518 * @param[out] output Pointer to working space for the output tensor in the Winograd domain.
519 * @param[in] matrix_stride Stride across matrices in the output workspace.
520 * @param[in] num_output_channels Number of filters.
521 * @param[in] num_input_channels Number of channels in each filter.
522 */
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100523 void configure(const ITensorInfo *weights_hwio, ITensorInfo *output, const int matrix_stride, const int num_output_channels, const int num_input_channels) override;
Vidhya Sudhan Loganathand646ae12018-11-19 15:18:20 +0000524#endif /* DOXYGEN_SKIP_THIS */
525
526 /** Determine how much memory (in units of T) to allocate for the
527 * transformed weights.
528 *
529 * @param[in] num_output_channels Number of output feature maps.
530 * @param[in] num_input_channels Number of input feature maps.
531 *
532 * @return Storage size (in units of T) required.
533 */
Pablo Tello7df27862018-05-30 11:44:26 +0100534 unsigned int get_weight_storage_size(int num_output_channels, int num_input_channels) const override;
Vidhya Sudhan Loganathand646ae12018-11-19 15:18:20 +0000535
536 /** Gets the stride between matrices in the input worspace
537 *
Pablo Tello5264b7d2019-10-21 14:25:41 +0100538 * @param[in] num_output_channels Number of output feature maps.
539 * @param[in] num_input_channels Number of input feature maps.
Vidhya Sudhan Loganathand646ae12018-11-19 15:18:20 +0000540 *
541 * @return Stride expressed in bytes.
542 */
Pablo Tello5264b7d2019-10-21 14:25:41 +0100543 int get_matrix_stride(int num_output_channels, int num_input_channels) const override;
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100544 void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
Pablo Tellod6ca4782018-01-23 09:36:04 +0000545 bool is_parallelisable() const override;
Pablo Tello52140b42018-01-30 14:48:11 +0000546
547private:
Pablo Tello8f43d742019-03-27 09:28:32 +0000548 using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, winograd::WinogradRoots::Integers>;
Pablo Tellof6c572c2018-02-14 12:47:30 +0000549 using WinogradConv = typename WinogradBase::template Convolution<T, T>;
Pablo Tello8f43d742019-03-27 09:28:32 +0000550 using WeightsTransform = typename WinogradBase::template WeightsTransform<T, T>;
Pablo Tello7df27862018-05-30 11:44:26 +0100551
Pablo Tello8f43d742019-03-27 09:28:32 +0000552 std::unique_ptr<WeightsTransform> _transform{ nullptr };
Pablo Tello8f43d742019-03-27 09:28:32 +0000553 int _num_output_channels;
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100554 int _matrix_stride;
Pablo Tellod6ca4782018-01-23 09:36:04 +0000555};
556
Michele Di Giorgio33f41fa2021-03-09 14:09:08 +0000557/** Kernel to perform Winograd. */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000558template <typename TIn, typename TOut, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100559class CpuWinogradConv2dConfiguration
Pablo Tello89519332017-11-17 11:52:36 +0000560{
561public:
Alex Gildayc357c472018-03-21 13:54:09 +0000562 /** Winograd base kernel */
Pablo Tello8f43d742019-03-27 09:28:32 +0000563 using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, winograd::WinogradRoots::Integers>;
Alex Gildayc357c472018-03-21 13:54:09 +0000564 /** Winograd convolution kernel */
Anthony Barbiere1553372018-07-16 18:53:52 +0100565
Pablo Tellof6c572c2018-02-14 12:47:30 +0000566 using WinogradConv = typename WinogradBase::template Convolution<TIn, TOut>;
Pablo Tello52140b42018-01-30 14:48:11 +0000567
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100568 using TransformInputKernel = CpuWinogradConv2dTransformInputKernel<TIn, OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
569 using TransformWeightsKernel = CpuWinogradConv2dTransformWeightsKernel<TIn, OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
570 using TransformOutputKernel = CpuWinogradConv2dTransformOutputKernel<TOut, OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
Pablo Tello89519332017-11-17 11:52:36 +0000571};
572
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100573} // namespace cpu
Pablo Tello89519332017-11-17 11:52:36 +0000574} // namespace arm_compute
Michalis Spyrou96f977e2021-07-01 12:20:56 +0100575#endif /*ARM_COMPUTE_CPUWINOGRADCONV2DKERNEL_H*/