blob: 68c133ee371f685d00750c0439ef9263ea9a54d9 [file] [log] [blame]
Pablo Tello89519332017-11-17 11:52:36 +00001/*
Pablo Tello9ceebbe2018-01-10 16:44:13 +00002 * Copyright (c) 2017-2018 ARM Limited.
Pablo Tello89519332017-11-17 11:52:36 +00003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
Georgios Pinitas9fb11592018-04-26 20:34:58 +010024#ifndef __ARM_COMPUTE_NEGEMMWINOGRADCONVOLUTIONLAYERKERNEL_H__
25#define __ARM_COMPUTE_NEGEMMWINOGRADCONVOLUTIONLAYERKERNEL_H__
Pablo Tello89519332017-11-17 11:52:36 +000026
27#include "arm_compute/core/NEON/INEKernel.h"
Georgios Pinitas4074c992018-01-30 18:13:46 +000028#include "arm_compute/core/NEON/kernels/convolution/common/convolution.hpp"
29#include "arm_compute/core/NEON/kernels/convolution/common/tensor.hpp"
30#include "arm_compute/core/NEON/kernels/convolution/winograd/batched_blocked_gemm.hpp"
31#include "arm_compute/core/NEON/kernels/convolution/winograd/winograd_gemm.hpp"
Pablo Tello89519332017-11-17 11:52:36 +000032
33namespace arm_compute
34{
35class ITensor;
Pablo Tello02541fb2017-12-15 09:48:59 +000036
/** Interface for the NEON kernel to perform Winograd input transform. */
template <typename T>
class INEWinogradLayerTransformInputKernel : public INEKernel
{
public:
    /** Determine how much memory (in units of TIn) to allocate for the
     * transformed input.
     *
     * @param[in] num_batches  Number of batches in the input tensor.
     * @param[in] num_channels Number of feature maps in the input tensor.
     * @param[in] num_rows     Number of rows in each feature map.
     * @param[in] num_cols     Number of columns in each feature map.
     * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
     *
     * @return Storage size (in units of TIn) required.
     */
    virtual unsigned int get_input_storage_size(int num_batches, int num_channels, int num_rows, int num_cols, bool same_padding) const = 0;

    /** Gets the stride between matrices in the input workspace
     *
     * @param[in] kernel_shape The shape of the weights tensor.
     * @param[in] input_shape  The shape of the input tensor.
     * @param[in] padding_type The type of padding to be used.
     *
     * @return Stride expressed in bytes.
     */
    virtual int get_matrix_stride(const KernelShape &kernel_shape, const Tensor4DShape &input_shape, const PaddingType padding_type) const = 0;

    /** Configure the input transform kernel.
     *
     * @param[in]  input_nhwc    Input tensor in NHWC data layout format.
     * @param[in]  num_batches   Number of batches in input tensor.
     * @param[in]  num_rows      Number of rows in input tensor.
     * @param[in]  num_cols      Number of columns in input tensor.
     * @param[in]  num_channels  Number of channels in input tensor.
     * @param[in]  padding       Padding type.
     * @param[out] output        Base of output matrices.
     * @param[in]  matrix_stride Stride between output matrices.
     */
    virtual void configure(const ITensor *input_nhwc, const int num_batches, const int num_rows, const int num_cols, const int num_channels,
                           const PaddingType padding, T *const output, const int matrix_stride) = 0;

    /** Destructor */
    virtual ~INEWinogradLayerTransformInputKernel()
    {
    }
};
84
Alex Gildayc357c472018-03-21 13:54:09 +000085/** NEON kernel to perform Winograd input transform. */
Pablo Tellof6c572c2018-02-14 12:47:30 +000086template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
87class NEWinogradLayerTransformInputKernel : public INEWinogradLayerTransformInputKernel<T>
88{
89public:
Pablo Tello7df27862018-05-30 11:44:26 +010090 /** Prevent instances of this class from being copied (As this class contains pointers) */
91 NEWinogradLayerTransformInputKernel(const NEWinogradLayerTransformInputKernel &) = delete;
92 /** Prevent instances of this class from being copied (As this class contains pointers) */
93 NEWinogradLayerTransformInputKernel &operator=(const NEWinogradLayerTransformInputKernel &) = delete;
94 /** Allow instances of this class to be moved */
95 NEWinogradLayerTransformInputKernel(NEWinogradLayerTransformInputKernel &&) = default;
96 /** Allow instances of this class to be moved */
97 NEWinogradLayerTransformInputKernel &operator=(NEWinogradLayerTransformInputKernel &&) = default;
98 /** Default destructor */
99 ~NEWinogradLayerTransformInputKernel() = default;
100
Pablo Tellof6c572c2018-02-14 12:47:30 +0000101 /** Determine how much memory (in units of TIn) to allocate for the
102 * transformed input.
103 *
Pablo Tello7df27862018-05-30 11:44:26 +0100104 * @param[in] num_batches Number of batches in the input tensor.
105 * @param[in] num_channels Number of feature maps in the input tensor.
106 * @param[in] num_rows Number of rows in each feature map.
107 * @param[in] num_cols Number of columns in each feature map.
Pablo Tellof6c572c2018-02-14 12:47:30 +0000108 * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
Alex Gildayc357c472018-03-21 13:54:09 +0000109 *
110 * @return Storage size (in units of TIn) required.
Pablo Tellof6c572c2018-02-14 12:47:30 +0000111 */
112 unsigned int get_input_storage_size(
Pablo Tello7df27862018-05-30 11:44:26 +0100113 int num_batches,
114 int num_channels,
115 int num_rows,
116 int num_cols,
Pablo Tellof6c572c2018-02-14 12:47:30 +0000117 bool same_padding) const override;
118
119 /** Gets the stride between matrices in the input worspace
120 *
121 * @param[in] kernel_shape The shape of the weights tensor.
122 * @param[in] input_shape The shape of the input tensor.
123 * @param[in] padding_type The type of padding to be used.
124 *
125 * @return Stride expressed in bytes.
126 */
127 int get_matrix_stride(const KernelShape &kernel_shape, const Tensor4DShape &input_shape, const PaddingType padding_type) const override;
Pablo Tellod6ca4782018-01-23 09:36:04 +0000128
Alex Gildayc357c472018-03-21 13:54:09 +0000129 /** Default constructor */
Pablo Tello52140b42018-01-30 14:48:11 +0000130 NEWinogradLayerTransformInputKernel();
Pablo Tellof6c572c2018-02-14 12:47:30 +0000131
Pablo Tellod6ca4782018-01-23 09:36:04 +0000132 const char *name() const override
133 {
134 return "NEWinogradLayerTransformInputKernel";
135 }
Pablo Tello52140b42018-01-30 14:48:11 +0000136
137 /** Configure the output transform kernel.
138 *
Pablo Tello7df27862018-05-30 11:44:26 +0100139 * @param[in] input_nhwc Input tensor. Data types supported: F32. Layout supported NHWC.
140 * @param[in] num_batches Number of batches in input tensor.
141 * @param[in] num_rows Number of rows in input tensor.
142 * @param[in] num_cols Number of columns in input tensor.
143 * @param[in] num_channels Number of channels in input tensor.
Pablo Tello52140b42018-01-30 14:48:11 +0000144 * @param[in] padding Padding type.
145 * @param[out] output Base of output matrices.
146 * @param[in] matrix_stride Stride between output matrices.
147 */
148 void configure(
Pablo Tello7df27862018-05-30 11:44:26 +0100149 const ITensor *input_nhwc,
150 const int num_batches,
151 const int num_rows,
152 const int num_cols,
153 const int num_channels,
Pablo Tellof6c572c2018-02-14 12:47:30 +0000154 const PaddingType padding,
155 T *const output,
156 const int matrix_stride) override;
Pablo Tello52140b42018-01-30 14:48:11 +0000157
Pablo Tellod6ca4782018-01-23 09:36:04 +0000158 // Inherited methods overridden:
Pablo Tellod6ca4782018-01-23 09:36:04 +0000159 void run(const Window &window, const ThreadInfo &info) override;
Pablo Tello52140b42018-01-30 14:48:11 +0000160
Alex Gildayc357c472018-03-21 13:54:09 +0000161 /** Winograd base kernel */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000162 using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelCols, KernelCols>;
Alex Gildayc357c472018-03-21 13:54:09 +0000163 /** Winograd convolution kernel */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000164 using WinogradConv = typename WinogradBase::template Convolution<T, T>;
165
Vidhya Sudhan Loganathan3ca97862018-04-23 08:20:04 +0100166 /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformInputKernel
167 *
Vidhya Sudhan Loganathan84ce1f92018-04-25 13:00:09 +0100168 * @param[in] input First tensor input info. Data types supported: F32.
169 * @param[in] output Output tensor info. Data types supported: same as @p input.
170 * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
Vidhya Sudhan Loganathan3ca97862018-04-23 08:20:04 +0100171 *
172 * @return a status
173 */
Vidhya Sudhan Loganathan84ce1f92018-04-25 13:00:09 +0100174 static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info);
Vidhya Sudhan Loganathan3ca97862018-04-23 08:20:04 +0100175
Pablo Tello52140b42018-01-30 14:48:11 +0000176private:
Pablo Tellof6c572c2018-02-14 12:47:30 +0000177 using InputTransform = typename WinogradBase::template InputTransform<T>;
Pablo Tello7df27862018-05-30 11:44:26 +0100178 const ITensor *_input_nhwc;
179 int _num_batches; /**< Number of batches in input tensor. */
180 int _num_rows; /**< Number of rows in input tensor. */
181 int _num_cols; /**< Number of columns in input tensor. */
182 int _num_channels; /**< Number of channels in input tensor. */
183 PaddingType _padding; /**< Padding type. */
184 T *_output; /**< Base of output matrices. */
185 int _matrix_stride; /**< Stride between output matrices. */
Pablo Tellod6ca4782018-01-23 09:36:04 +0000186};
187
/** Interface for the NEON kernel to perform Winograd output transform. */
template <typename T>
class INEWinogradLayerTransformOutputKernel : public INEKernel
{
public:
    /** Determine how much memory (in units of TOut) to allocate for the
     * (Winograd domain) output.
     *
     * @param[in] num_batches         Number of batches in the output tensor.
     * @param[in] num_rows            Number of rows in each feature map of the input tensor.
     * @param[in] num_cols            Number of columns in each feature map of the input tensor.
     * @param[in] num_output_channels Number of feature maps in the output tensor.
     * @param[in] same_padding        Use "SAME" padding, otherwise use "VALID".
     *
     * @return Storage size (in units of TOut) required.
     */
    virtual unsigned int get_output_storage_size(int num_batches, int num_rows, int num_cols, int num_output_channels, bool same_padding) const = 0;

    /** Gets the stride between matrices in the output workspace
     *
     * @param[in] kernel_shape The shape of the weights tensor.
     * @param[in] input_shape  The shape of the input tensor.
     * @param[in] padding_type The type of padding to be used.
     *
     * @return Stride expressed in bytes.
     */
    virtual int get_matrix_stride(const KernelShape &kernel_shape, const Tensor4DShape &input_shape, const PaddingType padding_type) const = 0;

    /** Get the output shape of a convolution.
     *
     * @param[in] kernel_shape The shape of the weights tensor.
     * @param[in] in_shape     The shape of the input tensor.
     * @param[in] padding      The type of padding to be used.
     *
     * @return Shape of the output tensor.
     */
    virtual Tensor4DShape get_output_shape(const KernelShape &kernel_shape, const Tensor4DShape &in_shape, const PaddingType padding) const = 0;

    /** Configure the output transform kernel.
     *
     * @param[in]  biases              Pointer to the biases tensor.
     * @param[in]  output_workingspace Pointer to working space for the output tensor in the Winograd domain.
     * @param[in]  matrix_stride       Output matrix stride, can be computed with winograd::WinogradGEMM<2, 2, 3, 3>::Convolution<float, float>::get_output_matrix_stride()
     * @param[out] output_nhwc         Pointer to a tensor in NHWC data layout ordered output tensor, in the spatial domain.
     * @param[in]  num_batches         Number of batches in the input tensor.
     * @param[in]  num_rows            Number of rows in output tensor.
     * @param[in]  num_cols            Number of columns in output tensor.
     * @param[in]  num_channels        Number of feature maps in the output tensor.
     */
    virtual void configure(
        const ITensor *biases,
        const T *const output_workingspace,
        const int      matrix_stride,
        ITensor *const output_nhwc,
        const int      num_batches,
        const int      num_rows,
        const int      num_cols,
        const int      num_channels) = 0;

    /** Destructor */
    virtual ~INEWinogradLayerTransformOutputKernel()
    {
    }
};
251
/** NEON kernel to perform Winograd output transform. */
template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerTransformOutputKernel : public INEWinogradLayerTransformOutputKernel<T>
{
public:
    const char *name() const override
    {
        return "NEWinogradLayerTransformOutputKernel";
    }
    /** Constructor */
    NEWinogradLayerTransformOutputKernel();

    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformOutputKernel(const NEWinogradLayerTransformOutputKernel &) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformOutputKernel &operator=(const NEWinogradLayerTransformOutputKernel &) = delete;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformOutputKernel(NEWinogradLayerTransformOutputKernel &&) = default;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformOutputKernel &operator=(NEWinogradLayerTransformOutputKernel &&) = default;
    /** Default destructor */
    ~NEWinogradLayerTransformOutputKernel() = default;

    // Inherited methods overridden:
    /** Determine how much memory (in units of TOut) to allocate for the
     * (Winograd domain) output.
     *
     * @param[in] num_batches         Number of batches in the output tensor.
     * @param[in] num_rows            Number of rows in each feature map of the input tensor.
     * @param[in] num_cols            Number of columns in each feature map of the input tensor.
     * @param[in] num_output_channels Number of feature maps in the output tensor.
     * @param[in] same_padding        Use "SAME" padding, otherwise use "VALID".
     *
     * @return Storage size (in units of TOut) required.
     */
    unsigned int get_output_storage_size(int num_batches, int num_rows, int num_cols, int num_output_channels, bool same_padding) const override;

    /** Gets the stride between matrices in the output workspace
     *
     * @param[in] kernel_shape The shape of the weights tensor.
     * @param[in] input_shape  The shape of the input tensor.
     * @param[in] padding_type The type of padding to be used.
     *
     * @return Stride expressed in bytes.
     */
    int get_matrix_stride(const KernelShape &kernel_shape, const Tensor4DShape &input_shape, const PaddingType padding_type) const override;
    /** Get the output shape of a convolution.
     *
     * @param[in] kernel_shape The shape of the weights tensor.
     * @param[in] in_shape     The shape of the input tensor.
     * @param[in] padding      The type of padding to be used.
     *
     * @return Shape of the output tensor.
     */
    Tensor4DShape get_output_shape(const KernelShape &kernel_shape, const Tensor4DShape &in_shape, const PaddingType padding) const override;

    /** Configure the output transform kernel.
     *
     * @param[in]  biases              Pointer to the biases tensor.
     * @param[in]  output_workingspace Pointer to working space for the output tensor in the Winograd domain.
     * @param[in]  matrix_stride       Output matrix stride, can be computed with winograd::WinogradGEMM<2, 2, 3, 3>::Convolution<float, float>::get_output_matrix_stride()
     * @param[out] output_nhwc         Pointer to a tensor with NHWC data layout, in the spatial domain.
     * @param[in]  num_batches         Number of batches in the input tensor.
     * @param[in]  num_rows            Number of rows in output tensor.
     * @param[in]  num_cols            Number of columns in output tensor.
     * @param[in]  num_channels        Number of feature maps in the output tensor.
     */
    void configure(
        const ITensor *biases,
        const T *const output_workingspace,
        const int      matrix_stride,
        ITensor *const output_nhwc,
        const int      num_batches,
        const int      num_rows,
        const int      num_cols,
        const int      num_channels) override;

    void run(const Window &window, const ThreadInfo &info) override;

    /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformOutputKernel
     *
     * @param[in]  input         Source tensor with shape [C, N, 16, batches] or [C, N, 36, batches]. Data types supported: F32.
     * @param[in]  bias          Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input
     * @param[out] output        Destination tensor with shape [output_convolved_dims.width, output_convolved_dims.height, C, batches]. Data type supported: same as @p input
     * @param[in]  winograd_info Contains Winograd's information described in @ref WinogradInfo
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const WinogradInfo &winograd_info);

private:
    using WinogradBase    = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
    using WinogradConv    = typename WinogradBase::template Convolution<T, T>;
    using OutputTransform = typename WinogradBase::template OutputTransform<T>;

    const ITensor *_biases;            /**< Biases tensor (not owned, may be nullptr). */
    const T       *_output_workspace;  /**< Winograd-domain working space (not owned). */
    int            _matrix_stride;     /**< Stride between matrices in the workspace. */
    int            _matrix_row_stride; /**< Row stride inside a workspace matrix. */
    ITensor       *_output_nhwc;       /**< Spatial-domain output tensor, NHWC layout (not owned). */
    int            _num_batches;       /**< Number of batches in the input tensor. */
    int            _num_rows;          /**< Number of rows in the output tensor. */
    int            _num_cols;          /**< Number of columns in the output tensor. */
    int            _num_channels;      /**< Number of feature maps in the output tensor. */
};
357
/** Interface for the NEON kernel to perform Winograd weights transform. */
template <typename T>
class INEWinogradLayerTransformWeightsKernel : public INEKernel
{
public:
    /** Determine how much memory (in units of T) to allocate for the
     * transformed weights.
     *
     * @param[in] num_output_channels Number of output feature maps.
     * @param[in] num_input_channels  Number of input feature maps.
     *
     * @return Storage size (in units of T) required.
     */
    virtual unsigned int get_weight_storage_size(int num_output_channels, int num_input_channels) const = 0;
    /** Gets the stride between matrices in the kernel workspace
     *
     * @param[in] kernel_shape The shape of the weights tensor.
     *
     * @return Stride expressed in bytes.
     */
    virtual int get_matrix_stride(const KernelShape &kernel_shape) const = 0;

    /** Configure the weights transform kernel.
     *
     * @param[in]  weights_hwio        Pointer to the weights tensor
     * @param[out] output              Pointer to working space for the output tensor in the Winograd domain.
     * @param[in]  matrix_stride       Stride across matrices in the output workspace.
     * @param[in]  num_output_channels Number of filters.
     * @param[in]  num_input_channels  Number of channels in each filter.
     */

    virtual void configure(const ITensor *weights_hwio, T *const output, const int matrix_stride, const int num_output_channels, const int num_input_channels) = 0;

    /** Destructor */
    virtual ~INEWinogradLayerTransformWeightsKernel()
    {
    }
};
395
/** NEON kernel to perform Winograd weights transform. */
template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerTransformWeightsKernel final : public INEWinogradLayerTransformWeightsKernel<T>
{
public:
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformWeightsKernel(const NEWinogradLayerTransformWeightsKernel &) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformWeightsKernel &operator=(const NEWinogradLayerTransformWeightsKernel &) = delete;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformWeightsKernel(NEWinogradLayerTransformWeightsKernel &&) = default;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformWeightsKernel &operator=(NEWinogradLayerTransformWeightsKernel &&) = default;
    /** Default destructor */
    ~NEWinogradLayerTransformWeightsKernel() = default;

    /** Default constructor. */
    NEWinogradLayerTransformWeightsKernel();
    const char *name() const override
    {
        return "NEWinogradLayerTransformWeightsKernel";
    }

    /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformWeightsKernel
     *
     * @param[in] input         Source tensor info. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout).
     *                          kernel_x must be 3 and equal to kernel_y. Data types supported: F32.
     * @param[in] output        Destination tensor info. The output is a 3D tensor with dimensions [OFM, IFM, 16] or [OFM, IFM, 36]. Data type supported: same as @p input
     * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info);

    // Inherited methods overridden:
    /** See @ref INEWinogradLayerTransformWeightsKernel::configure */
    void configure(const ITensor *weights_hwio, T *const output, const int matrix_stride, const int num_output_channels, const int num_input_channels) override;
    /** See @ref INEWinogradLayerTransformWeightsKernel::get_weight_storage_size */
    unsigned int get_weight_storage_size(int num_output_channels, int num_input_channels) const override;
    /** See @ref INEWinogradLayerTransformWeightsKernel::get_matrix_stride */
    int  get_matrix_stride(const KernelShape &kernel_shape) const override;
    void run(const Window &window, const ThreadInfo &info) override;
    bool is_parallelisable() const override;

private:
    using WinogradBase     = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
    using WinogradConv     = typename WinogradBase::template Convolution<T, T>;
    using WeightsTransform = typename WinogradBase::template WeightsTransform<T>;

    const ITensor *_weights_hwio;        /**< Weights tensor in HWIO layout (not owned). */
    T             *_output;              /**< Winograd-domain weights workspace (not owned). */
    int            _matrix_stride;       /**< Stride across matrices in the output workspace. */
    int            _num_output_channels; /**< Number of filters. */
    int            _num_input_channels;  /**< Number of channels in each filter. */
};
448
/** Interface for the NEON kernel to perform Winograd. */
template <typename TIn, typename TOut>
class INEWinogradLayerBatchedGEMMKernel : public INEKernel
{
public:
    /** Get the number of GEMMs to compute
     */
    virtual unsigned int get_number_gemms() const = 0;
    /** Initialise the kernel
     *
     * @param[in]  n_gemms         Number of GEMMs to compute.
     * @param[in]  M               in_shape.num_batches * tile_rows * tile_cols.
     * @param[in]  K               Number of channels in the input tensor.
     * @param[in]  N               Number of channels in the output tensor.
     * @param[in]  a_matrix_stride Stride between input matrices.
     * @param[in]  a_row_stride    Row stride inside input matrix.
     * @param[in]  b_matrix_stride Stride between weights matrices.
     * @param[in]  b_row_stride    Row stride inside the weights matrix.
     * @param[in]  c_matrix_stride Stride between output matrices.
     * @param[in]  c_row_stride    Row stride inside the output matrix.
     * @param[out] a_ptr           Input workspace.
     * @param[out] b_ptr           Kernel workspace.
     * @param[out] c_ptr           Output workspace.
     */
    virtual void configure(
        const unsigned int n_gemms,
        const int M, const int K, const int N,
        const int        a_matrix_stride,
        const int        a_row_stride,
        const int        b_matrix_stride,
        const int        b_row_stride,
        const int        c_matrix_stride,
        const int        c_row_stride,
        const TIn *const a_ptr,
        const TIn *const b_ptr,
        TOut *const      c_ptr) = 0;

    /** Get the number of tiles per row
     */
    virtual int get_output_tile_rows() const = 0;
    /** Get the number of tiles per columns
     */
    virtual int get_output_tile_cols() const = 0;
    /** Get the number of blocks
     */
    virtual int get_number_blocks() const = 0;
};
496
/** NEON kernel to perform Winograd. */
template <typename TIn, typename TOut, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerBatchedGEMMKernel : public INEWinogradLayerBatchedGEMMKernel<TIn, TOut>
{
public:
    /** Winograd base kernel */
    using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
    /** Winograd convolution kernel */
    using WinogradConv = typename WinogradBase::template Convolution<TIn, TOut>;
    /** Winograd batched blocked GEMM operator */
    using MultiGEMM = winograd::BatchedBlockedGemm<WinogradConv::M_BLOCK, WinogradConv::N_BLOCK, TIn, TOut>;

    const char *name() const override
    {
        return "NEWinogradLayerBatchedGEMMKernel";
    }
    /** Constructor */
    NEWinogradLayerBatchedGEMMKernel();

    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerBatchedGEMMKernel(const NEWinogradLayerBatchedGEMMKernel &) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerBatchedGEMMKernel &operator=(const NEWinogradLayerBatchedGEMMKernel &) = delete;
    /** Allow instances of this class to be moved */
    NEWinogradLayerBatchedGEMMKernel(NEWinogradLayerBatchedGEMMKernel &&) = default;
    /** Allow instances of this class to be moved */
    NEWinogradLayerBatchedGEMMKernel &operator=(NEWinogradLayerBatchedGEMMKernel &&) = default;
    /** Default destructor. */
    ~NEWinogradLayerBatchedGEMMKernel() = default;

    // Inherited methods overridden:

    unsigned int get_number_gemms() const override;
    int          get_output_tile_rows() const override;
    int          get_output_tile_cols() const override;
    int          get_number_blocks() const override;

    /** Initialise the kernel
     *
     * @param[in]  n_gemms         Number of GEMMs to compute.
     * @param[in]  M               in_shape.num_batches * tile_rows * tile_cols.
     * @param[in]  K               Number of channels in the input tensor.
     * @param[in]  N               Number of channels in the output tensor.
     * @param[in]  a_matrix_stride Stride between input matrices.
     * @param[in]  a_row_stride    Row stride inside input matrix.
     * @param[in]  b_matrix_stride Stride between weights matrices.
     * @param[in]  b_row_stride    Row stride inside the weights matrix.
     * @param[in]  c_matrix_stride Stride between output matrices.
     * @param[in]  c_row_stride    Row stride inside the output matrix.
     * @param[out] a_ptr           Input workspace.
     * @param[out] b_ptr           Kernel workspace.
     * @param[out] c_ptr           Output workspace.
     */
    void configure(
        const unsigned int n_gemms,
        const int M, const int K, const int N,
        const int        a_matrix_stride,
        const int        a_row_stride,
        const int        b_matrix_stride,
        const int        b_row_stride,
        const int        c_matrix_stride,
        const int        c_row_stride,
        const TIn *const a_ptr,
        const TIn *const b_ptr,
        TOut *const      c_ptr) override;

    void run(const Window &window, const ThreadInfo &info) override;

    /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerBatchedGEMMKernel.
     *
     * NOTE(review): @p c is declared as `const ITensor *` while every other validate() in this
     * file takes `ITensorInfo *` — looks like an inconsistency; confirm against the definition
     * before changing, as callers and the .cpp must agree.
     *
     * @param[in]  a         First input tensor (Matrix or Vector A). Data types supported: F32
     * @param[in]  b         Second input tensor (Matrix B). Data type supported: same as @p a.
     * @param[in]  c         Third input tensor (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a.
     * @param[out] output    Output tensor. Data type supported: same as @p a
     * @param[in]  alpha     Weight of the matrix product
     * @param[in]  beta      Weight of matrix C
     * @param[in]  gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and
     *                       if the reshape of matrix B should happen only for the first run
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensor *c, const ITensorInfo *output, const float alpha, const float beta, const GEMMInfo &gemm_info = GEMMInfo());

private:
    static const int           _output_tile_rows = OutputTileRows; /**< Rows per output tile. */
    static const int           _output_tile_cols = OutputTileCols; /**< Columns per output tile. */
    std::unique_ptr<MultiGEMM> _gemms;                             /**< Owned batched blocked GEMM operator. */
};
585
586} // namespace arm_compute
Georgios Pinitas9fb11592018-04-26 20:34:58 +0100587#endif /*__ARM_COMPUTE_NEGEMMWINOGRADCONVOLUTIONLAYERKERNEL_H__*/