blob: 7284f9fdc4a9b2885a70ceb3c8d0f352b382b181 [file] [log] [blame]
Pablo Tello89519332017-11-17 11:52:36 +00001/*
Pablo Tello9ceebbe2018-01-10 16:44:13 +00002 * Copyright (c) 2017-2018 ARM Limited.
Pablo Tello89519332017-11-17 11:52:36 +00003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#ifndef __ARM_COMPUTE_NEGEMMWINOGRADLAYERKERNEL_H__
25#define __ARM_COMPUTE_NEGEMMWINOGRADLAYERKERNEL_H__
26
27#include "arm_compute/core/NEON/INEKernel.h"
Georgios Pinitas4074c992018-01-30 18:13:46 +000028#include "arm_compute/core/NEON/kernels/convolution/common/convolution.hpp"
29#include "arm_compute/core/NEON/kernels/convolution/common/tensor.hpp"
30#include "arm_compute/core/NEON/kernels/convolution/winograd/batched_blocked_gemm.hpp"
31#include "arm_compute/core/NEON/kernels/convolution/winograd/winograd_gemm.hpp"
Pablo Tello89519332017-11-17 11:52:36 +000032
33namespace arm_compute
34{
35class ITensor;
Pablo Tello02541fb2017-12-15 09:48:59 +000036
/** Interface for the NEON kernel to perform Winograd input transform. */
template <typename T>
class INEWinogradLayerTransformInputKernel : public INEKernel
{
public:
    /** Determine how much memory (in units of TIn) to allocate for the
     * transformed input.
     *
     * @param[in] n_batches    Number of batches in the input tensor.
     * @param[in] n_channels   Number of feature maps in the input tensor.
     * @param[in] n_rows       Number of rows in each feature map.
     * @param[in] n_cols       Number of columns in each feature map.
     * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
     *
     * @return Storage size (in units of TIn) required.
     */
    virtual unsigned int get_input_storage_size(int n_batches, int n_channels, int n_rows, int n_cols, bool same_padding) const = 0;

    /** Gets the stride between matrices in the input workspace
     *
     * @param[in] kernel_shape The shape of the weights tensor.
     * @param[in] input_shape  The shape of the input tensor.
     * @param[in] padding_type The type of padding to be used.
     *
     * @return Stride expressed in bytes.
     */
    virtual int get_matrix_stride(const KernelShape &kernel_shape, const Tensor4DShape &input_shape, const PaddingType padding_type) const = 0;

    /** Configure the input transform kernel.
     *
     * @param[in]  input         Input tensor data
     * @param[in]  n_batches     Number of batches in input tensor.
     * @param[in]  n_rows        Number of rows in input tensor.
     * @param[in]  n_cols        Number of columns in input tensor.
     * @param[in]  n_channels    Number of channels in input tensor.
     * @param[in]  padding       Padding type.
     * @param[out] output        Base of output matrices.
     * @param[in]  matrix_stride Stride between output matrices.
     */
    virtual void configure(const T *const input, const int n_batches, const int n_rows, const int n_cols, const int n_channels, const PaddingType padding, T *const output, const int matrix_stride) = 0;

    /** Destructor */
    virtual ~INEWinogradLayerTransformInputKernel()
    {
    }
};
83
Alex Gildayc357c472018-03-21 13:54:09 +000084/** NEON kernel to perform Winograd input transform. */
Pablo Tellof6c572c2018-02-14 12:47:30 +000085template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
86class NEWinogradLayerTransformInputKernel : public INEWinogradLayerTransformInputKernel<T>
87{
88public:
89 /** Determine how much memory (in units of TIn) to allocate for the
90 * transformed input.
91 *
92 * @param[in] n_batches Number of batches in the input tensor.
93 * @param[in] n_channels Number of feature maps in the input tensor.
94 * @param[in] n_rows Number of rows in each feature map.
95 * @param[in] n_cols Number of columns in each feature map.
96 * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
Alex Gildayc357c472018-03-21 13:54:09 +000097 *
98 * @return Storage size (in units of TIn) required.
Pablo Tellof6c572c2018-02-14 12:47:30 +000099 */
100 unsigned int get_input_storage_size(
Pablo Tello52140b42018-01-30 14:48:11 +0000101 int n_batches,
102 int n_channels,
103 int n_rows,
104 int n_cols,
Pablo Tellof6c572c2018-02-14 12:47:30 +0000105 bool same_padding) const override;
106
107 /** Gets the stride between matrices in the input worspace
108 *
109 * @param[in] kernel_shape The shape of the weights tensor.
110 * @param[in] input_shape The shape of the input tensor.
111 * @param[in] padding_type The type of padding to be used.
112 *
113 * @return Stride expressed in bytes.
114 */
115 int get_matrix_stride(const KernelShape &kernel_shape, const Tensor4DShape &input_shape, const PaddingType padding_type) const override;
Pablo Tellod6ca4782018-01-23 09:36:04 +0000116
Alex Gildayc357c472018-03-21 13:54:09 +0000117 /** Default constructor */
Pablo Tello52140b42018-01-30 14:48:11 +0000118 NEWinogradLayerTransformInputKernel();
Pablo Tellof6c572c2018-02-14 12:47:30 +0000119
Pablo Tellod6ca4782018-01-23 09:36:04 +0000120 const char *name() const override
121 {
122 return "NEWinogradLayerTransformInputKernel";
123 }
Pablo Tello52140b42018-01-30 14:48:11 +0000124
125 /** Configure the output transform kernel.
126 *
Vidhya Sudhan Loganathan3ca97862018-04-23 08:20:04 +0100127 * @param[in] input Input tensor data. Data types supported: F32.
Pablo Tello52140b42018-01-30 14:48:11 +0000128 * @param[in] n_batches Number of batches in input tensor.
129 * @param[in] n_rows Number of rows in input tensor.
130 * @param[in] n_cols Number of columns in input tensor.
131 * @param[in] n_channels Number of channels in input tensor.
132 * @param[in] padding Padding type.
133 * @param[out] output Base of output matrices.
134 * @param[in] matrix_stride Stride between output matrices.
135 */
136 void configure(
Pablo Tellof6c572c2018-02-14 12:47:30 +0000137 const T *const input,
138 const int n_batches,
139 const int n_rows,
140 const int n_cols,
141 const int n_channels,
142 const PaddingType padding,
143 T *const output,
144 const int matrix_stride) override;
Pablo Tello52140b42018-01-30 14:48:11 +0000145
Pablo Tellod6ca4782018-01-23 09:36:04 +0000146 // Inherited methods overridden:
Pablo Tellod6ca4782018-01-23 09:36:04 +0000147 void run(const Window &window, const ThreadInfo &info) override;
148 bool is_parallelisable() const override;
Pablo Tello52140b42018-01-30 14:48:11 +0000149
Alex Gildayc357c472018-03-21 13:54:09 +0000150 /** Winograd base kernel */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000151 using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelCols, KernelCols>;
Alex Gildayc357c472018-03-21 13:54:09 +0000152 /** Winograd convolution kernel */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000153 using WinogradConv = typename WinogradBase::template Convolution<T, T>;
154
Vidhya Sudhan Loganathan3ca97862018-04-23 08:20:04 +0100155 /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformInputKernel
156 *
Vidhya Sudhan Loganathan84ce1f92018-04-25 13:00:09 +0100157 * @param[in] input First tensor input info. Data types supported: F32.
158 * @param[in] output Output tensor info. Data types supported: same as @p input.
159 * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
Vidhya Sudhan Loganathan3ca97862018-04-23 08:20:04 +0100160 *
161 * @return a status
162 */
Vidhya Sudhan Loganathan84ce1f92018-04-25 13:00:09 +0100163 static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info);
Vidhya Sudhan Loganathan3ca97862018-04-23 08:20:04 +0100164
Pablo Tello52140b42018-01-30 14:48:11 +0000165private:
Pablo Tellof6c572c2018-02-14 12:47:30 +0000166 using InputTransform = typename WinogradBase::template InputTransform<T>;
Pablo Tello52140b42018-01-30 14:48:11 +0000167 std::unique_ptr<InputTransform> _transform;
Pablo Tellod6ca4782018-01-23 09:36:04 +0000168};
169
Alex Gildayc357c472018-03-21 13:54:09 +0000170/** Interface for the NEON kernel to perform Winograd output transform. */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000171template <typename T>
172class INEWinogradLayerTransformOutputKernel : public INEKernel
Pablo Tellod6ca4782018-01-23 09:36:04 +0000173{
174public:
Pablo Tello52140b42018-01-30 14:48:11 +0000175 /** Determine how much memory (in units of TOut) to allocate for the
176 * (Winograd domain) output.
177 *
178 * @param[in] n_batches Number of batches in the output tensor.
179 * @param[in] n_rows Number of rows in each feature map of the input tensor.
180 * @param[in] n_cols Number of columns in each feature map of the input tensor.
181 * @param[in] n_output_channels Number of feature maps in the output tensor.
182 * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
Alex Gildayc357c472018-03-21 13:54:09 +0000183 *
184 * @return Storage size (in units of TOut) required.
Pablo Tello52140b42018-01-30 14:48:11 +0000185 */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000186 virtual unsigned int get_output_storage_size(int n_batches, int n_rows, int n_cols, int n_output_channels, bool same_padding) const = 0;
Pablo Tello52140b42018-01-30 14:48:11 +0000187
Pablo Tellof6c572c2018-02-14 12:47:30 +0000188 /** Gets the stride between matrices in the output worspace
189 *
190 * @param[in] kernel_shape The shape of the weights tensor.
191 * @param[in] input_shape The shape of the input tensor.
192 * @param[in] padding_type The type of padding to be used.
193 *
194 * @return Stride expressed in bytes.
195 */
196 virtual int get_matrix_stride(const KernelShape &kernel_shape, const Tensor4DShape &input_shape, const PaddingType padding_type) const = 0;
197
198 /** Get the output shape of a convolution.
199 *
200 * @param[in] kernel_shape The shape of the weights tensor.
201 * @param[in] in_shape The shape of the input tensor.
202 * @param[in] padding The type of padding to be used.
203 *
204 * @return Stride expressed in bytes.
205 */
206 virtual Tensor4DShape get_output_shape(const KernelShape &kernel_shape, const Tensor4DShape &in_shape, const PaddingType padding) const = 0;
207
208 /** Configure the output transform kernel.
209 *
210 * @param[in] biases Pointer to the biases tensor.
211 * @param[in] output_workingspace Pointer to working space for the output tensor in the Winograd domain.
212 * @param[in] matrix_stride Output matrix stride, can be computed with winograd::WinogradGEMM<2, 2, 3, 3>::Convolution<float, float>::get_output_matrix_stride()
213 * @param[out] output Pointer to NHWC ordered output tensor, in the spatial domain.
214 * @param[in] n_batches Number of batches in the input tensor.
215 * @param[in] n_rows Number of rows in output tensor.
216 * @param[in] n_cols Number of columns in output tensor.
217 * @param[in] n_channels Number of feature maps in the output tensor.
218 */
219 virtual void configure(
220 const ITensor *biases,
221 const T *const output_workingspace,
222 const int matrix_stride,
223 T *const output,
224 const int n_batches,
225 const int n_rows,
226 const int n_cols,
227 const int n_channels) = 0;
228
229 virtual ~INEWinogradLayerTransformOutputKernel()
230 {
231 }
232};
233
Alex Gildayc357c472018-03-21 13:54:09 +0000234/** NEON kernel to perform Winograd output transform. */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000235template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
236class NEWinogradLayerTransformOutputKernel : public INEWinogradLayerTransformOutputKernel<T>
237{
238public:
Pablo Tellod6ca4782018-01-23 09:36:04 +0000239 const char *name() const override
240 {
241 return "NEWinogradLayerTransformOutputKernel";
242 }
243 /** Constructor */
244 NEWinogradLayerTransformOutputKernel();
245
246 /** Prevent instances of this class from being copied (As this class contains pointers) */
247 NEWinogradLayerTransformOutputKernel(const NEWinogradLayerTransformOutputKernel &) = delete;
248 /** Prevent instances of this class from being copied (As this class contains pointers) */
249 NEWinogradLayerTransformOutputKernel &operator=(const NEWinogradLayerTransformOutputKernel &) = delete;
250 /** Allow instances of this class to be moved */
251 NEWinogradLayerTransformOutputKernel(NEWinogradLayerTransformOutputKernel &&) = default;
252 /** Allow instances of this class to be moved */
253 NEWinogradLayerTransformOutputKernel &operator=(NEWinogradLayerTransformOutputKernel &&) = default;
Alex Gildayc357c472018-03-21 13:54:09 +0000254 /** Default destructor */
Pablo Tellod6ca4782018-01-23 09:36:04 +0000255 ~NEWinogradLayerTransformOutputKernel() = default;
256
Pablo Tellof6c572c2018-02-14 12:47:30 +0000257 // Inherited methods overridden:
258 /** Determine how much memory (in units of TOut) to allocate for the
259 * (Winograd domain) output.
260 *
261 * @param[in] n_batches Number of batches in the output tensor.
262 * @param[in] n_rows Number of rows in each feature map of the input tensor.
263 * @param[in] n_cols Number of columns in each feature map of the input tensor.
264 * @param[in] n_output_channels Number of feature maps in the output tensor.
265 * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
Alex Gildayc357c472018-03-21 13:54:09 +0000266 *
267 * @return Storage size (in units of TOut) required.
Pablo Tellof6c572c2018-02-14 12:47:30 +0000268 */
269 unsigned int get_output_storage_size(int n_batches, int n_rows, int n_cols, int n_output_channels, bool same_padding) const override;
270
271 /** Gets the stride between matrices in the output worspace
272 *
273 * @param[in] kernel_shape The shape of the weights tensor.
274 * @param[in] input_shape The shape of the input tensor.
275 * @param[in] padding_type The type of padding to be used.
276 *
277 * @return Stride expressed in bytes.
278 */
279 int get_matrix_stride(const KernelShape &kernel_shape, const Tensor4DShape &input_shape, const PaddingType padding_type) const override;
280 /** Get the output shape of a convolution.
281 *
282 * @param[in] kernel_shape The shape of the weights tensor.
283 * @param[in] in_shape The shape of the input tensor.
284 * @param[in] padding The type of padding to be used.
285 *
286 * @return Stride expressed in bytes.
287 */
288 Tensor4DShape get_output_shape(const KernelShape &kernel_shape, const Tensor4DShape &in_shape, const PaddingType padding) const override;
289
Pablo Tellod6ca4782018-01-23 09:36:04 +0000290 /** Configure the output transform kernel.
291 *
292 * @param[in] biases Pointer to the biases tensor.
293 * @param[in] output_workingspace Pointer to working space for the output tensor in the Winograd domain.
294 * @param[in] matrix_stride Output matrix stride, can be computed with winograd::WinogradGEMM<2, 2, 3, 3>::Convolution<float, float>::get_output_matrix_stride()
295 * @param[out] output Pointer to NHWC ordered output tensor, in the spatial domain.
296 * @param[in] n_batches Number of batches in the input tensor.
297 * @param[in] n_rows Number of rows in output tensor.
298 * @param[in] n_cols Number of columns in output tensor.
299 * @param[in] n_channels Number of feature maps in the output tensor.
300 */
301 void configure(
Pablo Tellof6c572c2018-02-14 12:47:30 +0000302 const ITensor *biases,
303 const T *const output_workingspace,
304 const int matrix_stride,
305 T *const output,
306 const int n_batches,
307 const int n_rows,
308 const int n_cols,
309 const int n_channels) override;
Pablo Tellod6ca4782018-01-23 09:36:04 +0000310
Pablo Tellod6ca4782018-01-23 09:36:04 +0000311 void run(const Window &window, const ThreadInfo &info) override;
312 bool is_parallelisable() const override;
313
Vidhya Sudhan Loganathan3ca97862018-04-23 08:20:04 +0100314 /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformOutputKernel
315 *
Vidhya Sudhan Loganathan84ce1f92018-04-25 13:00:09 +0100316 * @param[in] input Source tensor with shape [C, N, 16, batches] or [C, N, 36, batches]. Data types supported: F32.
317 * @param[in] bias Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input
318 * @param[out] output Destination tensor with shape [output_convolved_dims.width, output_convolved_dims.height, C, batches]. Data type supported: same as @p input
319 * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
Vidhya Sudhan Loganathan3ca97862018-04-23 08:20:04 +0100320 *
321 * @return a status
322 */
Vidhya Sudhan Loganathan84ce1f92018-04-25 13:00:09 +0100323 static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const WinogradInfo &winograd_info);
Vidhya Sudhan Loganathan3ca97862018-04-23 08:20:04 +0100324
Pablo Tellod6ca4782018-01-23 09:36:04 +0000325private:
Pablo Tello52140b42018-01-30 14:48:11 +0000326 using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
Pablo Tellof6c572c2018-02-14 12:47:30 +0000327 using WinogradConv = typename WinogradBase::template Convolution<T, T>;
328 using OutputTransform = typename WinogradBase::template OutputTransform<T>;
Pablo Tello52140b42018-01-30 14:48:11 +0000329
Pablo Tellod6ca4782018-01-23 09:36:04 +0000330 const ITensor *_biases;
Pablo Tellof6c572c2018-02-14 12:47:30 +0000331 const T *_output_workspace;
Pablo Tellod6ca4782018-01-23 09:36:04 +0000332 int _matrix_stride;
333 int _matrix_row_stride;
Pablo Tellof6c572c2018-02-14 12:47:30 +0000334 T *_output;
Pablo Tellod6ca4782018-01-23 09:36:04 +0000335 int _n_batches;
336 int _n_rows;
337 int _n_cols;
338 int _n_channels;
339};
340
Alex Gildayc357c472018-03-21 13:54:09 +0000341/** Interface for the NEON kernel to perform Winograd weights transform. */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000342template <typename T>
343class INEWinogradLayerTransformWeightsKernel : public INEKernel
Pablo Tellod6ca4782018-01-23 09:36:04 +0000344{
345public:
Pablo Tellof6c572c2018-02-14 12:47:30 +0000346 /** Determine how much memory (in units of T) to allocate for the
Pablo Tello52140b42018-01-30 14:48:11 +0000347 * transformed weights.
348 *
349 * @param[in] n_output_channels Number of output feature maps.
350 * @param[in] n_input_channels Number of input feature maps.
Alex Gildayc357c472018-03-21 13:54:09 +0000351 *
352 * @return Storage size (in units of T) required.
Pablo Tello52140b42018-01-30 14:48:11 +0000353 */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000354 virtual unsigned int get_weight_storage_size(int n_output_channels, int n_input_channels) const = 0;
355 /** Gets the stride between matrices in the kernel worspace
356 *
357 * @param[in] kernel_shape The shape of the weights tensor.
358 *
359 * @return Stride expressed in bytes.
360 */
361 virtual int get_matrix_stride(const KernelShape &kernel_shape) const = 0;
Pablo Tello52140b42018-01-30 14:48:11 +0000362
Pablo Tellof6c572c2018-02-14 12:47:30 +0000363 /** Configure the weights transform kernel.
Pablo Tello52140b42018-01-30 14:48:11 +0000364 *
365 * @param[in] weights_hwio Pointer to the weights tensor
366 * @param[in] output Pointer to working space for the output tensor in the Winograd domain.
367 * @param[in] matrix_stride Stride across matrices in the output workspace.
368 * @param[in] n_output_channels Number of filters.
369 * @param[in] n_input_channels Number of channels in each filter.
370 */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000371 virtual void configure(const ITensor *weights_hwio, T *const output, const int matrix_stride, const int n_output_channels, const int n_input_channels) = 0;
372
373 virtual ~INEWinogradLayerTransformWeightsKernel()
374 {
375 }
376};
377
Alex Gildayc357c472018-03-21 13:54:09 +0000378/** NEON kernel to perform Winograd weights transform. */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000379template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
380class NEWinogradLayerTransformWeightsKernel final : public INEWinogradLayerTransformWeightsKernel<T>
381{
382public:
Alex Gildayc357c472018-03-21 13:54:09 +0000383 /** Default constructor. */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000384 NEWinogradLayerTransformWeightsKernel();
385 const char *name() const override
386 {
387 return "NEWinogradLayerTransformWeightsKernel";
388 }
Pablo Tello52140b42018-01-30 14:48:11 +0000389
Vidhya Sudhan Loganathan3ca97862018-04-23 08:20:04 +0100390 /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformWeightsKernel
391 *
Vidhya Sudhan Loganathan84ce1f92018-04-25 13:00:09 +0100392 * @param[in] input Source tensor info. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout).
393 * kernel_x must be 3 and equal to kernel_y. Data types supported: F32.
394 * @param[in] output Destination tensor info. The output is a 3D tensor with dimensions [OFM, IFM, 16] or [OFM, IFM, 36]. Data type supported: same as @p input
395 * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
Vidhya Sudhan Loganathan3ca97862018-04-23 08:20:04 +0100396 *
397 * @return a status
398 */
Vidhya Sudhan Loganathan84ce1f92018-04-25 13:00:09 +0100399 static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info);
Vidhya Sudhan Loganathan3ca97862018-04-23 08:20:04 +0100400
Pablo Tellod6ca4782018-01-23 09:36:04 +0000401 // Inherited methods overridden:
Pablo Tellof6c572c2018-02-14 12:47:30 +0000402 void configure(const ITensor *weights_hwio, T *const output, const int matrix_stride, const int n_output_channels, const int n_input_channels) override;
403 unsigned int get_weight_storage_size(int n_output_channels, int n_input_channels) const override;
404 int get_matrix_stride(const KernelShape &kernel_shape) const override;
Pablo Tellod6ca4782018-01-23 09:36:04 +0000405 void run(const Window &window, const ThreadInfo &info) override;
406 bool is_parallelisable() const override;
Pablo Tello52140b42018-01-30 14:48:11 +0000407
408private:
409 using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
Pablo Tellof6c572c2018-02-14 12:47:30 +0000410 using WinogradConv = typename WinogradBase::template Convolution<T, T>;
411 using WeightsTransform = typename WinogradBase::template WeightsTransform<T>;
Pablo Tello52140b42018-01-30 14:48:11 +0000412 std::unique_ptr<WeightsTransform> _transform;
Pablo Tellod6ca4782018-01-23 09:36:04 +0000413};
414
Alex Gildayc357c472018-03-21 13:54:09 +0000415/** Interface for the NEON kernel to perform Winograd. */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000416template <typename TIn, typename TOut>
417class INEWinogradLayerBatchedGEMMKernel : public INEKernel
418{
419public:
420 /** Get the number of GEMMs to compute
421 */
422 virtual unsigned int get_number_gemms() const = 0;
423 /** Initialise the kernel
424 *
425 * @param[in] n_gemms Number of GEMMs to compute.
426 * @param[in] M in_shape.n_batches * tile_rows * tile_cols.
427 * @param[in] K Number of channels in the input tensor.
428 * @param[in] N Number of channels in the output tensor.
429 * @param[in] a_matrix_stride Stride between input matrices.
430 * @param[in] a_row_stride Row stride inside input matrix.
431 * @param[in] b_matrix_stride Stride between weights matrices.
432 * @param[in] b_row_stride Row stride inside the weights matrix.
433 * @param[in] c_matrix_stride Stride between output matrices.
434 * @param[in] c_row_stride Row stride inside the output matrix.
435 * @param[out] a_ptr Input workspace.
436 * @param[out] b_ptr Kernel workspace.
437 * @param[out] c_ptr Output workspace.
438 */
439 virtual void configure(
440 const unsigned int n_gemms,
441 const int M, const int K, const int N,
442 const int a_matrix_stride,
443 const int a_row_stride,
444 const int b_matrix_stride,
445 const int b_row_stride,
446 const int c_matrix_stride,
447 const int c_row_stride,
448 const TIn *const a_ptr,
449 const TIn *const b_ptr,
450 TOut *const c_ptr) = 0;
451
452 /** Get the number of tiles per row
453 */
454 virtual int get_output_tile_rows() const = 0;
455 /** Get the number of tiles per columns
456 */
457 virtual int get_output_tile_cols() const = 0;
458 /** Get the number of blocks
459 */
460 virtual int get_number_blocks() const = 0;
461};
462
Alex Gildayc357c472018-03-21 13:54:09 +0000463/** NEON kernel to perform Winograd. */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000464template <typename TIn, typename TOut, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
465class NEWinogradLayerBatchedGEMMKernel : public INEWinogradLayerBatchedGEMMKernel<TIn, TOut>
Pablo Tello89519332017-11-17 11:52:36 +0000466{
467public:
Alex Gildayc357c472018-03-21 13:54:09 +0000468 /** Winograd base kernel */
Pablo Tello52140b42018-01-30 14:48:11 +0000469 using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
Alex Gildayc357c472018-03-21 13:54:09 +0000470 /** Winograd convolution kernel */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000471 using WinogradConv = typename WinogradBase::template Convolution<TIn, TOut>;
Alex Gildayc357c472018-03-21 13:54:09 +0000472 /** Winograd batched blocked GEMM operator */
473 using MultiGEMM = winograd::BatchedBlockedGemm<WinogradConv::M_BLOCK, WinogradConv::N_BLOCK, TIn, TOut>;
Pablo Tello52140b42018-01-30 14:48:11 +0000474
Anthony Barbiere8a49832018-01-18 10:04:05 +0000475 const char *name() const override
476 {
Pablo Tellof6c572c2018-02-14 12:47:30 +0000477 return "NEWinogradLayerBatchedGEMMKernel";
Anthony Barbiere8a49832018-01-18 10:04:05 +0000478 }
Pablo Tello89519332017-11-17 11:52:36 +0000479 /** Constructor */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000480 NEWinogradLayerBatchedGEMMKernel();
Pablo Tello89519332017-11-17 11:52:36 +0000481
482 /** Prevent instances of this class from being copied (As this class contains pointers) */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000483 NEWinogradLayerBatchedGEMMKernel(const NEWinogradLayerBatchedGEMMKernel &) = delete;
Pablo Tello89519332017-11-17 11:52:36 +0000484 /** Prevent instances of this class from being copied (As this class contains pointers) */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000485 NEWinogradLayerBatchedGEMMKernel &operator=(const NEWinogradLayerBatchedGEMMKernel &) = delete;
Pablo Tello89519332017-11-17 11:52:36 +0000486 /** Allow instances of this class to be moved */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000487 NEWinogradLayerBatchedGEMMKernel(NEWinogradLayerBatchedGEMMKernel &&) = default;
Pablo Tello89519332017-11-17 11:52:36 +0000488 /** Allow instances of this class to be moved */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000489 NEWinogradLayerBatchedGEMMKernel &operator=(NEWinogradLayerBatchedGEMMKernel &&) = default;
Alex Gildayc357c472018-03-21 13:54:09 +0000490 /** Default destructor. */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000491 ~NEWinogradLayerBatchedGEMMKernel() = default;
492
493 // Inherited methods overridden:
494
495 unsigned int get_number_gemms() const override;
496 int get_output_tile_rows() const override;
497 int get_output_tile_cols() const override;
498 int get_number_blocks() const override;
Pablo Tello89519332017-11-17 11:52:36 +0000499
500 /** Initialise the kernel
501 *
Pablo Tello52140b42018-01-30 14:48:11 +0000502 * @param[in] n_gemms Number of GEMMs to compute.
503 * @param[in] M in_shape.n_batches * tile_rows * tile_cols.
504 * @param[in] K Number of channels in the input tensor.
505 * @param[in] N Number of channels in the output tensor.
506 * @param[in] a_matrix_stride Stride between input matrices.
507 * @param[in] a_row_stride Row stride inside input matrix.
508 * @param[in] b_matrix_stride Stride between weights matrices.
509 * @param[in] b_row_stride Row stride inside the weights matrix.
510 * @param[in] c_matrix_stride Stride between output matrices.
511 * @param[in] c_row_stride Row stride inside the output matrix.
512 * @param[out] a_ptr Input workspace.
513 * @param[out] b_ptr Kernel workspace.
514 * @param[out] c_ptr Output workspace.
Pablo Tello89519332017-11-17 11:52:36 +0000515 */
Pablo Tello52140b42018-01-30 14:48:11 +0000516 void configure(
517 const unsigned int n_gemms,
518 const int M, const int K, const int N,
Pablo Tellof6c572c2018-02-14 12:47:30 +0000519 const int a_matrix_stride,
520 const int a_row_stride,
521 const int b_matrix_stride,
522 const int b_row_stride,
523 const int c_matrix_stride,
524 const int c_row_stride,
525 const TIn *const a_ptr,
526 const TIn *const b_ptr,
527 TOut *const c_ptr) override;
Pablo Tello89519332017-11-17 11:52:36 +0000528
Pablo Tello89519332017-11-17 11:52:36 +0000529 void run(const Window &window, const ThreadInfo &info) override;
530
Vidhya Sudhan Loganathan3ca97862018-04-23 08:20:04 +0100531 /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerBatchedGEMMKernel.
532 *
533 * @param[in] a First input tensor (Matrix or Vector A). Data types supported: F32
534 * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a.
535 * @param[in] c Third input tensor (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a.
536 * @param[out] output Output tensor. Data type supported: same as @p a
537 * @param[in] alpha Weight of the matrix product
538 * @param[in] beta Weight of matrix C
539 * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and
540 * if the reshape of matrix B should happen only for the first run
541 *
542 * @return a status
543 */
544 static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensor *c, const ITensorInfo *output, const float alpha, const float beta, const GEMMInfo &gemm_info = GEMMInfo());
545
Pablo Tello52140b42018-01-30 14:48:11 +0000546private:
Alex Gildayc357c472018-03-21 13:54:09 +0000547 static const int _output_tile_rows = OutputTileRows;
548 static const int _output_tile_cols = OutputTileCols;
Pablo Tello52140b42018-01-30 14:48:11 +0000549 std::unique_ptr<MultiGEMM> _gemms;
Pablo Tello89519332017-11-17 11:52:36 +0000550};
551
552} // namespace arm_compute
553#endif /*__ARM_COMPUTE_NEGEMMWINOGRADLAYERKERNEL_H__*/