blob: 2f44d19b4fb1f57ac9031ff3762c560988b4b7cb [file] [log] [blame]
/*
 * Copyright (c) 2017-2018 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#ifndef __ARM_COMPUTE_NEGEMMWINOGRADLAYERKERNEL_H__
25#define __ARM_COMPUTE_NEGEMMWINOGRADLAYERKERNEL_H__
26
#include "arm_compute/core/NEON/INEKernel.h"
#include "arm_compute/core/NEON/kernels/convolution/common/convolution.hpp"
#include "arm_compute/core/NEON/kernels/convolution/common/tensor.hpp"
#include "arm_compute/core/NEON/kernels/convolution/winograd/batched_blocked_gemm.hpp"
#include "arm_compute/core/NEON/kernels/convolution/winograd/winograd_gemm.hpp"

#include <memory>
Pablo Tello89519332017-11-17 11:52:36 +000032
33namespace arm_compute
34{
35class ITensor;
Pablo Tello02541fb2017-12-15 09:48:59 +000036
Alex Gildayc357c472018-03-21 13:54:09 +000037/** Interface for the NEON kernel to perform Winograd input transform. */
Pablo Tellof6c572c2018-02-14 12:47:30 +000038template <typename T>
39class INEWinogradLayerTransformInputKernel : public INEKernel
Pablo Tello3d4968a2017-12-04 15:03:35 +000040{
41public:
Pablo Tello52140b42018-01-30 14:48:11 +000042 /** Determine how much memory (in units of TIn) to allocate for the
43 * transformed input.
Pablo Tello6c6e77a2018-01-23 10:03:27 +000044 *
Pablo Tello52140b42018-01-30 14:48:11 +000045 * @param[in] n_batches Number of batches in the input tensor.
46 * @param[in] n_channels Number of feature maps in the input tensor.
47 * @param[in] n_rows Number of rows in each feature map.
48 * @param[in] n_cols Number of columns in each feature map.
49 * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
Alex Gildayc357c472018-03-21 13:54:09 +000050 *
51 * @return Storage size (in units of TIn) required.
Pablo Tello6c6e77a2018-01-23 10:03:27 +000052 */
Pablo Tellof6c572c2018-02-14 12:47:30 +000053 virtual unsigned int get_input_storage_size(int n_batches, int n_channels, int n_rows, int n_cols, bool same_padding) const = 0;
54
55 /** Gets the stride between matrices in the input worspace
56 *
57 * @param[in] kernel_shape The shape of the weights tensor.
58 * @param[in] input_shape The shape of the input tensor.
59 * @param[in] padding_type The type of padding to be used.
60 *
61 * @return Stride expressed in bytes.
62 */
63 virtual int get_matrix_stride(const KernelShape &kernel_shape, const Tensor4DShape &input_shape, const PaddingType padding_type) const = 0;
64
65 /** Configure the output transform kernel.
66 *
67 * @param[in] input Input tensor data
68 * @param[in] n_batches Number of batches in input tensor.
69 * @param[in] n_rows Number of rows in input tensor.
70 * @param[in] n_cols Number of columns in input tensor.
71 * @param[in] n_channels Number of channels in input tensor.
72 * @param[in] padding Padding type.
73 * @param[out] output Base of output matrices.
74 * @param[in] matrix_stride Stride between output matrices.
75 */
76 virtual void configure(const T *const input, const int n_batches, const int n_rows, const int n_cols, const int n_channels, const PaddingType padding, T *const output, const int matrix_stride) = 0;
77
Alex Gildayc357c472018-03-21 13:54:09 +000078 /** Destructor */
Pablo Tellof6c572c2018-02-14 12:47:30 +000079 virtual ~INEWinogradLayerTransformInputKernel()
80 {
81 }
82};
83
Alex Gildayc357c472018-03-21 13:54:09 +000084/** NEON kernel to perform Winograd input transform. */
Pablo Tellof6c572c2018-02-14 12:47:30 +000085template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
86class NEWinogradLayerTransformInputKernel : public INEWinogradLayerTransformInputKernel<T>
87{
88public:
89 /** Determine how much memory (in units of TIn) to allocate for the
90 * transformed input.
91 *
92 * @param[in] n_batches Number of batches in the input tensor.
93 * @param[in] n_channels Number of feature maps in the input tensor.
94 * @param[in] n_rows Number of rows in each feature map.
95 * @param[in] n_cols Number of columns in each feature map.
96 * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
Alex Gildayc357c472018-03-21 13:54:09 +000097 *
98 * @return Storage size (in units of TIn) required.
Pablo Tellof6c572c2018-02-14 12:47:30 +000099 */
100 unsigned int get_input_storage_size(
Pablo Tello52140b42018-01-30 14:48:11 +0000101 int n_batches,
102 int n_channels,
103 int n_rows,
104 int n_cols,
Pablo Tellof6c572c2018-02-14 12:47:30 +0000105 bool same_padding) const override;
106
107 /** Gets the stride between matrices in the input worspace
108 *
109 * @param[in] kernel_shape The shape of the weights tensor.
110 * @param[in] input_shape The shape of the input tensor.
111 * @param[in] padding_type The type of padding to be used.
112 *
113 * @return Stride expressed in bytes.
114 */
115 int get_matrix_stride(const KernelShape &kernel_shape, const Tensor4DShape &input_shape, const PaddingType padding_type) const override;
Pablo Tellod6ca4782018-01-23 09:36:04 +0000116
Alex Gildayc357c472018-03-21 13:54:09 +0000117 /** Default constructor */
Pablo Tello52140b42018-01-30 14:48:11 +0000118 NEWinogradLayerTransformInputKernel();
Pablo Tellof6c572c2018-02-14 12:47:30 +0000119
Pablo Tellod6ca4782018-01-23 09:36:04 +0000120 const char *name() const override
121 {
122 return "NEWinogradLayerTransformInputKernel";
123 }
Pablo Tello52140b42018-01-30 14:48:11 +0000124
125 /** Configure the output transform kernel.
126 *
127 * @param[in] input Input tensor data
128 * @param[in] n_batches Number of batches in input tensor.
129 * @param[in] n_rows Number of rows in input tensor.
130 * @param[in] n_cols Number of columns in input tensor.
131 * @param[in] n_channels Number of channels in input tensor.
132 * @param[in] padding Padding type.
133 * @param[out] output Base of output matrices.
134 * @param[in] matrix_stride Stride between output matrices.
135 */
136 void configure(
Pablo Tellof6c572c2018-02-14 12:47:30 +0000137 const T *const input,
138 const int n_batches,
139 const int n_rows,
140 const int n_cols,
141 const int n_channels,
142 const PaddingType padding,
143 T *const output,
144 const int matrix_stride) override;
Pablo Tello52140b42018-01-30 14:48:11 +0000145
Pablo Tellod6ca4782018-01-23 09:36:04 +0000146 // Inherited methods overridden:
Pablo Tellod6ca4782018-01-23 09:36:04 +0000147 void run(const Window &window, const ThreadInfo &info) override;
148 bool is_parallelisable() const override;
Pablo Tello52140b42018-01-30 14:48:11 +0000149
Alex Gildayc357c472018-03-21 13:54:09 +0000150 /** Winograd base kernel */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000151 using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelCols, KernelCols>;
Alex Gildayc357c472018-03-21 13:54:09 +0000152 /** Winograd convolution kernel */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000153 using WinogradConv = typename WinogradBase::template Convolution<T, T>;
154
Pablo Tello52140b42018-01-30 14:48:11 +0000155private:
Pablo Tellof6c572c2018-02-14 12:47:30 +0000156 using InputTransform = typename WinogradBase::template InputTransform<T>;
Pablo Tello52140b42018-01-30 14:48:11 +0000157 std::unique_ptr<InputTransform> _transform;
Pablo Tellod6ca4782018-01-23 09:36:04 +0000158};
159
Alex Gildayc357c472018-03-21 13:54:09 +0000160/** Interface for the NEON kernel to perform Winograd output transform. */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000161template <typename T>
162class INEWinogradLayerTransformOutputKernel : public INEKernel
Pablo Tellod6ca4782018-01-23 09:36:04 +0000163{
164public:
Pablo Tello52140b42018-01-30 14:48:11 +0000165 /** Determine how much memory (in units of TOut) to allocate for the
166 * (Winograd domain) output.
167 *
168 * @param[in] n_batches Number of batches in the output tensor.
169 * @param[in] n_rows Number of rows in each feature map of the input tensor.
170 * @param[in] n_cols Number of columns in each feature map of the input tensor.
171 * @param[in] n_output_channels Number of feature maps in the output tensor.
172 * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
Alex Gildayc357c472018-03-21 13:54:09 +0000173 *
174 * @return Storage size (in units of TOut) required.
Pablo Tello52140b42018-01-30 14:48:11 +0000175 */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000176 virtual unsigned int get_output_storage_size(int n_batches, int n_rows, int n_cols, int n_output_channels, bool same_padding) const = 0;
Pablo Tello52140b42018-01-30 14:48:11 +0000177
Pablo Tellof6c572c2018-02-14 12:47:30 +0000178 /** Gets the stride between matrices in the output worspace
179 *
180 * @param[in] kernel_shape The shape of the weights tensor.
181 * @param[in] input_shape The shape of the input tensor.
182 * @param[in] padding_type The type of padding to be used.
183 *
184 * @return Stride expressed in bytes.
185 */
186 virtual int get_matrix_stride(const KernelShape &kernel_shape, const Tensor4DShape &input_shape, const PaddingType padding_type) const = 0;
187
188 /** Get the output shape of a convolution.
189 *
190 * @param[in] kernel_shape The shape of the weights tensor.
191 * @param[in] in_shape The shape of the input tensor.
192 * @param[in] padding The type of padding to be used.
193 *
194 * @return Stride expressed in bytes.
195 */
196 virtual Tensor4DShape get_output_shape(const KernelShape &kernel_shape, const Tensor4DShape &in_shape, const PaddingType padding) const = 0;
197
198 /** Configure the output transform kernel.
199 *
200 * @param[in] biases Pointer to the biases tensor.
201 * @param[in] output_workingspace Pointer to working space for the output tensor in the Winograd domain.
202 * @param[in] matrix_stride Output matrix stride, can be computed with winograd::WinogradGEMM<2, 2, 3, 3>::Convolution<float, float>::get_output_matrix_stride()
203 * @param[out] output Pointer to NHWC ordered output tensor, in the spatial domain.
204 * @param[in] n_batches Number of batches in the input tensor.
205 * @param[in] n_rows Number of rows in output tensor.
206 * @param[in] n_cols Number of columns in output tensor.
207 * @param[in] n_channels Number of feature maps in the output tensor.
208 */
209 virtual void configure(
210 const ITensor *biases,
211 const T *const output_workingspace,
212 const int matrix_stride,
213 T *const output,
214 const int n_batches,
215 const int n_rows,
216 const int n_cols,
217 const int n_channels) = 0;
218
219 virtual ~INEWinogradLayerTransformOutputKernel()
220 {
221 }
222};
223
Alex Gildayc357c472018-03-21 13:54:09 +0000224/** NEON kernel to perform Winograd output transform. */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000225template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
226class NEWinogradLayerTransformOutputKernel : public INEWinogradLayerTransformOutputKernel<T>
227{
228public:
Pablo Tellod6ca4782018-01-23 09:36:04 +0000229 const char *name() const override
230 {
231 return "NEWinogradLayerTransformOutputKernel";
232 }
233 /** Constructor */
234 NEWinogradLayerTransformOutputKernel();
235
236 /** Prevent instances of this class from being copied (As this class contains pointers) */
237 NEWinogradLayerTransformOutputKernel(const NEWinogradLayerTransformOutputKernel &) = delete;
238 /** Prevent instances of this class from being copied (As this class contains pointers) */
239 NEWinogradLayerTransformOutputKernel &operator=(const NEWinogradLayerTransformOutputKernel &) = delete;
240 /** Allow instances of this class to be moved */
241 NEWinogradLayerTransformOutputKernel(NEWinogradLayerTransformOutputKernel &&) = default;
242 /** Allow instances of this class to be moved */
243 NEWinogradLayerTransformOutputKernel &operator=(NEWinogradLayerTransformOutputKernel &&) = default;
Alex Gildayc357c472018-03-21 13:54:09 +0000244 /** Default destructor */
Pablo Tellod6ca4782018-01-23 09:36:04 +0000245 ~NEWinogradLayerTransformOutputKernel() = default;
246
Pablo Tellof6c572c2018-02-14 12:47:30 +0000247 // Inherited methods overridden:
248 /** Determine how much memory (in units of TOut) to allocate for the
249 * (Winograd domain) output.
250 *
251 * @param[in] n_batches Number of batches in the output tensor.
252 * @param[in] n_rows Number of rows in each feature map of the input tensor.
253 * @param[in] n_cols Number of columns in each feature map of the input tensor.
254 * @param[in] n_output_channels Number of feature maps in the output tensor.
255 * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
Alex Gildayc357c472018-03-21 13:54:09 +0000256 *
257 * @return Storage size (in units of TOut) required.
Pablo Tellof6c572c2018-02-14 12:47:30 +0000258 */
259 unsigned int get_output_storage_size(int n_batches, int n_rows, int n_cols, int n_output_channels, bool same_padding) const override;
260
261 /** Gets the stride between matrices in the output worspace
262 *
263 * @param[in] kernel_shape The shape of the weights tensor.
264 * @param[in] input_shape The shape of the input tensor.
265 * @param[in] padding_type The type of padding to be used.
266 *
267 * @return Stride expressed in bytes.
268 */
269 int get_matrix_stride(const KernelShape &kernel_shape, const Tensor4DShape &input_shape, const PaddingType padding_type) const override;
270 /** Get the output shape of a convolution.
271 *
272 * @param[in] kernel_shape The shape of the weights tensor.
273 * @param[in] in_shape The shape of the input tensor.
274 * @param[in] padding The type of padding to be used.
275 *
276 * @return Stride expressed in bytes.
277 */
278 Tensor4DShape get_output_shape(const KernelShape &kernel_shape, const Tensor4DShape &in_shape, const PaddingType padding) const override;
279
Pablo Tellod6ca4782018-01-23 09:36:04 +0000280 /** Configure the output transform kernel.
281 *
282 * @param[in] biases Pointer to the biases tensor.
283 * @param[in] output_workingspace Pointer to working space for the output tensor in the Winograd domain.
284 * @param[in] matrix_stride Output matrix stride, can be computed with winograd::WinogradGEMM<2, 2, 3, 3>::Convolution<float, float>::get_output_matrix_stride()
285 * @param[out] output Pointer to NHWC ordered output tensor, in the spatial domain.
286 * @param[in] n_batches Number of batches in the input tensor.
287 * @param[in] n_rows Number of rows in output tensor.
288 * @param[in] n_cols Number of columns in output tensor.
289 * @param[in] n_channels Number of feature maps in the output tensor.
290 */
291 void configure(
Pablo Tellof6c572c2018-02-14 12:47:30 +0000292 const ITensor *biases,
293 const T *const output_workingspace,
294 const int matrix_stride,
295 T *const output,
296 const int n_batches,
297 const int n_rows,
298 const int n_cols,
299 const int n_channels) override;
Pablo Tellod6ca4782018-01-23 09:36:04 +0000300
Pablo Tellod6ca4782018-01-23 09:36:04 +0000301 void run(const Window &window, const ThreadInfo &info) override;
302 bool is_parallelisable() const override;
303
304private:
Pablo Tello52140b42018-01-30 14:48:11 +0000305 using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
Pablo Tellof6c572c2018-02-14 12:47:30 +0000306 using WinogradConv = typename WinogradBase::template Convolution<T, T>;
307 using OutputTransform = typename WinogradBase::template OutputTransform<T>;
Pablo Tello52140b42018-01-30 14:48:11 +0000308
Pablo Tellod6ca4782018-01-23 09:36:04 +0000309 const ITensor *_biases;
Pablo Tellof6c572c2018-02-14 12:47:30 +0000310 const T *_output_workspace;
Pablo Tellod6ca4782018-01-23 09:36:04 +0000311 int _matrix_stride;
312 int _matrix_row_stride;
Pablo Tellof6c572c2018-02-14 12:47:30 +0000313 T *_output;
Pablo Tellod6ca4782018-01-23 09:36:04 +0000314 int _n_batches;
315 int _n_rows;
316 int _n_cols;
317 int _n_channels;
318};
319
Alex Gildayc357c472018-03-21 13:54:09 +0000320/** Interface for the NEON kernel to perform Winograd weights transform. */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000321template <typename T>
322class INEWinogradLayerTransformWeightsKernel : public INEKernel
Pablo Tellod6ca4782018-01-23 09:36:04 +0000323{
324public:
Pablo Tellof6c572c2018-02-14 12:47:30 +0000325 /** Determine how much memory (in units of T) to allocate for the
Pablo Tello52140b42018-01-30 14:48:11 +0000326 * transformed weights.
327 *
328 * @param[in] n_output_channels Number of output feature maps.
329 * @param[in] n_input_channels Number of input feature maps.
Alex Gildayc357c472018-03-21 13:54:09 +0000330 *
331 * @return Storage size (in units of T) required.
Pablo Tello52140b42018-01-30 14:48:11 +0000332 */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000333 virtual unsigned int get_weight_storage_size(int n_output_channels, int n_input_channels) const = 0;
334 /** Gets the stride between matrices in the kernel worspace
335 *
336 * @param[in] kernel_shape The shape of the weights tensor.
337 *
338 * @return Stride expressed in bytes.
339 */
340 virtual int get_matrix_stride(const KernelShape &kernel_shape) const = 0;
Pablo Tello52140b42018-01-30 14:48:11 +0000341
Pablo Tellof6c572c2018-02-14 12:47:30 +0000342 /** Configure the weights transform kernel.
Pablo Tello52140b42018-01-30 14:48:11 +0000343 *
344 * @param[in] weights_hwio Pointer to the weights tensor
345 * @param[in] output Pointer to working space for the output tensor in the Winograd domain.
346 * @param[in] matrix_stride Stride across matrices in the output workspace.
347 * @param[in] n_output_channels Number of filters.
348 * @param[in] n_input_channels Number of channels in each filter.
349 */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000350 virtual void configure(const ITensor *weights_hwio, T *const output, const int matrix_stride, const int n_output_channels, const int n_input_channels) = 0;
351
352 virtual ~INEWinogradLayerTransformWeightsKernel()
353 {
354 }
355};
356
Alex Gildayc357c472018-03-21 13:54:09 +0000357/** NEON kernel to perform Winograd weights transform. */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000358template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
359class NEWinogradLayerTransformWeightsKernel final : public INEWinogradLayerTransformWeightsKernel<T>
360{
361public:
Alex Gildayc357c472018-03-21 13:54:09 +0000362 /** Default constructor. */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000363 NEWinogradLayerTransformWeightsKernel();
364 const char *name() const override
365 {
366 return "NEWinogradLayerTransformWeightsKernel";
367 }
Pablo Tello52140b42018-01-30 14:48:11 +0000368
Pablo Tellod6ca4782018-01-23 09:36:04 +0000369 // Inherited methods overridden:
Pablo Tellof6c572c2018-02-14 12:47:30 +0000370 void configure(const ITensor *weights_hwio, T *const output, const int matrix_stride, const int n_output_channels, const int n_input_channels) override;
371 unsigned int get_weight_storage_size(int n_output_channels, int n_input_channels) const override;
372 int get_matrix_stride(const KernelShape &kernel_shape) const override;
Pablo Tellod6ca4782018-01-23 09:36:04 +0000373 void run(const Window &window, const ThreadInfo &info) override;
374 bool is_parallelisable() const override;
Pablo Tello52140b42018-01-30 14:48:11 +0000375
376private:
377 using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
Pablo Tellof6c572c2018-02-14 12:47:30 +0000378 using WinogradConv = typename WinogradBase::template Convolution<T, T>;
379 using WeightsTransform = typename WinogradBase::template WeightsTransform<T>;
Pablo Tello52140b42018-01-30 14:48:11 +0000380 std::unique_ptr<WeightsTransform> _transform;
Pablo Tellod6ca4782018-01-23 09:36:04 +0000381};
382
Alex Gildayc357c472018-03-21 13:54:09 +0000383/** Interface for the NEON kernel to perform Winograd. */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000384template <typename TIn, typename TOut>
385class INEWinogradLayerBatchedGEMMKernel : public INEKernel
386{
387public:
388 /** Get the number of GEMMs to compute
389 */
390 virtual unsigned int get_number_gemms() const = 0;
391 /** Initialise the kernel
392 *
393 * @param[in] n_gemms Number of GEMMs to compute.
394 * @param[in] M in_shape.n_batches * tile_rows * tile_cols.
395 * @param[in] K Number of channels in the input tensor.
396 * @param[in] N Number of channels in the output tensor.
397 * @param[in] a_matrix_stride Stride between input matrices.
398 * @param[in] a_row_stride Row stride inside input matrix.
399 * @param[in] b_matrix_stride Stride between weights matrices.
400 * @param[in] b_row_stride Row stride inside the weights matrix.
401 * @param[in] c_matrix_stride Stride between output matrices.
402 * @param[in] c_row_stride Row stride inside the output matrix.
403 * @param[out] a_ptr Input workspace.
404 * @param[out] b_ptr Kernel workspace.
405 * @param[out] c_ptr Output workspace.
406 */
407 virtual void configure(
408 const unsigned int n_gemms,
409 const int M, const int K, const int N,
410 const int a_matrix_stride,
411 const int a_row_stride,
412 const int b_matrix_stride,
413 const int b_row_stride,
414 const int c_matrix_stride,
415 const int c_row_stride,
416 const TIn *const a_ptr,
417 const TIn *const b_ptr,
418 TOut *const c_ptr) = 0;
419
420 /** Get the number of tiles per row
421 */
422 virtual int get_output_tile_rows() const = 0;
423 /** Get the number of tiles per columns
424 */
425 virtual int get_output_tile_cols() const = 0;
426 /** Get the number of blocks
427 */
428 virtual int get_number_blocks() const = 0;
429};
430
Alex Gildayc357c472018-03-21 13:54:09 +0000431/** NEON kernel to perform Winograd. */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000432template <typename TIn, typename TOut, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
433class NEWinogradLayerBatchedGEMMKernel : public INEWinogradLayerBatchedGEMMKernel<TIn, TOut>
Pablo Tello89519332017-11-17 11:52:36 +0000434{
435public:
Alex Gildayc357c472018-03-21 13:54:09 +0000436 /** Winograd base kernel */
Pablo Tello52140b42018-01-30 14:48:11 +0000437 using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
Alex Gildayc357c472018-03-21 13:54:09 +0000438 /** Winograd convolution kernel */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000439 using WinogradConv = typename WinogradBase::template Convolution<TIn, TOut>;
Alex Gildayc357c472018-03-21 13:54:09 +0000440 /** Winograd batched blocked GEMM operator */
441 using MultiGEMM = winograd::BatchedBlockedGemm<WinogradConv::M_BLOCK, WinogradConv::N_BLOCK, TIn, TOut>;
Pablo Tello52140b42018-01-30 14:48:11 +0000442
Anthony Barbiere8a49832018-01-18 10:04:05 +0000443 const char *name() const override
444 {
Pablo Tellof6c572c2018-02-14 12:47:30 +0000445 return "NEWinogradLayerBatchedGEMMKernel";
Anthony Barbiere8a49832018-01-18 10:04:05 +0000446 }
Pablo Tello89519332017-11-17 11:52:36 +0000447 /** Constructor */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000448 NEWinogradLayerBatchedGEMMKernel();
Pablo Tello89519332017-11-17 11:52:36 +0000449
450 /** Prevent instances of this class from being copied (As this class contains pointers) */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000451 NEWinogradLayerBatchedGEMMKernel(const NEWinogradLayerBatchedGEMMKernel &) = delete;
Pablo Tello89519332017-11-17 11:52:36 +0000452 /** Prevent instances of this class from being copied (As this class contains pointers) */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000453 NEWinogradLayerBatchedGEMMKernel &operator=(const NEWinogradLayerBatchedGEMMKernel &) = delete;
Pablo Tello89519332017-11-17 11:52:36 +0000454 /** Allow instances of this class to be moved */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000455 NEWinogradLayerBatchedGEMMKernel(NEWinogradLayerBatchedGEMMKernel &&) = default;
Pablo Tello89519332017-11-17 11:52:36 +0000456 /** Allow instances of this class to be moved */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000457 NEWinogradLayerBatchedGEMMKernel &operator=(NEWinogradLayerBatchedGEMMKernel &&) = default;
Alex Gildayc357c472018-03-21 13:54:09 +0000458 /** Default destructor. */
Pablo Tellof6c572c2018-02-14 12:47:30 +0000459 ~NEWinogradLayerBatchedGEMMKernel() = default;
460
461 // Inherited methods overridden:
462
463 unsigned int get_number_gemms() const override;
464 int get_output_tile_rows() const override;
465 int get_output_tile_cols() const override;
466 int get_number_blocks() const override;
Pablo Tello89519332017-11-17 11:52:36 +0000467
468 /** Initialise the kernel
469 *
Pablo Tello52140b42018-01-30 14:48:11 +0000470 * @param[in] n_gemms Number of GEMMs to compute.
471 * @param[in] M in_shape.n_batches * tile_rows * tile_cols.
472 * @param[in] K Number of channels in the input tensor.
473 * @param[in] N Number of channels in the output tensor.
474 * @param[in] a_matrix_stride Stride between input matrices.
475 * @param[in] a_row_stride Row stride inside input matrix.
476 * @param[in] b_matrix_stride Stride between weights matrices.
477 * @param[in] b_row_stride Row stride inside the weights matrix.
478 * @param[in] c_matrix_stride Stride between output matrices.
479 * @param[in] c_row_stride Row stride inside the output matrix.
480 * @param[out] a_ptr Input workspace.
481 * @param[out] b_ptr Kernel workspace.
482 * @param[out] c_ptr Output workspace.
Pablo Tello89519332017-11-17 11:52:36 +0000483 */
Pablo Tello52140b42018-01-30 14:48:11 +0000484 void configure(
485 const unsigned int n_gemms,
486 const int M, const int K, const int N,
Pablo Tellof6c572c2018-02-14 12:47:30 +0000487 const int a_matrix_stride,
488 const int a_row_stride,
489 const int b_matrix_stride,
490 const int b_row_stride,
491 const int c_matrix_stride,
492 const int c_row_stride,
493 const TIn *const a_ptr,
494 const TIn *const b_ptr,
495 TOut *const c_ptr) override;
Pablo Tello89519332017-11-17 11:52:36 +0000496
Pablo Tello89519332017-11-17 11:52:36 +0000497 void run(const Window &window, const ThreadInfo &info) override;
498
Pablo Tello52140b42018-01-30 14:48:11 +0000499private:
Alex Gildayc357c472018-03-21 13:54:09 +0000500 static const int _output_tile_rows = OutputTileRows;
501 static const int _output_tile_cols = OutputTileCols;
Pablo Tello52140b42018-01-30 14:48:11 +0000502 std::unique_ptr<MultiGEMM> _gemms;
Pablo Tello89519332017-11-17 11:52:36 +0000503};
504
505} // namespace arm_compute
506#endif /*__ARM_COMPUTE_NEGEMMWINOGRADLAYERKERNEL_H__*/