blob: 9169b75d193f1362de3f9383640af63aeb8c21f3 [file] [log] [blame]
Pablo Tello89519332017-11-17 11:52:36 +00001/*
Pablo Tello9ceebbe2018-01-10 16:44:13 +00002 * Copyright (c) 2017-2018 ARM Limited.
Pablo Tello89519332017-11-17 11:52:36 +00003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#ifndef __ARM_COMPUTE_NEGEMMWINOGRADLAYERKERNEL_H__
25#define __ARM_COMPUTE_NEGEMMWINOGRADLAYERKERNEL_H__
26
27#include "arm_compute/core/NEON/INEKernel.h"
Georgios Pinitas4074c992018-01-30 18:13:46 +000028#include "arm_compute/core/NEON/kernels/convolution/common/convolution.hpp"
29#include "arm_compute/core/NEON/kernels/convolution/common/tensor.hpp"
30#include "arm_compute/core/NEON/kernels/convolution/winograd/batched_blocked_gemm.hpp"
31#include "arm_compute/core/NEON/kernels/convolution/winograd/winograd_gemm.hpp"
Pablo Tello89519332017-11-17 11:52:36 +000032
33namespace arm_compute
34{
35class ITensor;
Pablo Tello02541fb2017-12-15 09:48:59 +000036
/** Interface for the NEON kernel that performs the Winograd input transform.
 *
 * @tparam T Element type of the input tensor and of the transformed output matrices.
 */
template <typename T>
class INEWinogradLayerTransformInputKernel : public INEKernel
{
public:
    /** Determine how much memory (in units of T) to allocate for the
     * transformed input.
     *
     * @param[in] n_batches    Number of batches in the input tensor.
     * @param[in] n_channels   Number of feature maps in the input tensor.
     * @param[in] n_rows       Number of rows in each feature map.
     * @param[in] n_cols       Number of columns in each feature map.
     * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
     *
     * @return Required storage size in units of T.
     */
    virtual unsigned int get_input_storage_size(int n_batches, int n_channels, int n_rows, int n_cols, bool same_padding) const = 0;

    /** Gets the stride between matrices in the input workspace
     *
     * @param[in] kernel_shape The shape of the weights tensor.
     * @param[in] input_shape  The shape of the input tensor.
     * @param[in] padding_type The type of padding to be used.
     *
     * @return Stride expressed in bytes.
     */
    virtual int get_matrix_stride(const KernelShape &kernel_shape, const Tensor4DShape &input_shape, const PaddingType padding_type) const = 0;

    /** Configure the input transform kernel.
     *
     * @param[in]  input         Input tensor data
     * @param[in]  n_batches     Number of batches in input tensor.
     * @param[in]  n_rows        Number of rows in input tensor.
     * @param[in]  n_cols        Number of columns in input tensor.
     * @param[in]  n_channels    Number of channels in input tensor.
     * @param[in]  padding       Padding type.
     * @param[out] output        Base of output matrices.
     * @param[in]  matrix_stride Stride between output matrices.
     */
    virtual void configure(const T *const input, const int n_batches, const int n_rows, const int n_cols, const int n_channels, const PaddingType padding, T *const output, const int matrix_stride) = 0;

    /** Virtual destructor: derived kernels are deleted through this interface. */
    virtual ~INEWinogradLayerTransformInputKernel()
    {
    }
};
79
80template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
81class NEWinogradLayerTransformInputKernel : public INEWinogradLayerTransformInputKernel<T>
82{
83public:
84 /** Determine how much memory (in units of TIn) to allocate for the
85 * transformed input.
86 *
87 * @param[in] n_batches Number of batches in the input tensor.
88 * @param[in] n_channels Number of feature maps in the input tensor.
89 * @param[in] n_rows Number of rows in each feature map.
90 * @param[in] n_cols Number of columns in each feature map.
91 * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
92 */
93 unsigned int get_input_storage_size(
Pablo Tello52140b42018-01-30 14:48:11 +000094 int n_batches,
95 int n_channels,
96 int n_rows,
97 int n_cols,
Pablo Tellof6c572c2018-02-14 12:47:30 +000098 bool same_padding) const override;
99
100 /** Gets the stride between matrices in the input worspace
101 *
102 * @param[in] kernel_shape The shape of the weights tensor.
103 * @param[in] input_shape The shape of the input tensor.
104 * @param[in] padding_type The type of padding to be used.
105 *
106 * @return Stride expressed in bytes.
107 */
108 int get_matrix_stride(const KernelShape &kernel_shape, const Tensor4DShape &input_shape, const PaddingType padding_type) const override;
Pablo Tellod6ca4782018-01-23 09:36:04 +0000109
Pablo Tello52140b42018-01-30 14:48:11 +0000110 NEWinogradLayerTransformInputKernel();
Pablo Tellof6c572c2018-02-14 12:47:30 +0000111
Pablo Tellod6ca4782018-01-23 09:36:04 +0000112 const char *name() const override
113 {
114 return "NEWinogradLayerTransformInputKernel";
115 }
Pablo Tello52140b42018-01-30 14:48:11 +0000116
117 /** Configure the output transform kernel.
118 *
119 * @param[in] input Input tensor data
120 * @param[in] n_batches Number of batches in input tensor.
121 * @param[in] n_rows Number of rows in input tensor.
122 * @param[in] n_cols Number of columns in input tensor.
123 * @param[in] n_channels Number of channels in input tensor.
124 * @param[in] padding Padding type.
125 * @param[out] output Base of output matrices.
126 * @param[in] matrix_stride Stride between output matrices.
127 */
128 void configure(
Pablo Tellof6c572c2018-02-14 12:47:30 +0000129 const T *const input,
130 const int n_batches,
131 const int n_rows,
132 const int n_cols,
133 const int n_channels,
134 const PaddingType padding,
135 T *const output,
136 const int matrix_stride) override;
Pablo Tello52140b42018-01-30 14:48:11 +0000137
Pablo Tellod6ca4782018-01-23 09:36:04 +0000138 // Inherited methods overridden:
Pablo Tellod6ca4782018-01-23 09:36:04 +0000139 void run(const Window &window, const ThreadInfo &info) override;
140 bool is_parallelisable() const override;
Pablo Tello52140b42018-01-30 14:48:11 +0000141
Pablo Tellof6c572c2018-02-14 12:47:30 +0000142 using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelCols, KernelCols>;
143 using WinogradConv = typename WinogradBase::template Convolution<T, T>;
144
Pablo Tello52140b42018-01-30 14:48:11 +0000145private:
Pablo Tellof6c572c2018-02-14 12:47:30 +0000146 using InputTransform = typename WinogradBase::template InputTransform<T>;
Pablo Tello52140b42018-01-30 14:48:11 +0000147 std::unique_ptr<InputTransform> _transform;
Pablo Tellod6ca4782018-01-23 09:36:04 +0000148};
149
/** Interface for the NEON kernel that performs the Winograd output transform.
 *
 * @tparam T Element type of the Winograd-domain working space and of the spatial-domain output.
 */
template <typename T>
class INEWinogradLayerTransformOutputKernel : public INEKernel
{
public:
    /** Determine how much memory (in units of T) to allocate for the
     * (Winograd domain) output.
     *
     * @param[in] n_batches         Number of batches in the output tensor.
     * @param[in] n_rows            Number of rows in each feature map of the input tensor.
     * @param[in] n_cols            Number of columns in each feature map of the input tensor.
     * @param[in] n_output_channels Number of feature maps in the output tensor.
     * @param[in] same_padding      Use "SAME" padding, otherwise use "VALID".
     *
     * @return Required storage size in units of T.
     */
    virtual unsigned int get_output_storage_size(int n_batches, int n_rows, int n_cols, int n_output_channels, bool same_padding) const = 0;

    /** Gets the stride between matrices in the output workspace
     *
     * @param[in] kernel_shape The shape of the weights tensor.
     * @param[in] input_shape  The shape of the input tensor.
     * @param[in] padding_type The type of padding to be used.
     *
     * @return Stride expressed in bytes.
     */
    virtual int get_matrix_stride(const KernelShape &kernel_shape, const Tensor4DShape &input_shape, const PaddingType padding_type) const = 0;

    /** Get the output shape of a convolution.
     *
     * @param[in] kernel_shape The shape of the weights tensor.
     * @param[in] in_shape     The shape of the input tensor.
     * @param[in] padding      The type of padding to be used.
     *
     * @return Shape of the output tensor.
     */
    virtual Tensor4DShape get_output_shape(const KernelShape &kernel_shape, const Tensor4DShape &in_shape, const PaddingType padding) const = 0;

    /** Configure the output transform kernel.
     *
     * @param[in]  biases              Pointer to the biases tensor.
     * @param[in]  output_workingspace Pointer to working space for the output tensor in the Winograd domain.
     * @param[in]  matrix_stride       Output matrix stride, can be computed with winograd::WinogradGEMM<2, 2, 3, 3>::Convolution<float, float>::get_output_matrix_stride()
     * @param[out] output              Pointer to NHWC ordered output tensor, in the spatial domain.
     * @param[in]  n_batches           Number of batches in the input tensor.
     * @param[in]  n_rows              Number of rows in output tensor.
     * @param[in]  n_cols              Number of columns in output tensor.
     * @param[in]  n_channels          Number of feature maps in the output tensor.
     */
    virtual void configure(
        const ITensor *biases,
        const T *const output_workingspace,
        const int      matrix_stride,
        T *const       output,
        const int      n_batches,
        const int      n_rows,
        const int      n_cols,
        const int      n_channels) = 0;

    /** Virtual destructor: derived kernels are deleted through this interface. */
    virtual ~INEWinogradLayerTransformOutputKernel()
    {
    }
};
210
/** NEON kernel that performs the Winograd output transform.
 *
 * @tparam T              Element type.
 * @tparam OutputTileRows Rows of the output tile.
 * @tparam OutputTileCols Columns of the output tile.
 * @tparam KernelRows     Rows of the convolution kernel.
 * @tparam KernelCols     Columns of the convolution kernel.
 */
template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerTransformOutputKernel : public INEWinogradLayerTransformOutputKernel<T>
{
public:
    const char *name() const override
    {
        return "NEWinogradLayerTransformOutputKernel";
    }
    /** Constructor */
    NEWinogradLayerTransformOutputKernel();

    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformOutputKernel(const NEWinogradLayerTransformOutputKernel &) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformOutputKernel &operator=(const NEWinogradLayerTransformOutputKernel &) = delete;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformOutputKernel(NEWinogradLayerTransformOutputKernel &&) = default;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformOutputKernel &operator=(NEWinogradLayerTransformOutputKernel &&) = default;

    ~NEWinogradLayerTransformOutputKernel() = default;

    // Inherited methods overridden:
    /** Determine how much memory (in units of T) to allocate for the
     * (Winograd domain) output.
     *
     * @param[in] n_batches         Number of batches in the output tensor.
     * @param[in] n_rows            Number of rows in each feature map of the input tensor.
     * @param[in] n_cols            Number of columns in each feature map of the input tensor.
     * @param[in] n_output_channels Number of feature maps in the output tensor.
     * @param[in] same_padding      Use "SAME" padding, otherwise use "VALID".
     *
     * @return Required storage size in units of T.
     */
    unsigned int get_output_storage_size(int n_batches, int n_rows, int n_cols, int n_output_channels, bool same_padding) const override;

    /** Gets the stride between matrices in the output workspace
     *
     * @param[in] kernel_shape The shape of the weights tensor.
     * @param[in] input_shape  The shape of the input tensor.
     * @param[in] padding_type The type of padding to be used.
     *
     * @return Stride expressed in bytes.
     */
    int get_matrix_stride(const KernelShape &kernel_shape, const Tensor4DShape &input_shape, const PaddingType padding_type) const override;
    /** Get the output shape of a convolution.
     *
     * @param[in] kernel_shape The shape of the weights tensor.
     * @param[in] in_shape     The shape of the input tensor.
     * @param[in] padding      The type of padding to be used.
     *
     * @return Shape of the output tensor.
     */
    Tensor4DShape get_output_shape(const KernelShape &kernel_shape, const Tensor4DShape &in_shape, const PaddingType padding) const override;

    /** Configure the output transform kernel.
     *
     * @param[in]  biases              Pointer to the biases tensor.
     * @param[in]  output_workingspace Pointer to working space for the output tensor in the Winograd domain.
     * @param[in]  matrix_stride       Output matrix stride, can be computed with winograd::WinogradGEMM<2, 2, 3, 3>::Convolution<float, float>::get_output_matrix_stride()
     * @param[out] output              Pointer to NHWC ordered output tensor, in the spatial domain.
     * @param[in]  n_batches           Number of batches in the input tensor.
     * @param[in]  n_rows              Number of rows in output tensor.
     * @param[in]  n_cols              Number of columns in output tensor.
     * @param[in]  n_channels          Number of feature maps in the output tensor.
     */
    void configure(
        const ITensor *biases,
        const T *const output_workingspace,
        const int      matrix_stride,
        T *const       output,
        const int      n_batches,
        const int      n_rows,
        const int      n_cols,
        const int      n_channels) override;

    void run(const Window &window, const ThreadInfo &info) override;
    bool is_parallelisable() const override;

private:
    using WinogradBase    = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
    using WinogradConv    = typename WinogradBase::template Convolution<T, T>;
    using OutputTransform = typename WinogradBase::template OutputTransform<T>;

    // Non-owning pointers captured by configure(); the caller keeps ownership.
    const ITensor *_biases;
    const T       *_output_workspace;
    int            _matrix_stride;
    int            _matrix_row_stride;
    T             *_output;
    int            _n_batches;
    int            _n_rows;
    int            _n_cols;
    int            _n_channels;
};
303
/** Interface for the NEON kernel that performs the Winograd weights transform.
 *
 * @tparam T Element type of the transformed weights.
 */
template <typename T>
class INEWinogradLayerTransformWeightsKernel : public INEKernel
{
public:
    /** Determine how much memory (in units of T) to allocate for the
     * transformed weights.
     *
     * @param[in] n_output_channels Number of output feature maps.
     * @param[in] n_input_channels  Number of input feature maps.
     *
     * @return Required storage size in units of T.
     */
    virtual unsigned int get_weight_storage_size(int n_output_channels, int n_input_channels) const = 0;
    /** Gets the stride between matrices in the kernel workspace
     *
     * @param[in] kernel_shape The shape of the weights tensor.
     *
     * @return Stride expressed in bytes.
     */
    virtual int get_matrix_stride(const KernelShape &kernel_shape) const = 0;

    /** Configure the weights transform kernel.
     *
     * @param[in]  weights_hwio      Pointer to the weights tensor
     * @param[out] output            Pointer to working space for the output tensor in the Winograd domain.
     * @param[in]  matrix_stride     Stride across matrices in the output workspace.
     * @param[in]  n_output_channels Number of filters.
     * @param[in]  n_input_channels  Number of channels in each filter.
     */
    virtual void configure(const ITensor *weights_hwio, T *const output, const int matrix_stride, const int n_output_channels, const int n_input_channels) = 0;

    /** Virtual destructor: derived kernels are deleted through this interface. */
    virtual ~INEWinogradLayerTransformWeightsKernel()
    {
    }
};
337
/** NEON kernel that performs the Winograd weights transform.
 *
 * @tparam T              Element type.
 * @tparam OutputTileRows Rows of the output tile.
 * @tparam OutputTileCols Columns of the output tile.
 * @tparam KernelRows     Rows of the convolution kernel.
 * @tparam KernelCols     Columns of the convolution kernel.
 */
template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerTransformWeightsKernel final : public INEWinogradLayerTransformWeightsKernel<T>
{
public:
    /** Default constructor. */
    NEWinogradLayerTransformWeightsKernel();
    const char *name() const override
    {
        return "NEWinogradLayerTransformWeightsKernel";
    }

    // Inherited methods overridden:
    void configure(const ITensor *weights_hwio, T *const output, const int matrix_stride, const int n_output_channels, const int n_input_channels) override;
    unsigned int get_weight_storage_size(int n_output_channels, int n_input_channels) const override;
    int get_matrix_stride(const KernelShape &kernel_shape) const override;
    void run(const Window &window, const ThreadInfo &info) override;
    bool is_parallelisable() const override;

private:
    using WinogradBase     = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
    using WinogradConv     = typename WinogradBase::template Convolution<T, T>;
    using WeightsTransform = typename WinogradBase::template WeightsTransform<T>;
    std::unique_ptr<WeightsTransform> _transform;
};
361
Pablo Tellof6c572c2018-02-14 12:47:30 +0000362template <typename TIn, typename TOut>
363class INEWinogradLayerBatchedGEMMKernel : public INEKernel
364{
365public:
366 /** Get the number of GEMMs to compute
367 */
368 virtual unsigned int get_number_gemms() const = 0;
369 /** Initialise the kernel
370 *
371 * @param[in] n_gemms Number of GEMMs to compute.
372 * @param[in] M in_shape.n_batches * tile_rows * tile_cols.
373 * @param[in] K Number of channels in the input tensor.
374 * @param[in] N Number of channels in the output tensor.
375 * @param[in] a_matrix_stride Stride between input matrices.
376 * @param[in] a_row_stride Row stride inside input matrix.
377 * @param[in] b_matrix_stride Stride between weights matrices.
378 * @param[in] b_row_stride Row stride inside the weights matrix.
379 * @param[in] c_matrix_stride Stride between output matrices.
380 * @param[in] c_row_stride Row stride inside the output matrix.
381 * @param[out] a_ptr Input workspace.
382 * @param[out] b_ptr Kernel workspace.
383 * @param[out] c_ptr Output workspace.
384 */
385 virtual void configure(
386 const unsigned int n_gemms,
387 const int M, const int K, const int N,
388 const int a_matrix_stride,
389 const int a_row_stride,
390 const int b_matrix_stride,
391 const int b_row_stride,
392 const int c_matrix_stride,
393 const int c_row_stride,
394 const TIn *const a_ptr,
395 const TIn *const b_ptr,
396 TOut *const c_ptr) = 0;
397
398 /** Get the number of tiles per row
399 */
400 virtual int get_output_tile_rows() const = 0;
401 /** Get the number of tiles per columns
402 */
403 virtual int get_output_tile_cols() const = 0;
404 /** Get the number of blocks
405 */
406 virtual int get_number_blocks() const = 0;
407};
408
/** NEON kernel that performs the batched Winograd-domain GEMMs.
 *
 * @tparam TIn            Element type of the input and weights workspaces.
 * @tparam TOut           Element type of the output workspace.
 * @tparam OutputTileRows Rows of the output tile.
 * @tparam OutputTileCols Columns of the output tile.
 * @tparam KernelRows     Rows of the convolution kernel.
 * @tparam KernelCols     Columns of the convolution kernel.
 */
template <typename TIn, typename TOut, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerBatchedGEMMKernel : public INEWinogradLayerBatchedGEMMKernel<TIn, TOut>
{
public:
    using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
    using WinogradConv = typename WinogradBase::template Convolution<TIn, TOut>;
    using MultiGEMM    = winograd::BatchedBlockedGemm<WinogradConv::M_BLOCK, WinogradConv::N_BLOCK, TIn, TOut>;

    // Output tile dimensions, exposed via get_output_tile_rows()/get_output_tile_cols().
    static const int _output_tile_rows = OutputTileRows;
    static const int _output_tile_cols = OutputTileCols;

    const char *name() const override
    {
        return "NEWinogradLayerBatchedGEMMKernel";
    }
    /** Constructor */
    NEWinogradLayerBatchedGEMMKernel();

    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerBatchedGEMMKernel(const NEWinogradLayerBatchedGEMMKernel &) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerBatchedGEMMKernel &operator=(const NEWinogradLayerBatchedGEMMKernel &) = delete;
    /** Allow instances of this class to be moved */
    NEWinogradLayerBatchedGEMMKernel(NEWinogradLayerBatchedGEMMKernel &&) = default;
    /** Allow instances of this class to be moved */
    NEWinogradLayerBatchedGEMMKernel &operator=(NEWinogradLayerBatchedGEMMKernel &&) = default;

    ~NEWinogradLayerBatchedGEMMKernel() = default;

    // Inherited methods overridden:

    unsigned int get_number_gemms() const override;
    int get_output_tile_rows() const override;
    int get_output_tile_cols() const override;
    int get_number_blocks() const override;

    /** Initialise the kernel
     *
     * Note: a_ptr and b_ptr are read-only for this kernel (const TIn *), so they
     * are documented as inputs; only c_ptr is written.
     *
     * @param[in]  n_gemms         Number of GEMMs to compute.
     * @param[in]  M               in_shape.n_batches * tile_rows * tile_cols.
     * @param[in]  K               Number of channels in the input tensor.
     * @param[in]  N               Number of channels in the output tensor.
     * @param[in]  a_matrix_stride Stride between input matrices.
     * @param[in]  a_row_stride    Row stride inside input matrix.
     * @param[in]  b_matrix_stride Stride between weights matrices.
     * @param[in]  b_row_stride    Row stride inside the weights matrix.
     * @param[in]  c_matrix_stride Stride between output matrices.
     * @param[in]  c_row_stride    Row stride inside the output matrix.
     * @param[in]  a_ptr           Input workspace.
     * @param[in]  b_ptr           Kernel workspace.
     * @param[out] c_ptr           Output workspace.
     */
    void configure(
        const unsigned int n_gemms,
        const int M, const int K, const int N,
        const int        a_matrix_stride,
        const int        a_row_stride,
        const int        b_matrix_stride,
        const int        b_row_stride,
        const int        c_matrix_stride,
        const int        c_row_stride,
        const TIn *const a_ptr,
        const TIn *const b_ptr,
        TOut *const      c_ptr) override;

    void run(const Window &window, const ThreadInfo &info) override;

private:
    std::unique_ptr<MultiGEMM> _gemms;
};
479
480} // namespace arm_compute
481#endif /*__ARM_COMPUTE_NEGEMMWINOGRADLAYERKERNEL_H__*/