/*
 * Copyright (c) 2017-2018 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef __ARM_COMPUTE_NEGEMMWINOGRADCONVOLUTIONLAYERKERNEL_H__
#define __ARM_COMPUTE_NEGEMMWINOGRADCONVOLUTIONLAYERKERNEL_H__

#include "arm_compute/core/NEON/INEKernel.h"
#include "arm_compute/core/NEON/kernels/convolution/common/convolution.hpp"
#include "arm_compute/core/NEON/kernels/convolution/common/tensor.hpp"
#include "arm_compute/core/NEON/kernels/convolution/winograd/batched_blocked_gemm.hpp"
#include "arm_compute/core/NEON/kernels/convolution/winograd/winograd_gemm.hpp"

namespace arm_compute
{
class ITensor;

/** Interface for the NEON kernel to perform Winograd input transform. */
template <typename T>
class INEWinogradLayerTransformInputKernel : public INEKernel
{
public:
    /** Determine how much memory (in units of TIn) to allocate for the
     * transformed input.
     *
     * @param[in] num_batches  Number of batches in the input tensor.
     * @param[in] num_channels Number of feature maps in the input tensor.
     * @param[in] num_rows     Number of rows in each feature map.
     * @param[in] num_cols     Number of columns in each feature map.
     * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
     *
     * @return Storage size (in units of TIn) required.
     */
    virtual unsigned int get_input_storage_size(int num_batches, int num_channels, int num_rows, int num_cols, bool same_padding) const = 0;

    /** Gets the stride between matrices in the input workspace
     *
     * @param[in] kernel_shape The shape of the weights tensor.
     * @param[in] input_shape  The shape of the input tensor.
     * @param[in] padding_type The type of padding to be used.
     *
     * @return Stride expressed in bytes.
     */
    virtual int get_matrix_stride(const KernelShape &kernel_shape, const Tensor4DShape &input_shape, const PaddingType padding_type) const = 0;

    /** Configure the input transform kernel.
     *
     * @param[in]  input_nhwc    Input tensor in NHWC data layout format.
     * @param[in]  num_batches   Number of batches in input tensor.
     * @param[in]  num_rows      Number of rows in input tensor.
     * @param[in]  num_cols      Number of columns in input tensor.
     * @param[in]  num_channels  Number of channels in input tensor.
     * @param[in]  padding       Padding type.
     * @param[out] output        Base of output matrices.
     * @param[in]  matrix_stride Stride between output matrices.
     */
    virtual void configure(const ITensor *input_nhwc, const int num_batches, const int num_rows, const int num_cols, const int num_channels,
                           const PaddingType padding, ITensor *output, const int matrix_stride) = 0;

    /** Destructor */
    virtual ~INEWinogradLayerTransformInputKernel()
    {
    }
};

/** NEON kernel to perform Winograd input transform. */
template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerTransformInputKernel : public INEWinogradLayerTransformInputKernel<T>
{
public:
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformInputKernel(const NEWinogradLayerTransformInputKernel &) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformInputKernel &operator=(const NEWinogradLayerTransformInputKernel &) = delete;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformInputKernel(NEWinogradLayerTransformInputKernel &&) = default;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformInputKernel &operator=(NEWinogradLayerTransformInputKernel &&) = default;
    /** Default destructor */
    ~NEWinogradLayerTransformInputKernel() = default;

    /** Determine how much memory (in units of TIn) to allocate for the
     * transformed input.
     *
     * @param[in] num_batches  Number of batches in the input tensor.
     * @param[in] num_channels Number of feature maps in the input tensor.
     * @param[in] num_rows     Number of rows in each feature map.
     * @param[in] num_cols     Number of columns in each feature map.
     * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
     *
     * @return Storage size (in units of TIn) required.
     */
    unsigned int get_input_storage_size(
        int  num_batches,
        int  num_channels,
        int  num_rows,
        int  num_cols,
        bool same_padding) const override;

    /** Gets the stride between matrices in the input workspace
     *
     * @param[in] kernel_shape The shape of the weights tensor.
     * @param[in] input_shape  The shape of the input tensor.
     * @param[in] padding_type The type of padding to be used.
     *
     * @return Stride expressed in bytes.
     */
    int get_matrix_stride(const KernelShape &kernel_shape, const Tensor4DShape &input_shape, const PaddingType padding_type) const override;

    /** Default constructor */
    NEWinogradLayerTransformInputKernel();

    const char *name() const override
    {
        return "NEWinogradLayerTransformInputKernel";
    }

    /** Configure the input transform kernel.
     *
     * @param[in]  input_nhwc    Input tensor. Data types supported: F32. Layout supported: NHWC.
     * @param[in]  num_batches   Number of batches in input tensor.
     * @param[in]  num_rows      Number of rows in input tensor.
     * @param[in]  num_cols      Number of columns in input tensor.
     * @param[in]  num_channels  Number of channels in input tensor.
     * @param[in]  padding       Padding type.
     * @param[out] output        Base of output matrices.
     * @param[in]  matrix_stride Stride between output matrices.
     */
    void configure(
        const ITensor    *input_nhwc,
        const int         num_batches,
        const int         num_rows,
        const int         num_cols,
        const int         num_channels,
        const PaddingType padding,
        ITensor          *output,
        const int         matrix_stride) override;

    // Inherited methods overridden:
    void run(const Window &window, const ThreadInfo &info) override;

    /** Winograd base kernel */
    using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
    /** Winograd convolution kernel */
    using WinogradConv = typename WinogradBase::template Convolution<T, T>;

    /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformInputKernel
     *
     * @param[in] input         First tensor input info. Data types supported: F32.
     * @param[in] output        Output tensor info. Data types supported: same as @p input.
     * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info);

private:
    using InputTransform = typename WinogradBase::template InputTransform<T>;

    const ITensor *_input_nhwc;
    int            _num_batches;   /**< Number of batches in input tensor. */
    int            _num_rows;      /**< Number of rows in input tensor. */
    int            _num_cols;      /**< Number of columns in input tensor. */
    int            _num_channels;  /**< Number of channels in input tensor. */
    PaddingType    _padding;       /**< Padding type. */
    ITensor       *_output;        /**< Base of output matrices. */
    int            _matrix_stride; /**< Stride between output matrices. */
};

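/* Note: a minimal, illustrative usage sketch of the input transform kernel; it is not part of the
 * API. The NHWC `input` and `workspace` ITensor objects, as well as the `kernel_shape` and
 * `input_shape` descriptors, are assumed to be prepared by the caller (normally done by the
 * NEWinogradConvolutionLayer runtime function).
 *
 *   using InputTransform = NEWinogradLayerTransformInputKernel<float, 2, 2, 3, 3>;
 *   InputTransform transform;
 *   const int num_batches = 1, num_rows = 56, num_cols = 56, num_channels = 64;
 *   // Number of elements (of type T) that the transformed-input workspace must hold:
 *   const unsigned int storage_size = transform.get_input_storage_size(num_batches, num_channels, num_rows, num_cols, false);
 *   const int matrix_stride = transform.get_matrix_stride(kernel_shape, input_shape, PADDING_VALID);
 *   transform.configure(&input, num_batches, num_rows, num_cols, num_channels, PADDING_VALID, &workspace, matrix_stride);
 *   NEScheduler::get().schedule(&transform, Window::DimX);
 */
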
/** Interface for the NEON kernel to perform Winograd output transform. */
template <typename T>
class INEWinogradLayerTransformOutputKernel : public INEKernel
{
public:
    /** Determine how much memory (in units of TOut) to allocate for the
     * (Winograd domain) output.
     *
     * @param[in] num_batches         Number of batches in the output tensor.
     * @param[in] num_rows            Number of rows in each feature map of the input tensor.
     * @param[in] num_cols            Number of columns in each feature map of the input tensor.
     * @param[in] num_output_channels Number of feature maps in the output tensor.
     * @param[in] same_padding        Use "SAME" padding, otherwise use "VALID".
     *
     * @return Storage size (in units of TOut) required.
     */
    virtual unsigned int get_output_storage_size(int num_batches, int num_rows, int num_cols, int num_output_channels, bool same_padding) const = 0;

    /** Gets the stride between matrices in the output workspace
     *
     * @param[in] kernel_shape The shape of the weights tensor.
     * @param[in] input_shape  The shape of the input tensor.
     * @param[in] padding_type The type of padding to be used.
     *
     * @return Stride expressed in bytes.
     */
    virtual int get_matrix_stride(const KernelShape &kernel_shape, const Tensor4DShape &input_shape, const PaddingType padding_type) const = 0;

    /** Get the output shape of a convolution.
     *
     * @param[in] kernel_shape The shape of the weights tensor.
     * @param[in] in_shape     The shape of the input tensor.
     * @param[in] padding      The type of padding to be used.
     *
     * @return Shape of the output tensor.
     */
    virtual Tensor4DShape get_output_shape(const KernelShape &kernel_shape, const Tensor4DShape &in_shape, const PaddingType padding) const = 0;

    /** Configure the output transform kernel.
     *
     * @param[in]  biases              Pointer to the biases tensor.
     * @param[in]  output_workingspace Pointer to working space for the output tensor in the Winograd domain.
     * @param[in]  matrix_stride       Output matrix stride, can be computed with winograd::WinogradGEMM<2, 2, 3, 3>::Convolution<float, float>::get_output_matrix_stride()
     * @param[out] output_nhwc         Pointer to a tensor with NHWC data layout, in the spatial domain.
     * @param[in]  num_batches         Number of batches in the input tensor.
     * @param[in]  num_rows            Number of rows in output tensor.
     * @param[in]  num_cols            Number of columns in output tensor.
     * @param[in]  num_channels        Number of feature maps in the output tensor.
     */
    virtual void configure(
        const ITensor *biases,
        const ITensor *output_workingspace,
        const int      matrix_stride,
        ITensor       *output_nhwc,
        const int      num_batches,
        const int      num_rows,
        const int      num_cols,
        const int      num_channels) = 0;

    /** Destructor */
    virtual ~INEWinogradLayerTransformOutputKernel()
    {
    }
};

/** NEON kernel to perform Winograd output transform. */
template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerTransformOutputKernel : public INEWinogradLayerTransformOutputKernel<T>
{
public:
    const char *name() const override
    {
        return "NEWinogradLayerTransformOutputKernel";
    }
    /** Constructor */
    NEWinogradLayerTransformOutputKernel();

    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformOutputKernel(const NEWinogradLayerTransformOutputKernel &) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformOutputKernel &operator=(const NEWinogradLayerTransformOutputKernel &) = delete;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformOutputKernel(NEWinogradLayerTransformOutputKernel &&) = default;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformOutputKernel &operator=(NEWinogradLayerTransformOutputKernel &&) = default;
    /** Default destructor */
    ~NEWinogradLayerTransformOutputKernel() = default;

    // Inherited methods overridden:
    /** Determine how much memory (in units of TOut) to allocate for the
     * (Winograd domain) output.
     *
     * @param[in] num_batches         Number of batches in the output tensor.
     * @param[in] num_rows            Number of rows in each feature map of the input tensor.
     * @param[in] num_cols            Number of columns in each feature map of the input tensor.
     * @param[in] num_output_channels Number of feature maps in the output tensor.
     * @param[in] same_padding        Use "SAME" padding, otherwise use "VALID".
     *
     * @return Storage size (in units of TOut) required.
     */
    unsigned int get_output_storage_size(int num_batches, int num_rows, int num_cols, int num_output_channels, bool same_padding) const override;

    /** Gets the stride between matrices in the output workspace
     *
     * @param[in] kernel_shape The shape of the weights tensor.
     * @param[in] input_shape  The shape of the input tensor.
     * @param[in] padding_type The type of padding to be used.
     *
     * @return Stride expressed in bytes.
     */
    int get_matrix_stride(const KernelShape &kernel_shape, const Tensor4DShape &input_shape, const PaddingType padding_type) const override;
    /** Get the output shape of a convolution.
     *
     * @param[in] kernel_shape The shape of the weights tensor.
     * @param[in] in_shape     The shape of the input tensor.
     * @param[in] padding      The type of padding to be used.
     *
     * @return Shape of the output tensor.
     */
    Tensor4DShape get_output_shape(const KernelShape &kernel_shape, const Tensor4DShape &in_shape, const PaddingType padding) const override;

    /** Configure the output transform kernel.
     *
     * @param[in]  biases              Pointer to the biases tensor.
     * @param[in]  output_workingspace Pointer to working space for the output tensor in the Winograd domain.
     * @param[in]  matrix_stride       Output matrix stride, can be computed with winograd::WinogradGEMM<2, 2, 3, 3>::Convolution<float, float>::get_output_matrix_stride()
     * @param[out] output_nhwc         Pointer to a tensor with NHWC data layout, in the spatial domain.
     * @param[in]  num_batches         Number of batches in the input tensor.
     * @param[in]  num_rows            Number of rows in output tensor.
     * @param[in]  num_cols            Number of columns in output tensor.
     * @param[in]  num_channels        Number of feature maps in the output tensor.
     */
    void configure(
        const ITensor *biases,
        const ITensor *output_workingspace,
        const int      matrix_stride,
        ITensor       *output_nhwc,
        const int      num_batches,
        const int      num_rows,
        const int      num_cols,
        const int      num_channels) override;

    void run(const Window &window, const ThreadInfo &info) override;

    /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformOutputKernel
     *
     * @param[in]  input         Source tensor with shape [C, N, 16, batches] or [C, N, 36, batches]. Data types supported: F32.
     * @param[in]  bias          Biases tensor. Shared biases supported. Biases are a 1D tensor with dimensions [OFM]. It can be a nullptr. Data type supported: as @p input
     * @param[out] output        Destination tensor with shape [output_convolved_dims.width, output_convolved_dims.height, C, batches]. Data type supported: same as @p input
     * @param[in]  winograd_info Contains Winograd's information described in @ref WinogradInfo
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const WinogradInfo &winograd_info);

private:
    using WinogradBase    = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
    using WinogradConv    = typename WinogradBase::template Convolution<T, T>;
    using OutputTransform = typename WinogradBase::template OutputTransform<T>;

    const ITensor *_biases;
    const ITensor *_output_workspace;
    int            _matrix_stride;
    int            _matrix_row_stride;
    ITensor       *_output_nhwc;
    int            _num_batches;
    int            _num_rows;
    int            _num_cols;
    int            _num_channels;
};

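/* Note: a minimal, illustrative sketch of driving the output transform once the batched GEMM has
 * filled the Winograd-domain workspace; it is not part of the API. `biases`, `gemm_output_workspace`
 * and `output` are caller-managed ITensor objects, and `kernel_shape`/`input_shape` are descriptors
 * prepared by the caller; the NEWinogradConvolutionLayer runtime function performs this wiring.
 *
 *   using OutputTransform = NEWinogradLayerTransformOutputKernel<float, 2, 2, 3, 3>;
 *   OutputTransform transform;
 *   // Spatial-domain shape of the convolution result for the given shapes and padding:
 *   const Tensor4DShape out_shape     = transform.get_output_shape(kernel_shape, input_shape, PADDING_VALID);
 *   const int           matrix_stride = transform.get_matrix_stride(kernel_shape, input_shape, PADDING_VALID);
 *   transform.configure(&biases, &gemm_output_workspace, matrix_stride, &output,
 *                       out_shape.n_batches, out_shape.n_rows, out_shape.n_cols, out_shape.n_channels);
 *   NEScheduler::get().schedule(&transform, Window::DimX);
 */
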
/** Interface for the NEON kernel to perform Winograd weights transform. */
template <typename T>
class INEWinogradLayerTransformWeightsKernel : public INEKernel
{
public:
    /** Allow instances of this class to be copied */
    INEWinogradLayerTransformWeightsKernel(const INEWinogradLayerTransformWeightsKernel &) = default;
    /** Allow instances of this class to be copied */
    INEWinogradLayerTransformWeightsKernel &operator=(const INEWinogradLayerTransformWeightsKernel &) = default;
    /** Allow instances of this class to be moved */
    INEWinogradLayerTransformWeightsKernel(INEWinogradLayerTransformWeightsKernel &&) = default;
    /** Allow instances of this class to be moved */
    INEWinogradLayerTransformWeightsKernel &operator=(INEWinogradLayerTransformWeightsKernel &&) = default;

    /** Default constructor */
    INEWinogradLayerTransformWeightsKernel()
    {
    }
    /** Destructor */
    virtual ~INEWinogradLayerTransformWeightsKernel()
    {
    }
    /** Determine how much memory (in units of T) to allocate for the
     * transformed weights.
     *
     * @param[in] num_output_channels Number of output feature maps.
     * @param[in] num_input_channels  Number of input feature maps.
     *
     * @return Storage size (in units of T) required.
     */
    virtual unsigned int get_weight_storage_size(int num_output_channels, int num_input_channels) const = 0;
    /** Gets the stride between matrices in the kernel workspace
     *
     * @param[in] kernel_shape The shape of the weights tensor.
     *
     * @return Stride expressed in bytes.
     */
    virtual int get_matrix_stride(const KernelShape &kernel_shape) const = 0;

    /** Configure the weights transform kernel.
     *
     * @param[in]  weights_hwio        Pointer to the weights tensor.
     * @param[out] output              Pointer to working space for the output tensor in the Winograd domain.
     * @param[in]  matrix_stride       Stride across matrices in the output workspace.
     * @param[in]  num_output_channels Number of filters.
     * @param[in]  num_input_channels  Number of channels in each filter.
     */
    virtual void configure(const ITensor *weights_hwio, ITensor *output, const int matrix_stride, const int num_output_channels, const int num_input_channels) = 0;

    /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformWeightsKernel
     *
     * @param[in] input   First tensor input info. Data types supported: F32.
     * @param[in] weights Weights tensor info. Data types supported: same as @p input.
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *weights);
};

/** NEON kernel to perform Winograd weights transform. */
template <typename T, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerTransformWeightsKernel final : public INEWinogradLayerTransformWeightsKernel<T>
{
public:
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformWeightsKernel(const NEWinogradLayerTransformWeightsKernel &) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformWeightsKernel &operator=(const NEWinogradLayerTransformWeightsKernel &) = delete;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformWeightsKernel(NEWinogradLayerTransformWeightsKernel &&) = default;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformWeightsKernel &operator=(NEWinogradLayerTransformWeightsKernel &&) = default;
    /** Default destructor */
    ~NEWinogradLayerTransformWeightsKernel() = default;

    /** Default constructor. */
    NEWinogradLayerTransformWeightsKernel();
    const char *name() const override
    {
        return "NEWinogradLayerTransformWeightsKernel";
    }

    /** Static function to check if given info will lead to a valid configuration of @ref NEWinogradLayerTransformWeightsKernel
     *
     * @param[in] input         Source tensor info. The input is a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] (NCHW data layout).
     *                          kernel_x must be 3 and equal to kernel_y. Data types supported: F32.
     * @param[in] output        Destination tensor info. The output is a 3D tensor with dimensions [OFM, IFM, 16] or [OFM, IFM, 36]. Data type supported: same as @p input
     * @param[in] winograd_info Contains Winograd's information described in @ref WinogradInfo
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const WinogradInfo &winograd_info);

    // Inherited methods overridden:

#ifndef DOXYGEN_SKIP_THIS
    /** Configure the weights transform kernel.
     *
     * @param[in]  weights_hwio        Pointer to the weights tensor.
     * @param[out] output              Pointer to working space for the output tensor in the Winograd domain.
     * @param[in]  matrix_stride       Stride across matrices in the output workspace.
     * @param[in]  num_output_channels Number of filters.
     * @param[in]  num_input_channels  Number of channels in each filter.
     */
    void configure(const ITensor *weights_hwio, ITensor *output, const int matrix_stride, const int num_output_channels, const int num_input_channels) override;
#endif /* DOXYGEN_SKIP_THIS */

    /** Determine how much memory (in units of T) to allocate for the
     * transformed weights.
     *
     * @param[in] num_output_channels Number of output feature maps.
     * @param[in] num_input_channels  Number of input feature maps.
     *
     * @return Storage size (in units of T) required.
     */
    unsigned int get_weight_storage_size(int num_output_channels, int num_input_channels) const override;

    /** Gets the stride between matrices in the kernel workspace
     *
     * @param[in] kernel_shape The shape of the weights tensor.
     *
     * @return Stride expressed in bytes.
     */
    int get_matrix_stride(const KernelShape &kernel_shape) const override;
    void run(const Window &window, const ThreadInfo &info) override;
    bool is_parallelisable() const override;

private:
    using WinogradBase     = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
    using WinogradConv     = typename WinogradBase::template Convolution<T, T>;
    using WeightsTransform = typename WinogradBase::template WeightsTransform<T>;

    const ITensor *_weights_hwio;
    ITensor       *_output;
    int            _matrix_stride;
    int            _num_output_channels;
    int            _num_input_channels;
};

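/* Note: a minimal, illustrative sketch of the weights transform; it is not part of the API.
 * `weights` (HWIO layout), `weights_workspace` and the `kernel_shape` descriptor are assumed to be
 * prepared by the caller, and the channel counts are placeholders. The transform only needs to be
 * run once per set of weights.
 *
 *   using WeightsTransform = NEWinogradLayerTransformWeightsKernel<float, 2, 2, 3, 3>;
 *   WeightsTransform transform;
 *   const int num_output_channels = 128, num_input_channels = 64;
 *   // Number of elements (of type T) that the transformed-weights workspace must hold:
 *   const unsigned int storage_size = transform.get_weight_storage_size(num_output_channels, num_input_channels);
 *   const int matrix_stride = transform.get_matrix_stride(kernel_shape);
 *   transform.configure(&weights, &weights_workspace, matrix_stride, num_output_channels, num_input_channels);
 *   NEScheduler::get().schedule(&transform, Window::DimX);
 */
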
/** Collection of the kernel types used to perform a Winograd convolution for a given data type and tile/kernel geometry. */
template <typename TIn, typename TOut, int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerConfiguration
{
public:
    /** Winograd base kernel */
    using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
    /** Winograd convolution kernel */
    using WinogradConv = typename WinogradBase::template Convolution<TIn, TOut>;

    /** Input transform kernel matching this configuration */
    using TransformInputKernel = NEWinogradLayerTransformInputKernel<TIn, OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
    /** Weights transform kernel matching this configuration */
    using TransformWeightsKernel = NEWinogradLayerTransformWeightsKernel<TIn, OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
    /** Output transform kernel matching this configuration */
    using TransformOutputKernel = NEWinogradLayerTransformOutputKernel<TOut, OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
};
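
/* Note: an illustrative sketch of how the configuration class names a matching set of transform
 * kernels for one geometry (here F32 with a 2x2 output tile and a 3x3 kernel); the actual kernel
 * selection is performed by the Winograd convolution runtime function, not in this header.
 *
 *   using WinogradConfig = NEWinogradLayerConfiguration<float, float, 2, 2, 3, 3>;
 *   WinogradConfig::TransformInputKernel   input_transform;
 *   WinogradConfig::TransformWeightsKernel weights_transform;
 *   WinogradConfig::TransformOutputKernel  output_transform;
 */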

} // namespace arm_compute
#endif /*__ARM_COMPUTE_NEGEMMWINOGRADCONVOLUTIONLAYERKERNEL_H__*/