/*
 * Copyright (c) 2017-2018 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef __ARM_COMPUTE_NEGEMMWINOGRADLAYERKERNEL_H__
#define __ARM_COMPUTE_NEGEMMWINOGRADLAYERKERNEL_H__

#include "arm_compute/core/NEON/INEKernel.h"
#include "arm_compute/core/NEON/kernels/winograd/batched_blocked_gemm.hpp"
#include "arm_compute/core/NEON/kernels/winograd/convolution.hpp"
#include "arm_compute/core/NEON/kernels/winograd/tensor.hpp"
#include "arm_compute/core/NEON/kernels/winograd/winograd_gemm.hpp"

namespace arm_compute
{
class ITensor;

template <int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerTransformInputKernel : public INEKernel
{
public:
    /** Determine how much memory (in units of TIn) to allocate for the
     * transformed input.
     *
     * @param[in] n_batches    Number of batches in the input tensor.
     * @param[in] n_channels   Number of feature maps in the input tensor.
     * @param[in] n_rows       Number of rows in each feature map.
     * @param[in] n_cols       Number of columns in each feature map.
     * @param[in] same_padding Use "SAME" padding, otherwise use "VALID".
     */
    static unsigned int get_input_storage_size(
        int  n_batches,
        int  n_channels,
        int  n_rows,
        int  n_cols,
        bool same_padding);
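    // A minimal sizing sketch (illustrative only, not part of the API contract):
    // the returned count is in elements of TIn (float for this kernel), so a
    // caller could allocate a workspace as below. The shape values are hypothetical.
    //
    //   const unsigned int input_storage =
    //       NEWinogradLayerTransformInputKernel<2, 2, 3, 3>::get_input_storage_size(
    //           1 /* n_batches */, 64 /* n_channels */, 56 /* n_rows */, 56 /* n_cols */,
    //           true /* same_padding */);
    //   std::vector<float> input_workspace(input_storage);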

    NEWinogradLayerTransformInputKernel();
    const char *name() const override
    {
        return "NEWinogradLayerTransformInputKernel";
    }

    /** Configure the input transform kernel.
     *
     * @param[in]  input         Input tensor data.
     * @param[in]  n_batches     Number of batches in input tensor.
     * @param[in]  n_rows        Number of rows in input tensor.
     * @param[in]  n_cols        Number of columns in input tensor.
     * @param[in]  n_channels    Number of channels in input tensor.
     * @param[in]  padding       Padding type.
     * @param[out] output        Base of output matrices.
     * @param[in]  matrix_stride Stride between output matrices.
     */
    void configure(
        const float *const input,
        const int          n_batches,
        const int          n_rows,
        const int          n_cols,
        const int          n_channels,
        const PaddingType  padding,
        float *const       output,
        const int          matrix_stride);

    // Inherited methods overridden:
    void run(const Window &window, const ThreadInfo &info) override;
    bool is_parallelisable() const override;

private:
    using WinogradBase   = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
    using WinogradConv   = typename WinogradBase::template Convolution<float, float>;
    using InputTransform = typename WinogradBase::template InputTransform<float>;
    std::unique_ptr<InputTransform> _transform;
};

template <int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerTransformOutputKernel : public INEKernel
{
public:
    /** Determine how much memory (in units of TOut) to allocate for the
     * (Winograd domain) output.
     *
     * @param[in] n_batches         Number of batches in the output tensor.
     * @param[in] n_rows            Number of rows in each feature map of the input tensor.
     * @param[in] n_cols            Number of columns in each feature map of the input tensor.
     * @param[in] n_output_channels Number of feature maps in the output tensor.
     * @param[in] same_padding      Use "SAME" padding, otherwise use "VALID".
     */
    static unsigned int get_output_storage_size(
        int  n_batches,
        int  n_rows,
        int  n_cols,
        int  n_output_channels,
        bool same_padding);

    const char *name() const override
    {
        return "NEWinogradLayerTransformOutputKernel";
    }
    /** Constructor */
    NEWinogradLayerTransformOutputKernel();

    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformOutputKernel(const NEWinogradLayerTransformOutputKernel &) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerTransformOutputKernel &operator=(const NEWinogradLayerTransformOutputKernel &) = delete;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformOutputKernel(NEWinogradLayerTransformOutputKernel &&) = default;
    /** Allow instances of this class to be moved */
    NEWinogradLayerTransformOutputKernel &operator=(NEWinogradLayerTransformOutputKernel &&) = default;

    ~NEWinogradLayerTransformOutputKernel() = default;

    /** Configure the output transform kernel.
     *
     * @param[in]  biases              Pointer to the biases tensor.
     * @param[in]  output_workingspace Pointer to working space for the output tensor in the Winograd domain.
     * @param[in]  matrix_stride       Output matrix stride; can be computed with winograd::WinogradGEMM<2, 2, 3, 3>::Convolution<float, float>::get_output_matrix_stride().
     * @param[out] output              Pointer to NHWC ordered output tensor, in the spatial domain.
     * @param[in]  n_batches           Number of batches in the input tensor.
     * @param[in]  n_rows              Number of rows in the output tensor.
     * @param[in]  n_cols              Number of columns in the output tensor.
     * @param[in]  n_channels          Number of feature maps in the output tensor.
     */
    void configure(
        const ITensor     *biases,
        const float *const output_workingspace,
        const int          matrix_stride,
        float *const       output,
        const int          n_batches,
        const int          n_rows,
        const int          n_cols,
        const int          n_channels);
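    // An illustrative wiring sketch (hypothetical shapes; biases, matrix_stride
    // and output_ptr are assumed to be set up elsewhere): the Winograd-domain
    // workspace sized by get_output_storage_size() above is what
    // output_workingspace points to when configure() is called.
    //
    //   using OutputKernel = NEWinogradLayerTransformOutputKernel<2, 2, 3, 3>;
    //   std::vector<float> output_workspace(OutputKernel::get_output_storage_size(
    //       1 /* n_batches */, 56 /* n_rows */, 56 /* n_cols */, 64 /* n_output_channels */, true));
    //   OutputKernel transform;
    //   transform.configure(biases, output_workspace.data(), matrix_stride,
    //                       output_ptr, 1 /* n_batches */, 56, 56, 64 /* n_channels */);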

    // Inherited methods overridden:
    void run(const Window &window, const ThreadInfo &info) override;
    bool is_parallelisable() const override;

private:
    using WinogradBase    = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
    using WinogradConv    = typename WinogradBase::template Convolution<float, float>;
    using OutputTransform = typename WinogradBase::template OutputTransform<float>;

    const ITensor *_biases;
    const float   *_output_workspace;
    int            _matrix_stride;
    int            _matrix_row_stride;
    float         *_output;
    int            _n_batches;
    int            _n_rows;
    int            _n_cols;
    int            _n_channels;
};

template <int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerTransformWeightsKernel final : public INEKernel
{
public:
    /** Determine how much memory (in units of TIn) to allocate for the
     * transformed weights.
     *
     * @param[in] n_output_channels Number of output feature maps.
     * @param[in] n_input_channels  Number of input feature maps.
     */
    static unsigned int get_weight_storage_size(int n_output_channels, int n_input_channels);
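    // The same sizing pattern as for the input and output workspaces above,
    // sketched with hypothetical channel counts:
    //
    //   const unsigned int weight_storage =
    //       NEWinogradLayerTransformWeightsKernel<2, 2, 3, 3>::get_weight_storage_size(
    //           64 /* n_output_channels */, 64 /* n_input_channels */);
    //   std::vector<float> weight_workspace(weight_storage);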

    NEWinogradLayerTransformWeightsKernel();
    const char *name() const override
    {
        return "NEWinogradLayerTransformWeightsKernel";
    }
    /** Configure the weights transform kernel.
     *
     * @param[in]  weights_hwio      Pointer to the weights tensor.
     * @param[out] output            Pointer to working space for the transformed weights in the Winograd domain.
     * @param[in]  matrix_stride     Stride across matrices in the output workspace.
     * @param[in]  n_output_channels Number of filters.
     * @param[in]  n_input_channels  Number of channels in each filter.
     */
    void configure(
        const ITensor *weights_hwio,
        float *const   output,
        const int      matrix_stride,
        const int      n_output_channels,
        const int      n_input_channels);

    // Inherited methods overridden:

    void run(const Window &window, const ThreadInfo &info) override;
    bool is_parallelisable() const override;

private:
    using WinogradBase     = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
    using WinogradConv     = typename WinogradBase::template Convolution<float, float>;
    using WeightsTransform = typename WinogradBase::template WeightsTransform<float>;
    std::unique_ptr<WeightsTransform> _transform;
};

template <int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols>
class NEWinogradLayerKernel : public INEKernel
{
public:
    using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols>;
    using WinogradConv = typename WinogradBase::template Convolution<float, float>;
    using MultiGEMM    = winograd::BatchedBlockedGemm<WinogradConv::M_BLOCK, WinogradConv::N_BLOCK, float, float>;

    static const int _output_tile_rows = OutputTileRows;
    static const int _output_tile_cols = OutputTileCols;

    const char *name() const override
    {
        return "NEWinogradLayerKernel";
    }
    /** Constructor */
    NEWinogradLayerKernel();

    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerKernel(const NEWinogradLayerKernel &) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NEWinogradLayerKernel &operator=(const NEWinogradLayerKernel &) = delete;
    /** Allow instances of this class to be moved */
    NEWinogradLayerKernel(NEWinogradLayerKernel &&) = default;
    /** Allow instances of this class to be moved */
    NEWinogradLayerKernel &operator=(NEWinogradLayerKernel &&) = default;

    ~NEWinogradLayerKernel() = default;

    /** Initialise the kernel.
     *
     * @param[in]  n_gemms         Number of GEMMs to compute.
     * @param[in]  M               Number of rows in each GEMM, i.e. in_shape.n_batches * tile_rows * tile_cols.
     * @param[in]  K               Number of channels in the input tensor.
     * @param[in]  N               Number of channels in the output tensor.
     * @param[in]  a_matrix_stride Stride between input matrices.
     * @param[in]  a_row_stride    Row stride inside each input matrix.
     * @param[in]  b_matrix_stride Stride between weights matrices.
     * @param[in]  b_row_stride    Row stride inside each weights matrix.
     * @param[in]  c_matrix_stride Stride between output matrices.
     * @param[in]  c_row_stride    Row stride inside each output matrix.
     * @param[in]  a_ptr           Input workspace.
     * @param[in]  b_ptr           Kernel workspace.
     * @param[out] c_ptr           Output workspace.
     */
    void configure(
        const unsigned int n_gemms,
        const int M, const int K, const int N,
        const int          a_matrix_stride,
        const int          a_row_stride,
        const int          b_matrix_stride,
        const int          b_row_stride,
        const int          c_matrix_stride,
        const int          c_row_stride,
        const float *const a_ptr,
        const float *const b_ptr,
        float *const       c_ptr);
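    // Worked example for the GEMM dimensions (hypothetical configuration: 2x2
    // output tiles, 3x3 kernel, one batch of 56x56 feature maps, SAME padding):
    // the output splits into ceil(56 / 2) * ceil(56 / 2) = 28 * 28 tiles, so
    //   M = n_batches * tile_rows * tile_cols = 1 * 28 * 28 = 784,
    // while K is the input channel count and N the output channel count.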

    // Inherited methods overridden:
    void run(const Window &window, const ThreadInfo &info) override;

private:
    std::unique_ptr<MultiGEMM> _gemms;
};

} // namespace arm_compute
#endif /*__ARM_COMPUTE_NEGEMMWINOGRADLAYERKERNEL_H__*/