blob: 3cfb6e6646f9d06c75ee1addec82d5f1bfc2f2c7 [file] [log] [blame]
Pablo Tello8f43d742019-03-27 09:28:32 +00001/*
Michele Di Giorgiod9eaf612020-07-08 11:12:57 +01002 * Copyright (c) 2017-2019 Arm Limited.
Pablo Tello8f43d742019-03-27 09:28:32 +00003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25#pragma once
Pablo Tello8f43d742019-03-27 09:28:32 +000026#include "arm_gemm_local.hpp"
27#include "arm_gemm.hpp"
28#include "winograd.hpp"
29
30namespace winograd
31{
32
33
34class IWinogradConvolutionLayer
35{
36 public:
37 virtual ~IWinogradConvolutionLayer() = default;
38
39 virtual unsigned int weight_transform_get_window(void) const = 0;
40 virtual void weight_transform_run(unsigned int start, unsigned int stop) = 0;
41
Pablo Tello5264b7d2019-10-21 14:25:41 +010042 virtual IInputTransform& input_transform(void) = 0; // Expose the input transform
43 virtual IOutputTransform& output_transform(void) = 0; // Expose the output transform
Pablo Tello8f43d742019-03-27 09:28:32 +000044 virtual arm_gemm::IGemmCommon *gemm(void) = 0; // Expose the underlying GEMM
45};
46
47/** Example of how to construct an ACL-like interface.
48 *
49 * Use `get_weight_storage_size`, `get_input_storage_size` and
50 * `get_output_storage_size` to allocate memory for the convolution engine.
51 * Then create a `WinogradConvolutionLayer`.
52 *
53 * Initialise the weights using `weights_transform.run(...)`.
54 *
55 * For each inference:
56 * 1. Transform the inputs to the Winograd domain using `input_transform.run(...)`
57 * 2. Perform a number of GEMMs using `gemms.run(...)`
58 * 3. Transform the output to the spatial domain using `output_transform.run(...)`
59 */
60template <int OutputTileRows, int OutputTileCols, int KernelRows, int KernelCols,
61 typename TIn, typename TInGEMM, typename TOutGEMM, typename TOut,
62 WinogradRoots Roots>
63class WinogradConvolutionLayer : public IWinogradConvolutionLayer
64{
Pablo Tello8f43d742019-03-27 09:28:32 +000065 public:
66 using WinogradBase = winograd::WinogradGEMM<OutputTileRows, OutputTileCols, KernelRows, KernelCols, Roots>;
67 using WeightsTransform = typename WinogradBase::template WeightsTransform<TIn, TInGEMM>;
68 using InputTransform = typename WinogradBase::template InputTransform<TIn, TInGEMM>;
69 using WinogradConv = typename WinogradBase::template Convolution<TOut, TIn, TInGEMM, TOutGEMM>;
70 using OutputTransform = typename WinogradBase::template OutputTransform<TOutGEMM, TOut>;
71
Pablo Tello5264b7d2019-10-21 14:25:41 +010072 private:
73 static constexpr int InnerTileRows = OutputTileRows + KernelRows - 1;
74 static constexpr int InnerTileCols = OutputTileCols + KernelCols - 1;
75 static constexpr int N_GEMMS = InnerTileRows * InnerTileCols;
76
77 const int _n_output_rows, _n_output_cols;
78 const int _kernel_matrix_stride, _kernel_matrix_row_stride;
79 const int _input_matrix_stride, _input_matrix_row_stride;
80 const int _output_matrix_stride, _output_matrix_row_stride;
81 const int _tile_rows, _tile_cols;
82 const int _m, _k, _n;
83
Pablo Tello8f43d742019-03-27 09:28:32 +000084 WeightsTransform weights_transform; /** Operator to transform weights to Winograd domain. */
85 InputTransform _input_transform; /** Operator to transform input to Winograd domain. */
Pablo Tello5264b7d2019-10-21 14:25:41 +010086 const arm_gemm::GemmArgs gemm_args;
Pablo Tello8f43d742019-03-27 09:28:32 +000087 arm_gemm::UniqueGemmCommon<TInGEMM, TOutGEMM> gemms; /** Operator to perform multiple GEMMs. */
88 OutputTransform _output_transform; /** Operator to transform output from Winograd domain. */
89
Pablo Tello5264b7d2019-10-21 14:25:41 +010090 public:
91
Pablo Tello8f43d742019-03-27 09:28:32 +000092 /** Determine how much memory (in units of TIn) to allocate for the
93 * transformed weights.
94 */
95 static unsigned int get_weight_storage_size(
96 const int n_output_channels, /** Number of output feature maps. */
97 const int n_input_channels /** Number of input feature maps. */
98 );
99
100 static unsigned int get_weight_stride(
101 const int n_output_channels, /** Number of output feature maps. */
102 const int n_input_channels /** Number of input feature maps. */
103 );
104
105 static unsigned int get_weight_multi_stride(
106 const int n_output_channels, /** Number of output feature maps. */
107 const int n_input_channels /** Number of input feature maps. */
108 );
109
110 /** Determine how much memory (in units of TIn) to allocate for the
111 * transformed input.
112 */
113 static unsigned int get_input_storage_size(
114 const int n_batches, /** Number of batches in the input tensor. */
115 const int n_channels, /** Number of feature maps in the input tensor. */
116 const int n_rows, /** Number of rows in each feature map. */
117 const int n_cols, /** Number of columns in each feature map. */
118 const bool same_padding /** Use "SAME" padding, otherwise use "VALID". */
119 );
120
121 /** Get the row stride for the A matrix in the Winograd domain. */
122 static unsigned int get_input_stride(
123 const int n_batches, /** Number of batches in the input tensor. */
124 const int n_channels, /** Number of feature maps in the input tensor. */
125 const int n_rows, /** Number of rows in each feature map. */
126 const int n_cols, /** Number of columns in each feature map. */
127 const bool same_padding /** Use "SAME" padding, otherwise use "VALID". */
128 );
129
130 /** Get the stride between A matrices in the Winograd domain. */
131 static unsigned int get_input_multi_stride(
132 const int n_batches, /** Number of batches in the input tensor. */
133 const int n_channels, /** Number of feature maps in the input tensor. */
134 const int n_rows, /** Number of rows in each feature map. */
135 const int n_cols, /** Number of columns in each feature map. */
136 const bool same_padding /** Use "SAME" padding, otherwise use "VALID". */
137 );
138
139 /** Determine how much memory (in units of TOut) to allocate for the
140 * (Winograd domain) output.
141 */
142 static unsigned int get_output_storage_size(
143 const int n_batches, /** Number of batches in the output tensor. */
144 const int n_rows, /** Number of rows in each feature map of the input tensor. */
145 const int n_cols, /** Number of columns in each feature map of the input tensor. */
146 const int n_output_channels, /** Number of feature maps in the output tensor. */
147 const bool same_padding /** Use "SAME" padding, otherwise use "VALID". */
148 );
149
150 static unsigned int get_output_stride(
151 const int n_batches, /** Number of batches in the output tensor. */
152 const int n_rows, /** Number of rows in each feature map of the input tensor. */
153 const int n_cols, /** Number of columns in each feature map of the input tensor. */
154 const int n_output_channels, /** Number of feature maps in the output tensor. */
155 const bool same_padding /** Use "SAME" padding, otherwise use "VALID". */
156 );
157
158 static unsigned int get_output_multi_stride(
159 const int n_batches, /** Number of batches in the output tensor. */
160 const int n_rows, /** Number of rows in each feature map of the input tensor. */
161 const int n_cols, /** Number of columns in each feature map of the input tensor. */
162 const int n_output_channels, /** Number of feature maps in the output tensor. */
163 const bool same_padding /** Use "SAME" padding, otherwise use "VALID". */
164 );
165
166 /** Get the shape (rows, cols) of a feature map of the output tensor. */
167 static std::pair<int, int> get_output_feature_map_shape(
168 const int n_input_rows, /** Number of rows in the input feature map. */
169 const int n_input_cols, /** Number of columns in the input feature map. */
170 const bool same_padding /** Use "SAME" padding, otherwise use "VALID". */
171 );
172
173 /** Create a new Winograd convolution layer.
174 */
175 WinogradConvolutionLayer(
176 const arm_gemm::CPUInfo &cpuinfo, /** Describes CPU properties. */
177 const int n_threads, /** Maximum number of threads used to execute the convolution. */
178 const int n_batches, /** Number of batches in the input and output tensors. */
179 const int n_input_channels, /** Number of feature maps in a batch of the input tensor. */
180 const int n_input_rows, /** Number of rows in a feature map of the input tensor. */
181 const int n_input_cols, /** Number of columns in a feature map of the input tensor. */
182 const int n_output_channels, /** Number of feature maps in the output tensor. */
183 const bool same_padding, /** Use "SAME" padding, otherwise use "VALID". */
Pablo Tello5264b7d2019-10-21 14:25:41 +0100184 const arm_gemm::Activation &activation,
Pablo Tello8f43d742019-03-27 09:28:32 +0000185 const TIn* const weights, /** Pointer to weight tensor in spatial domain. Must be ordered as "Height x Rows x Input Feature Maps x Output Feature Maps. */
186 TInGEMM* const weights_storage, /** Pointer to storage for weight tensor in the Winograd domain. Must be at least the size returned by `get_weight_storage_size`. */
187 const TIn* const input, /** Pointer to NHWC ordered input tensor, in the spatial domain. */
188 TInGEMM* const winograd_input, /** Pointer to working space for the input tensor in the Winograd domain. Must be at least the size returned by `get_input_storage_size`. */
189 const TOut* const biases, /** Pointer to biases vector. Pass nullptr if no bias is provided. */
190 TOut* const output, /** Pointer to NHWC ordered output tensor, in the spatial domain. */
191 TOutGEMM* const winograd_output, /** Pointer to working space for the output tensor in the Winograd domain. Must be at least the size returned by `get_output_storage_size`. */
192 const bool pretranspose_B=true, /** Hint that the B matrix can be pretransposed. */
193 arm_gemm::GemmConfig *gemm_cfg=nullptr /** Pointer to GEMM configuration. */
194 );
195
196 /* Utility methods for interacting with the layer. */
197 unsigned int weight_transform_get_window(void) const;
198 void weight_transform_run(const unsigned int start, const unsigned int stop);
199
Pablo Tello5264b7d2019-10-21 14:25:41 +0100200 IInputTransform& input_transform(void);
201 IOutputTransform& output_transform(void);
Pablo Tello8f43d742019-03-27 09:28:32 +0000202
203 /* Get a pointer to the GEMM underlying the Winograd transform. */
204 arm_gemm::IGemmCommon *gemm(void);
205};
206
207}