blob: 249e5757f02ca7f1e0a34f7da585150d5ed16296 [file] [log] [blame]
/*
 * Copyright (c) 2017 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24
25#pragma once
26
/** Convert a weight tensor stored as [Output feature map x Input feature map x
 * Height x Width] into [Height x Width x Input feature map x Output feature
 * map] order.
 *
 * All stride arguments are counted in elements of T and default to zero.
 */
template <typename T>
inline void ofm_ifm_h_w_to_h_w_ifm_ofm(
  const T* const in,   // Source weights in [Output x Input x Height x Width] form
  T* const out,        // Destination weights in [Height x Width x Input x Output] form
  const int n_output_feature_maps,
  const int n_input_feature_maps,
  const int n_rows,
  const int n_cols,
  int in_output_feature_map_stride = 0,
  int in_input_feature_map_stride = 0,
  int in_row_stride = 0,
  int out_row_stride = 0,
  int out_col_stride = 0,
  int out_input_feature_map_stride = 0
);
46
/** Convert a weight tensor stored as [Height x Width x Input feature map x
 * Output feature map] into [Output feature map x Input feature map x Height x
 * Width] order.
 *
 * All stride arguments are counted in elements of T and default to zero.
 */
template <typename T>
inline void h_w_ifm_ofm_to_ofm_ifm_h_w(
  const T* const in,   // Source weights in [Height x Width x Input x Output] form
  T* const out,        // Destination weights in [Output x Input x Height x Width] form
  const int n_rows,
  const int n_cols,
  const int n_input_feature_maps,
  const int n_output_feature_maps,
  int in_row_stride = 0,
  int in_col_stride = 0,
  int in_input_feature_map_stride = 0,
  int out_output_feature_map_stride = 0,
  int out_input_feature_map_stride = 0,
  int out_row_stride = 0
);
66
67
/** Re-order a tensor from NCHW format to NHWC.
 *
 * Strides are counted in elements of T. A stride passed as zero is replaced
 * by the value for a densely packed tensor of the given shape; the input
 * column stride and the output channel stride are always one.
 */
template <typename T>
inline void nchw_to_nhwc(
  const T* const in,   // Source tensor in NCHW form
  T* const out,        // Destination tensor in NHWC form
  const int n_batches,
  const int n_channels,
  const int n_rows,
  const int n_cols,
  int in_batch_stride = 0,
  int in_channel_stride = 0,
  int in_row_stride = 0,
  int out_batch_stride = 0,
  int out_row_stride = 0,
  int out_col_stride = 0
)
{
  // Resolve unspecified (zero) input strides, innermost dimension first so
  // that each default can build on the one before it.
  if (in_row_stride == 0)
  {
    in_row_stride = n_cols;
  }
  if (in_channel_stride == 0)
  {
    in_channel_stride = n_rows * in_row_stride;
  }
  if (in_batch_stride == 0)
  {
    in_batch_stride = n_channels * in_channel_stride;
  }

  // Same for the output strides.
  if (out_col_stride == 0)
  {
    out_col_stride = n_channels;
  }
  if (out_row_stride == 0)
  {
    out_row_stride = n_cols * out_col_stride;
  }
  if (out_batch_stride == 0)
  {
    out_batch_stride = n_rows * out_row_stride;
  }

  // Copy element by element, walking the output in its natural order.
  for (int batch = 0; batch < n_batches; ++batch)
  {
    const T* const src_batch = in + batch * in_batch_stride;
    T* const dst_batch = out + batch * out_batch_stride;

    for (int row = 0; row < n_rows; ++row)
    {
      const T* const src_row = src_batch + row * in_row_stride;
      T* const dst_row = dst_batch + row * out_row_stride;

      for (int col = 0; col < n_cols; ++col)
      {
        const T* const src_pixel = src_row + col;
        T* const dst_pixel = dst_row + col * out_col_stride;

        // Gather the channels of this pixel into adjacent output elements.
        for (int channel = 0; channel < n_channels; ++channel)
        {
          dst_pixel[channel] = src_pixel[channel * in_channel_stride];
        }
      }
    }
  }
}
123
/** Re-order a tensor from NHWC format to NCHW.
 *
 * Strides are counted in elements of T. A stride passed as zero is replaced
 * by the value for a densely packed tensor of the given shape; the input
 * channel stride and the output column stride are always one.
 */
template <typename T>
inline void nhwc_to_nchw(
  const T* const in,   // Source tensor in NHWC form
  T* const out,        // Destination tensor in NCHW form
  const int n_batches,
  const int n_rows,
  const int n_cols,
  const int n_channels,
  int in_batch_stride = 0,
  int in_row_stride = 0,
  int in_col_stride = 0,
  int out_batch_stride = 0,
  int out_channel_stride = 0,
  int out_row_stride = 0
)
{
  // Resolve unspecified (zero) input strides, innermost dimension first so
  // that each default can build on the one before it.
  if (in_col_stride == 0)
  {
    in_col_stride = n_channels;
  }
  if (in_row_stride == 0)
  {
    in_row_stride = n_cols * in_col_stride;
  }
  if (in_batch_stride == 0)
  {
    in_batch_stride = n_rows * in_row_stride;
  }

  // Same for the output strides.
  if (out_row_stride == 0)
  {
    out_row_stride = n_cols;
  }
  if (out_channel_stride == 0)
  {
    out_channel_stride = n_rows * out_row_stride;
  }
  if (out_batch_stride == 0)
  {
    out_batch_stride = n_channels * out_channel_stride;
  }

  // Copy element by element, walking the input in its natural order.
  for (int batch = 0; batch < n_batches; ++batch)
  {
    const T* const src_batch = in + batch * in_batch_stride;
    T* const dst_batch = out + batch * out_batch_stride;

    for (int row = 0; row < n_rows; ++row)
    {
      const T* const src_row = src_batch + row * in_row_stride;
      T* const dst_row = dst_batch + row * out_row_stride;

      for (int col = 0; col < n_cols; ++col)
      {
        const T* const src_pixel = src_row + col * in_col_stride;
        T* const dst_pixel = dst_row + col;

        // Scatter the adjacent channels of this pixel across output planes.
        for (int channel = 0; channel < n_channels; ++channel)
        {
          dst_pixel[channel * out_channel_stride] = src_pixel[channel];
        }
      }
    }
  }
}
184
185
/*****************************************************************************/
/** Generic implementation of the weight re-ordering from [Output feature map
 * x Input feature map x Height x Width] to [Height x Width x Input feature
 * map x Output feature map].
 *
 * Strides are counted in elements of T. A stride passed as zero is replaced
 * by the value for a densely packed tensor of the given shape; the input
 * column stride and the output output-feature-map stride are always one.
 */
template <typename T>
inline void ofm_ifm_h_w_to_h_w_ifm_ofm(
  const T* const in,   // Source weights in [Output x Input x Height x Width] form
  T* const out,        // Destination weights in [Height x Width x Input x Output] form
  const int n_output_feature_maps,
  const int n_input_feature_maps,
  const int n_rows,
  const int n_cols,
  int in_output_feature_map_stride,
  int in_input_feature_map_stride,
  int in_row_stride,
  int out_row_stride,
  int out_col_stride,
  int out_input_feature_map_stride
)
{
  // Resolve unspecified (zero) input strides, innermost dimension first so
  // that each default can build on the one before it.
  if (in_row_stride == 0)
  {
    in_row_stride = n_cols;
  }
  if (in_input_feature_map_stride == 0)
  {
    in_input_feature_map_stride = n_rows * in_row_stride;
  }
  if (in_output_feature_map_stride == 0)
  {
    in_output_feature_map_stride =
      n_input_feature_maps * in_input_feature_map_stride;
  }

  // Same for the output strides.
  if (out_input_feature_map_stride == 0)
  {
    out_input_feature_map_stride = n_output_feature_maps;
  }
  if (out_col_stride == 0)
  {
    out_col_stride = n_input_feature_maps * out_input_feature_map_stride;
  }
  if (out_row_stride == 0)
  {
    out_row_stride = n_cols * out_col_stride;
  }

  // Copy element by element, walking the output in its natural order.
  for (int row = 0; row < n_rows; ++row)
  {
    const T* const src_row = in + row * in_row_stride;
    T* const dst_row = out + row * out_row_stride;

    for (int col = 0; col < n_cols; ++col)
    {
      const T* const src_col = src_row + col;
      T* const dst_col = dst_row + col * out_col_stride;

      for (int ifm = 0; ifm < n_input_feature_maps; ++ifm)
      {
        const T* const src_ifm = src_col + ifm * in_input_feature_map_stride;
        T* const dst_ifm = dst_col + ifm * out_input_feature_map_stride;

        // Output feature maps are adjacent in the destination layout.
        for (int ofm = 0; ofm < n_output_feature_maps; ++ofm)
        {
          dst_ifm[ofm] = src_ifm[ofm * in_output_feature_map_stride];
        }
      }
    }
  }
}
252
/*****************************************************************************/
/** Generic implementation of the weight re-ordering from [Height x Width x
 * Input feature map x Output feature map] to [Output feature map x Input
 * feature map x Height x Width].
 *
 * Strides are counted in elements of T. A stride passed as zero is replaced
 * by the value for a densely packed tensor of the given shape; the input
 * output-feature-map stride and the output column stride are always one.
 */
template <typename T>
inline void h_w_ifm_ofm_to_ofm_ifm_h_w(
  const T* const in,   // Source weights in [Height x Width x Input x Output] form
  T* const out,        // Destination weights in [Output x Input x Height x Width] form
  const int n_rows,
  const int n_cols,
  const int n_input_feature_maps,
  const int n_output_feature_maps,
  int in_row_stride,
  int in_col_stride,
  int in_input_feature_map_stride,
  int out_output_feature_map_stride,
  int out_input_feature_map_stride,
  int out_row_stride
)
{
  // Resolve unspecified (zero) input strides, innermost dimension first so
  // that each default can build on the one before it.
  if (in_input_feature_map_stride == 0)
  {
    in_input_feature_map_stride = n_output_feature_maps;
  }
  if (in_col_stride == 0)
  {
    in_col_stride = n_input_feature_maps * in_input_feature_map_stride;
  }
  if (in_row_stride == 0)
  {
    in_row_stride = n_cols * in_col_stride;
  }

  // Same for the output strides.
  if (out_row_stride == 0)
  {
    out_row_stride = n_cols;
  }
  if (out_input_feature_map_stride == 0)
  {
    out_input_feature_map_stride = n_rows * out_row_stride;
  }
  if (out_output_feature_map_stride == 0)
  {
    out_output_feature_map_stride =
      n_input_feature_maps * out_input_feature_map_stride;
  }

  // Copy element by element, walking the input in its natural order.
  for (int row = 0; row < n_rows; ++row)
  {
    const T* const src_row = in + row * in_row_stride;
    T* const dst_row = out + row * out_row_stride;

    for (int col = 0; col < n_cols; ++col)
    {
      const T* const src_col = src_row + col * in_col_stride;
      T* const dst_col = dst_row + col;

      for (int ifm = 0; ifm < n_input_feature_maps; ++ifm)
      {
        const T* const src_ifm = src_col + ifm * in_input_feature_map_stride;
        T* const dst_ifm = dst_col + ifm * out_input_feature_map_stride;

        // Output feature maps are adjacent in the source layout.
        for (int ofm = 0; ofm < n_output_feature_maps; ++ofm)
        {
          dst_ifm[ofm * out_output_feature_map_stride] = src_ifm[ofm];
        }
      }
    }
  }
}
319