blob: 4715fb737f0a9077188baa3053e3226f954589ca [file] [log] [blame]
Usama Arif0681e3b2019-04-25 14:28:07 +01001/*
2 * Copyright (c) 2019 ARM Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
Gian Marco Iodiceca1f4602019-07-16 15:46:48 +010024#include "activation_float_helpers.h"
Usama Arif0681e3b2019-04-25 14:28:07 +010025#include "helpers.h"
26
/** LOAD_ROW_n: declare the vectors BASENAME##0 .. BASENAME##(n-1) and load one row into each of them.
 *
 * Row i is declared as VEC_DATA_TYPE(DATA_TYPE, N0) and initialised with
 * VLOAD(N0)(0, (__global DATA_TYPE *)(PTR + OFFSET + i * STRIDE_Y + Z##i)).
 * The address is summed before the cast, so OFFSET, STRIDE_Y and Z##i are expressed in units of PTR's pointee type.
 * Rows 10..15 use the hexadecimal suffixes A..F for both BASENAME and Z.
 * PTR, OFFSET and STRIDE_Y are parenthesised so that expression arguments (e.g. a + b) keep their meaning.
 * Every LOAD_ROW_n first expands LOAD_ROW_(n-1), so the rows are loaded in increasing order.
 */
#define LOAD_ROW_1(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##0 = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 0 * (STRIDE_Y) + Z##0));

#define LOAD_ROW_2(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    LOAD_ROW_1(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##1 = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 1 * (STRIDE_Y) + Z##1));

#define LOAD_ROW_3(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    LOAD_ROW_2(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##2 = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 2 * (STRIDE_Y) + Z##2));

#define LOAD_ROW_4(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    LOAD_ROW_3(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##3 = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 3 * (STRIDE_Y) + Z##3));

#define LOAD_ROW_5(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    LOAD_ROW_4(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##4 = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 4 * (STRIDE_Y) + Z##4));

#define LOAD_ROW_6(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    LOAD_ROW_5(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##5 = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 5 * (STRIDE_Y) + Z##5));

#define LOAD_ROW_7(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    LOAD_ROW_6(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##6 = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 6 * (STRIDE_Y) + Z##6));

#define LOAD_ROW_8(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    LOAD_ROW_7(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##7 = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 7 * (STRIDE_Y) + Z##7));

#define LOAD_ROW_9(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    LOAD_ROW_8(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##8 = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 8 * (STRIDE_Y) + Z##8));

#define LOAD_ROW_10(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    LOAD_ROW_9(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##9 = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 9 * (STRIDE_Y) + Z##9));

#define LOAD_ROW_11(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    LOAD_ROW_10(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##A = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 10 * (STRIDE_Y) + Z##A));

#define LOAD_ROW_12(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    LOAD_ROW_11(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##B = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 11 * (STRIDE_Y) + Z##B));

#define LOAD_ROW_13(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    LOAD_ROW_12(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##C = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 12 * (STRIDE_Y) + Z##C));

#define LOAD_ROW_14(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    LOAD_ROW_13(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##D = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 13 * (STRIDE_Y) + Z##D));

#define LOAD_ROW_15(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    LOAD_ROW_14(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##E = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 14 * (STRIDE_Y) + Z##E));

#define LOAD_ROW_16(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    LOAD_ROW_15(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##F = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 15 * (STRIDE_Y) + Z##F));

// LOAD_BLOCK_STR pastes M0 onto LOAD_ROW_ to select the macro that loads the rows 0..M0-1 in variables BASENAME##0 to BASENAME##(M0-1)
#define LOAD_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) LOAD_ROW_##M0(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
/** Load a block of M0 consecutive rows and N0 consecutive columns, applying a per-row Z offset.
 * Supported cases M0=1,2,3..16. N0=1,2,3,4,8,16, for variables BASENAME[0..M0-1]
 * The loaded data gets consecutive names for each row, e.g. for M0=3 and BASENAME=c the declared variables are c0, c1 and c2.
 * The Z offsets are expected to have consecutive names, e.g. for M0=3 and Z=zin the expected z offsets are zin0, zin1 and zin2.
 * The extra level of indirection through LOAD_BLOCK_STR lets M0 be a macro: it is expanded before being pasted onto LOAD_ROW_.
 */
#define LOAD_BLOCK(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) LOAD_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
114
/** CALCULATE_Z_OFFSET_n: assign the already-declared variables Z##0 .. Z##(n-1).
 *
 * For row i the following is computed, every operand being cast to DATA_TYPE:
 *   Z##i = (i + Y * M0) / HEIGHT_GEMM3D      -> index of the plane the row belongs to
 *   Z##i = min(DEPTH_GEMM3D - 1, Z##i)       -> clamped to the last plane
 *   Z##i *= CROSS_PLANE_PAD * STRIDE_Y       -> scaled by the cross-plane padding times the row stride
 * All value arguments are parenthesised so that expression arguments (e.g. a + b) keep their meaning.
 * Only CALCULATE_Z_OFFSET_1 .. CALCULATE_Z_OFFSET_8 are defined here, so M0 must expand to a value in 1..8.
 */
#define CALCULATE_Z_OFFSET_1(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y) \
    Z##0 = (0 + (DATA_TYPE)((Y) * (DATA_TYPE)(M0))) / (DATA_TYPE)(HEIGHT_GEMM3D); \
    Z##0 = min((DATA_TYPE)((DEPTH_GEMM3D) - 1), Z##0); \
    Z##0 *= ((CROSS_PLANE_PAD) * (STRIDE_Y));

#define CALCULATE_Z_OFFSET_2(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y) \
    CALCULATE_Z_OFFSET_1(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y) \
    Z##1 = (1 + (DATA_TYPE)((Y) * (DATA_TYPE)(M0))) / (DATA_TYPE)(HEIGHT_GEMM3D); \
    Z##1 = min((DATA_TYPE)((DEPTH_GEMM3D) - 1), Z##1); \
    Z##1 *= ((CROSS_PLANE_PAD) * (STRIDE_Y));

#define CALCULATE_Z_OFFSET_3(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y) \
    CALCULATE_Z_OFFSET_2(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y) \
    Z##2 = (2 + (DATA_TYPE)((Y) * (DATA_TYPE)(M0))) / (DATA_TYPE)(HEIGHT_GEMM3D); \
    Z##2 = min((DATA_TYPE)((DEPTH_GEMM3D) - 1), Z##2); \
    Z##2 *= ((CROSS_PLANE_PAD) * (STRIDE_Y));

#define CALCULATE_Z_OFFSET_4(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y) \
    CALCULATE_Z_OFFSET_3(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y) \
    Z##3 = (3 + (DATA_TYPE)((Y) * (DATA_TYPE)(M0))) / (DATA_TYPE)(HEIGHT_GEMM3D); \
    Z##3 = min((DATA_TYPE)((DEPTH_GEMM3D) - 1), Z##3); \
    Z##3 *= ((CROSS_PLANE_PAD) * (STRIDE_Y));

#define CALCULATE_Z_OFFSET_5(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y) \
    CALCULATE_Z_OFFSET_4(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y) \
    Z##4 = (4 + (DATA_TYPE)((Y) * (DATA_TYPE)(M0))) / (DATA_TYPE)(HEIGHT_GEMM3D); \
    Z##4 = min((DATA_TYPE)((DEPTH_GEMM3D) - 1), Z##4); \
    Z##4 *= ((CROSS_PLANE_PAD) * (STRIDE_Y));

#define CALCULATE_Z_OFFSET_6(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y) \
    CALCULATE_Z_OFFSET_5(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y) \
    Z##5 = (5 + (DATA_TYPE)((Y) * (DATA_TYPE)(M0))) / (DATA_TYPE)(HEIGHT_GEMM3D); \
    Z##5 = min((DATA_TYPE)((DEPTH_GEMM3D) - 1), Z##5); \
    Z##5 *= ((CROSS_PLANE_PAD) * (STRIDE_Y));

#define CALCULATE_Z_OFFSET_7(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y) \
    CALCULATE_Z_OFFSET_6(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y) \
    Z##6 = (6 + (DATA_TYPE)((Y) * (DATA_TYPE)(M0))) / (DATA_TYPE)(HEIGHT_GEMM3D); \
    Z##6 = min((DATA_TYPE)((DEPTH_GEMM3D) - 1), Z##6); \
    Z##6 *= ((CROSS_PLANE_PAD) * (STRIDE_Y));

#define CALCULATE_Z_OFFSET_8(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y) \
    CALCULATE_Z_OFFSET_7(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y) \
    Z##7 = (7 + (DATA_TYPE)((Y) * (DATA_TYPE)(M0))) / (DATA_TYPE)(HEIGHT_GEMM3D); \
    Z##7 = min((DATA_TYPE)((DEPTH_GEMM3D) - 1), Z##7); \
    Z##7 *= ((CROSS_PLANE_PAD) * (STRIDE_Y));

// CALCULATE_Z_OFFSET_STR pastes M0 onto CALCULATE_Z_OFFSET_ to select the macro that calculates Z##0 to Z##(M0-1)
#define CALCULATE_Z_OFFSET_STR(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y) CALCULATE_Z_OFFSET_##M0(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y)
/** Calculate the Z offsets Z##0 .. Z##(M0-1) for a tile of M0 rows. Supported cases M0=1,2,3..8.
 * The Z offsets are expected to have consecutive names, e.g. for M0=3 and Z=zin the expected Z offsets are zin0, zin1 and zin2.
 * The variables must already be declared by the caller: the macros only assign to them.
 * Note for the REINTERPRET_INPUT_AS_3D case
 * Since we load a 2D input tile from a 3D tensor, we need to check when the plane changes across the z dimension
 * in order to take into account the presence of possible cross plane paddings
 *
 *  |                  |
 *  |      plane0      |
 *  |                  |
 *  |__________________|
 *  |******************|
 *  |  cross_plane_pad |
 *  |******************|
 *  |                  |
 *  |      plane1      |
 *  |                  |
 *  |__________________|
 */
#define CALCULATE_Z_OFFSET(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y) CALCULATE_Z_OFFSET_STR(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y)
182
/** STORE_ROW_n: store the vectors BASENAME##0 .. BASENAME##(n-1), one per row.
 *
 * Row i is written with VSTORE(N0)(BASENAME##i, 0, (__global DATA_TYPE *)(PTR + i * STRIDE_Y + Z##i)).
 * The address is summed before the cast, so STRIDE_Y and Z##i are expressed in units of PTR's pointee type.
 * Rows 10..15 use the hexadecimal suffixes A..F for both BASENAME and Z.
 * PTR and STRIDE_Y are parenthesised so that expression arguments (e.g. a + b) keep their meaning.
 */
#define STORE_ROW_1(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (BASENAME##0, 0, (__global DATA_TYPE *)((PTR) + 0 * (STRIDE_Y) + Z##0));

#define STORE_ROW_2(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    STORE_ROW_1(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (BASENAME##1, 0, (__global DATA_TYPE *)((PTR) + 1 * (STRIDE_Y) + Z##1));

#define STORE_ROW_3(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    STORE_ROW_2(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (BASENAME##2, 0, (__global DATA_TYPE *)((PTR) + 2 * (STRIDE_Y) + Z##2));

#define STORE_ROW_4(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    STORE_ROW_3(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (BASENAME##3, 0, (__global DATA_TYPE *)((PTR) + 3 * (STRIDE_Y) + Z##3));

#define STORE_ROW_5(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    STORE_ROW_4(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (BASENAME##4, 0, (__global DATA_TYPE *)((PTR) + 4 * (STRIDE_Y) + Z##4));

#define STORE_ROW_6(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    STORE_ROW_5(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (BASENAME##5, 0, (__global DATA_TYPE *)((PTR) + 5 * (STRIDE_Y) + Z##5));

#define STORE_ROW_7(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    STORE_ROW_6(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (BASENAME##6, 0, (__global DATA_TYPE *)((PTR) + 6 * (STRIDE_Y) + Z##6));

#define STORE_ROW_8(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    STORE_ROW_7(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (BASENAME##7, 0, (__global DATA_TYPE *)((PTR) + 7 * (STRIDE_Y) + Z##7));

#define STORE_ROW_9(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    STORE_ROW_8(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (BASENAME##8, 0, (__global DATA_TYPE *)((PTR) + 8 * (STRIDE_Y) + Z##8));

#define STORE_ROW_10(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    STORE_ROW_9(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (BASENAME##9, 0, (__global DATA_TYPE *)((PTR) + 9 * (STRIDE_Y) + Z##9));

#define STORE_ROW_11(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    STORE_ROW_10(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (BASENAME##A, 0, (__global DATA_TYPE *)((PTR) + 10 * (STRIDE_Y) + Z##A));

#define STORE_ROW_12(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    STORE_ROW_11(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (BASENAME##B, 0, (__global DATA_TYPE *)((PTR) + 11 * (STRIDE_Y) + Z##B));

#define STORE_ROW_13(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    STORE_ROW_12(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (BASENAME##C, 0, (__global DATA_TYPE *)((PTR) + 12 * (STRIDE_Y) + Z##C));

#define STORE_ROW_14(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    STORE_ROW_13(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (BASENAME##D, 0, (__global DATA_TYPE *)((PTR) + 13 * (STRIDE_Y) + Z##D));

#define STORE_ROW_15(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    STORE_ROW_14(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (BASENAME##E, 0, (__global DATA_TYPE *)((PTR) + 14 * (STRIDE_Y) + Z##E));

#define STORE_ROW_16(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    STORE_ROW_15(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (BASENAME##F, 0, (__global DATA_TYPE *)((PTR) + 15 * (STRIDE_Y) + Z##F));
262
/** CONVERT_STORE_ROW_n: convert and store the vectors BASENAME##0 .. BASENAME##(n-1), one per row.
 *
 * Row i is first converted with CONVERT_SAT(BASENAME##i, VEC_DATA_TYPE(DATA_TYPE, N0)) and the result is written with
 * VSTORE(N0)(..., 0, (__global DATA_TYPE *)(PTR + i * STRIDE_Y + Z##i)).
 * The address is summed before the cast, so STRIDE_Y and Z##i are expressed in units of PTR's pointee type.
 * Rows 10..15 use the hexadecimal suffixes A..F for both BASENAME and Z.
 * PTR and STRIDE_Y are parenthesised so that expression arguments (e.g. a + b) keep their meaning.
 * Every macro of the family must name its second parameter DATA_TYPE: the bodies refer to DATA_TYPE, and a differently
 * named parameter would silently drop the caller's type in favour of whatever DATA_TYPE means at the point of use.
 */
#define CONVERT_STORE_ROW_1(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (CONVERT_SAT((BASENAME##0), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, (__global DATA_TYPE *)((PTR) + 0 * (STRIDE_Y) + Z##0));

#define CONVERT_STORE_ROW_2(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    CONVERT_STORE_ROW_1(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (CONVERT_SAT((BASENAME##1), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, (__global DATA_TYPE *)((PTR) + 1 * (STRIDE_Y) + Z##1));

#define CONVERT_STORE_ROW_3(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    CONVERT_STORE_ROW_2(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (CONVERT_SAT((BASENAME##2), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, (__global DATA_TYPE *)((PTR) + 2 * (STRIDE_Y) + Z##2));

#define CONVERT_STORE_ROW_4(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    CONVERT_STORE_ROW_3(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (CONVERT_SAT((BASENAME##3), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, (__global DATA_TYPE *)((PTR) + 3 * (STRIDE_Y) + Z##3));

#define CONVERT_STORE_ROW_5(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    CONVERT_STORE_ROW_4(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (CONVERT_SAT((BASENAME##4), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, (__global DATA_TYPE *)((PTR) + 4 * (STRIDE_Y) + Z##4));

#define CONVERT_STORE_ROW_6(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    CONVERT_STORE_ROW_5(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (CONVERT_SAT((BASENAME##5), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, (__global DATA_TYPE *)((PTR) + 5 * (STRIDE_Y) + Z##5));

#define CONVERT_STORE_ROW_7(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    CONVERT_STORE_ROW_6(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (CONVERT_SAT((BASENAME##6), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, (__global DATA_TYPE *)((PTR) + 6 * (STRIDE_Y) + Z##6));

#define CONVERT_STORE_ROW_8(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    CONVERT_STORE_ROW_7(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (CONVERT_SAT((BASENAME##7), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, (__global DATA_TYPE *)((PTR) + 7 * (STRIDE_Y) + Z##7));

#define CONVERT_STORE_ROW_9(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    CONVERT_STORE_ROW_8(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (CONVERT_SAT((BASENAME##8), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, (__global DATA_TYPE *)((PTR) + 8 * (STRIDE_Y) + Z##8));

#define CONVERT_STORE_ROW_10(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    CONVERT_STORE_ROW_9(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (CONVERT_SAT((BASENAME##9), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, (__global DATA_TYPE *)((PTR) + 9 * (STRIDE_Y) + Z##9));

#define CONVERT_STORE_ROW_11(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    CONVERT_STORE_ROW_10(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (CONVERT_SAT((BASENAME##A), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, (__global DATA_TYPE *)((PTR) + 10 * (STRIDE_Y) + Z##A));

#define CONVERT_STORE_ROW_12(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    CONVERT_STORE_ROW_11(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (CONVERT_SAT((BASENAME##B), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, (__global DATA_TYPE *)((PTR) + 11 * (STRIDE_Y) + Z##B));

#define CONVERT_STORE_ROW_13(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    CONVERT_STORE_ROW_12(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (CONVERT_SAT((BASENAME##C), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, (__global DATA_TYPE *)((PTR) + 12 * (STRIDE_Y) + Z##C));

#define CONVERT_STORE_ROW_14(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    CONVERT_STORE_ROW_13(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (CONVERT_SAT((BASENAME##D), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, (__global DATA_TYPE *)((PTR) + 13 * (STRIDE_Y) + Z##D));

#define CONVERT_STORE_ROW_15(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    CONVERT_STORE_ROW_14(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (CONVERT_SAT((BASENAME##E), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, (__global DATA_TYPE *)((PTR) + 14 * (STRIDE_Y) + Z##E));

#define CONVERT_STORE_ROW_16(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    CONVERT_STORE_ROW_15(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (CONVERT_SAT((BASENAME##F), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, (__global DATA_TYPE *)((PTR) + 15 * (STRIDE_Y) + Z##F));
342
// STORE_BLOCK_STR pastes M0 onto STORE_ROW_ to select the macro that stores the rows 0..M0-1 from variables BASENAME##0 to BASENAME##(M0-1)
#define STORE_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) STORE_ROW_##M0(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)

// CONVERT_STORE_BLOCK_STR pastes M0 onto CONVERT_STORE_ROW_ to select the macro that converts and stores the rows 0..M0-1 from variables BASENAME##0 to BASENAME##(M0-1)
#define CONVERT_STORE_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) CONVERT_STORE_ROW_##M0(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)

/** Store a block of size M0 (rows) x N0 (columns).
 * Supported cases M0=1,2,3..16. N0=2,3,4,8,16, for variables BASENAME[0..M0-1]
 * The data to store is expected to have consecutive names for each row, e.g. for M0=3 and BASENAME=c the expected data is c0, c1 and c2.
 * The Z offsets are expected to have consecutive names, e.g. for M0=3 and Z=zin the expected z offsets are zin0, zin1 and zin2.
 * The extra level of indirection through STORE_BLOCK_STR lets M0 be a macro: it is expanded before being pasted onto STORE_ROW_.
 */
#define STORE_BLOCK(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) STORE_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)

/** Convert and store a block of size M0 (rows) x N0 (columns).
 * Supported cases M0=1,2,3..16. N0=2,3,4,8,16, for variables BASENAME[0..M0-1]
 * The data to store is expected to have consecutive names for each row, e.g. for M0=3 and BASENAME=c the expected data is c0, c1 and c2.
 * The Z offsets are expected to have consecutive names, e.g. for M0=3 and Z=zin the expected z offsets are zin0, zin1 and zin2.
 * The extra level of indirection through CONVERT_STORE_BLOCK_STR lets M0 be a macro: it is expanded before being pasted onto CONVERT_STORE_ROW_.
 */
#define CONVERT_STORE_BLOCK(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) CONVERT_STORE_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)
362
/** SCALE_ROW_n: multiply each of the variables BASENAME##0 .. BASENAME##(n-1) in place by (DATA_TYPE)(SCALE).
 *
 * Rows 10..15 use the hexadecimal suffixes A..F.
 * SCALE is parenthesised so that the cast applies to the whole argument even when it is an expression (e.g. a + b).
 */
#define SCALE_ROW_1(DATA_TYPE, BASENAME, SCALE) \
    BASENAME##0 *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_2(DATA_TYPE, BASENAME, SCALE) \
    SCALE_ROW_1(DATA_TYPE, BASENAME, SCALE) \
    BASENAME##1 *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_3(DATA_TYPE, BASENAME, SCALE) \
    SCALE_ROW_2(DATA_TYPE, BASENAME, SCALE) \
    BASENAME##2 *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_4(DATA_TYPE, BASENAME, SCALE) \
    SCALE_ROW_3(DATA_TYPE, BASENAME, SCALE) \
    BASENAME##3 *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_5(DATA_TYPE, BASENAME, SCALE) \
    SCALE_ROW_4(DATA_TYPE, BASENAME, SCALE) \
    BASENAME##4 *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_6(DATA_TYPE, BASENAME, SCALE) \
    SCALE_ROW_5(DATA_TYPE, BASENAME, SCALE) \
    BASENAME##5 *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_7(DATA_TYPE, BASENAME, SCALE) \
    SCALE_ROW_6(DATA_TYPE, BASENAME, SCALE) \
    BASENAME##6 *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_8(DATA_TYPE, BASENAME, SCALE) \
    SCALE_ROW_7(DATA_TYPE, BASENAME, SCALE) \
    BASENAME##7 *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_9(DATA_TYPE, BASENAME, SCALE) \
    SCALE_ROW_8(DATA_TYPE, BASENAME, SCALE) \
    BASENAME##8 *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_10(DATA_TYPE, BASENAME, SCALE) \
    SCALE_ROW_9(DATA_TYPE, BASENAME, SCALE) \
    BASENAME##9 *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_11(DATA_TYPE, BASENAME, SCALE) \
    SCALE_ROW_10(DATA_TYPE, BASENAME, SCALE) \
    BASENAME##A *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_12(DATA_TYPE, BASENAME, SCALE) \
    SCALE_ROW_11(DATA_TYPE, BASENAME, SCALE) \
    BASENAME##B *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_13(DATA_TYPE, BASENAME, SCALE) \
    SCALE_ROW_12(DATA_TYPE, BASENAME, SCALE) \
    BASENAME##C *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_14(DATA_TYPE, BASENAME, SCALE) \
    SCALE_ROW_13(DATA_TYPE, BASENAME, SCALE) \
    BASENAME##D *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_15(DATA_TYPE, BASENAME, SCALE) \
    SCALE_ROW_14(DATA_TYPE, BASENAME, SCALE) \
    BASENAME##E *= (DATA_TYPE)(SCALE);

#define SCALE_ROW_16(DATA_TYPE, BASENAME, SCALE) \
    SCALE_ROW_15(DATA_TYPE, BASENAME, SCALE) \
    BASENAME##F *= (DATA_TYPE)(SCALE);

// SCALE_BLOCK_STR pastes N onto SCALE_ROW_ to select the macro that scales the variables BASENAME##0 to BASENAME##(N-1) by SCALE
#define SCALE_BLOCK_STR(N, DATA_TYPE, BASENAME, SCALE) SCALE_ROW_##N(DATA_TYPE, BASENAME, SCALE)
/** Scale elements stored in variables BASENAME##0 to BASENAME##(N-1) by SCALE
 * Supported cases N=1,2,3..16, for variables BASENAME[0..N-1]
 * The extra level of indirection through SCALE_BLOCK_STR lets N be a macro: it is expanded before being pasted onto SCALE_ROW_.
 */
#define SCALE_BLOCK(N, DATA_TYPE, BASENAME, SCALE) SCALE_BLOCK_STR(N, DATA_TYPE, BASENAME, SCALE)

/** COLUMN_VECTORk (k = 1, 2, 3, 4, 8, 16): given the k input vectors X##0 .. X##(k-1), declare a new vector
 * BASENAME##IDX_COL whose k elements are the components .s##IDX_COL of the input vectors, taken in order.
 *
 * IDX_COL is pasted both onto BASENAME and after ".s", so it must be a single component digit (0..9 or A..F)
 * that is valid for the input vectors. Inputs 10..15 use the hexadecimal suffixes A..F.
 * The result type is hard-coded to uchar / uchar<k>.
 * Each macro ends with its own ';'.
 */
#define COLUMN_VECTOR1(IDX_COL, BASENAME, X) \
    uchar BASENAME##IDX_COL = (uchar)((X##0).s##IDX_COL);
#define COLUMN_VECTOR2(IDX_COL, BASENAME, X) \
    uchar2 BASENAME##IDX_COL = (uchar2)((X##0).s##IDX_COL, (X##1).s##IDX_COL);
#define COLUMN_VECTOR3(IDX_COL, BASENAME, X) \
    uchar3 BASENAME##IDX_COL = (uchar3)((X##0).s##IDX_COL, (X##1).s##IDX_COL, (X##2).s##IDX_COL);
#define COLUMN_VECTOR4(IDX_COL, BASENAME, X) \
    uchar4 BASENAME##IDX_COL = (uchar4)((X##0).s##IDX_COL, (X##1).s##IDX_COL, (X##2).s##IDX_COL, (X##3).s##IDX_COL);
#define COLUMN_VECTOR8(IDX_COL, BASENAME, X) \
    uchar8 BASENAME##IDX_COL = (uchar8)((X##0).s##IDX_COL, (X##1).s##IDX_COL, (X##2).s##IDX_COL, (X##3).s##IDX_COL, (X##4).s##IDX_COL, (X##5).s##IDX_COL, (X##6).s##IDX_COL, (X##7).s##IDX_COL);
#define COLUMN_VECTOR16(IDX_COL, BASENAME, X) \
    uchar16 BASENAME##IDX_COL = (uchar16)((X##0).s##IDX_COL, (X##1).s##IDX_COL, (X##2).s##IDX_COL, (X##3).s##IDX_COL, (X##4).s##IDX_COL, (X##5).s##IDX_COL, (X##6).s##IDX_COL, (X##7).s##IDX_COL, (X##8).s##IDX_COL, (X##9).s##IDX_COL, (X##A).s##IDX_COL, (X##B).s##IDX_COL, (X##C).s##IDX_COL, (X##D).s##IDX_COL, (X##E).s##IDX_COL, (X##F).s##IDX_COL);

/** TRANSPOSE_K0Xn (n = 1, 2, 3, 4, 8, 16): given the K0 input vectors BS##0 .. BS##(K0-1), create the n vectors
 * BASENAME##0 .. BASENAME##(n-1) of K0 elements each, where BASENAME##j gathers component j of every input vector
 * (i.e. the result of a transposition). Columns 10..15 use the hexadecimal suffixes A..F.
 *
 * The source basename parameter is called BS, not B: the column index token B used by TRANSPOSE_K0X16 is a plain
 * token in the replacement list, so a parameter named B would capture it and column 11 would be named after the
 * caller's source basename instead of "B".
 */
#define TRANSPOSE_K0X1(K0, BASENAME, BS) \
    COLUMN_VECTOR(K0, 0, BASENAME, BS);
#define TRANSPOSE_K0X2(K0, BASENAME, BS) \
    TRANSPOSE_K0X1(K0, BASENAME, BS); \
    COLUMN_VECTOR(K0, 1, BASENAME, BS);
#define TRANSPOSE_K0X3(K0, BASENAME, BS) \
    TRANSPOSE_K0X2(K0, BASENAME, BS); \
    COLUMN_VECTOR(K0, 2, BASENAME, BS);
#define TRANSPOSE_K0X4(K0, BASENAME, BS) \
    TRANSPOSE_K0X3(K0, BASENAME, BS); \
    COLUMN_VECTOR(K0, 3, BASENAME, BS);
#define TRANSPOSE_K0X8(K0, BASENAME, BS) \
    TRANSPOSE_K0X4(K0, BASENAME, BS); \
    COLUMN_VECTOR(K0, 4, BASENAME, BS); \
    COLUMN_VECTOR(K0, 5, BASENAME, BS); \
    COLUMN_VECTOR(K0, 6, BASENAME, BS); \
    COLUMN_VECTOR(K0, 7, BASENAME, BS);
#define TRANSPOSE_K0X16(K0, BASENAME, BS) \
    TRANSPOSE_K0X8(K0, BASENAME, BS); \
    COLUMN_VECTOR(K0, 8, BASENAME, BS); \
    COLUMN_VECTOR(K0, 9, BASENAME, BS); \
    COLUMN_VECTOR(K0, A, BASENAME, BS); \
    COLUMN_VECTOR(K0, B, BASENAME, BS); \
    COLUMN_VECTOR(K0, C, BASENAME, BS); \
    COLUMN_VECTOR(K0, D, BASENAME, BS); \
    COLUMN_VECTOR(K0, E, BASENAME, BS); \
    COLUMN_VECTOR(K0, F, BASENAME, BS);

/** Create the column vector BASENAME##IDX_COL from the K0 input vectors BS##0 .. BS##(K0-1).
 * K0 is expanded and concatenated onto COLUMN_VECTOR to select COLUMN_VECTOR1/2/3/4/8/16.
 */
#define COLUMN_VECTOR(K0, IDX_COL, BASENAME, BS) \
    CONCAT(COLUMN_VECTOR, K0) \
    (IDX_COL, BASENAME, BS);

/** Given K0 vectors BS##0 .. BS##(K0-1), create the N0 vectors BASENAME##0 .. BASENAME##(N0-1) of K0 elements each.
 * N0 is expanded and concatenated onto TRANSPOSE_K0X to select TRANSPOSE_K0X1/2/3/4/8/16.
 */
#define TRANSPOSE_K0XN0(K0, N0, BASENAME, BS) \
    CONCAT(TRANSPOSE_K0X, N0) \
    (K0, BASENAME, BS);

/** ADD_ROW_n: add BIAS##i to BASENAME##i in place, for i = 0 .. n-1.
 *
 * Rows 10..15 use the hexadecimal suffixes A..F for both BASENAME and BIAS.
 */
#define ADD_ROW_1(BASENAME, BIAS) \
    BASENAME##0 += BIAS##0;

#define ADD_ROW_2(BASENAME, BIAS) \
    ADD_ROW_1(BASENAME, BIAS) \
    BASENAME##1 += BIAS##1;

#define ADD_ROW_3(BASENAME, BIAS) \
    ADD_ROW_2(BASENAME, BIAS) \
    BASENAME##2 += BIAS##2;

#define ADD_ROW_4(BASENAME, BIAS) \
    ADD_ROW_3(BASENAME, BIAS) \
    BASENAME##3 += BIAS##3;

#define ADD_ROW_5(BASENAME, BIAS) \
    ADD_ROW_4(BASENAME, BIAS) \
    BASENAME##4 += BIAS##4;

#define ADD_ROW_6(BASENAME, BIAS) \
    ADD_ROW_5(BASENAME, BIAS) \
    BASENAME##5 += BIAS##5;

#define ADD_ROW_7(BASENAME, BIAS) \
    ADD_ROW_6(BASENAME, BIAS) \
    BASENAME##6 += BIAS##6;

#define ADD_ROW_8(BASENAME, BIAS) \
    ADD_ROW_7(BASENAME, BIAS) \
    BASENAME##7 += BIAS##7;

#define ADD_ROW_9(BASENAME, BIAS) \
    ADD_ROW_8(BASENAME, BIAS) \
    BASENAME##8 += BIAS##8;

#define ADD_ROW_10(BASENAME, BIAS) \
    ADD_ROW_9(BASENAME, BIAS) \
    BASENAME##9 += BIAS##9;

#define ADD_ROW_11(BASENAME, BIAS) \
    ADD_ROW_10(BASENAME, BIAS) \
    BASENAME##A += BIAS##A;

#define ADD_ROW_12(BASENAME, BIAS) \
    ADD_ROW_11(BASENAME, BIAS) \
    BASENAME##B += BIAS##B;

#define ADD_ROW_13(BASENAME, BIAS) \
    ADD_ROW_12(BASENAME, BIAS) \
    BASENAME##C += BIAS##C;

#define ADD_ROW_14(BASENAME, BIAS) \
    ADD_ROW_13(BASENAME, BIAS) \
    BASENAME##D += BIAS##D;

#define ADD_ROW_15(BASENAME, BIAS) \
    ADD_ROW_14(BASENAME, BIAS) \
    BASENAME##E += BIAS##E;

#define ADD_ROW_16(BASENAME, BIAS) \
    ADD_ROW_15(BASENAME, BIAS) \
    BASENAME##F += BIAS##F;

// ADD_BLOCK_STR pastes N onto ADD_ROW_ to select the macro that adds the variables BIAS##0 ... BIAS##(N-1) to BASENAME##0 ... BASENAME##(N-1)
#define ADD_BLOCK_STR(N, BASENAME, BIAS) ADD_ROW_##N(BASENAME, BIAS)
/** Add BIAS##0 ... BIAS##(N-1) to BASENAME##0 ... BASENAME##(N-1), row by row
 * Supported cases N=1,2,3..16, for variables BASENAME[0..N-1]
 * The extra level of indirection through ADD_BLOCK_STR lets N be a macro: it is expanded before being pasted onto ADD_ROW_.
 */
#define ADD_BLOCK(N, BASENAME, BIAS) ADD_BLOCK_STR(N, BASENAME, BIAS)
553
// ADD_ROW_BROADCAST_n adds the same BIAS value to each of the variables
// BASENAME##0 ... BASENAME##(n-1).
// Each macro expands to ADD_ROW_BROADCAST_(n-1) followed by the addition for one
// more row; rows 10 to 15 use the hexadecimal suffixes A to F.
//
// BASENAME: common prefix of the destination variables
// BIAS:     expression added, unchanged, to every row
#define ADD_ROW_BROADCAST_1(BASENAME, BIAS) \
    BASENAME##0 += BIAS;

#define ADD_ROW_BROADCAST_2(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_1(BASENAME, BIAS) \
    BASENAME##1 += BIAS;

#define ADD_ROW_BROADCAST_3(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_2(BASENAME, BIAS) \
    BASENAME##2 += BIAS;

#define ADD_ROW_BROADCAST_4(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_3(BASENAME, BIAS) \
    BASENAME##3 += BIAS;

#define ADD_ROW_BROADCAST_5(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_4(BASENAME, BIAS) \
    BASENAME##4 += BIAS;

#define ADD_ROW_BROADCAST_6(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_5(BASENAME, BIAS) \
    BASENAME##5 += BIAS;

#define ADD_ROW_BROADCAST_7(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_6(BASENAME, BIAS) \
    BASENAME##6 += BIAS;

#define ADD_ROW_BROADCAST_8(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_7(BASENAME, BIAS) \
    BASENAME##7 += BIAS;

#define ADD_ROW_BROADCAST_9(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_8(BASENAME, BIAS) \
    BASENAME##8 += BIAS;

#define ADD_ROW_BROADCAST_10(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_9(BASENAME, BIAS) \
    BASENAME##9 += BIAS;

#define ADD_ROW_BROADCAST_11(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_10(BASENAME, BIAS) \
    BASENAME##A += BIAS;

#define ADD_ROW_BROADCAST_12(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_11(BASENAME, BIAS) \
    BASENAME##B += BIAS;

#define ADD_ROW_BROADCAST_13(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_12(BASENAME, BIAS) \
    BASENAME##C += BIAS;

#define ADD_ROW_BROADCAST_14(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_13(BASENAME, BIAS) \
    BASENAME##D += BIAS;

#define ADD_ROW_BROADCAST_15(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_14(BASENAME, BIAS) \
    BASENAME##E += BIAS;

#define ADD_ROW_BROADCAST_16(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_15(BASENAME, BIAS) \
    BASENAME##F += BIAS;

// ADD_BLOCK_BROADCAST_STR pastes N onto "ADD_ROW_BROADCAST_" to select the
// ADD_ROW_BROADCAST_n macro, which adds the single value BIAS to each of
// BASENAME##0 ... BASENAME##(n-1).
#define ADD_BLOCK_BROADCAST_STR(N, BASENAME, BIAS) ADD_ROW_BROADCAST_##N(BASENAME, BIAS)
/** Add the same value BIAS to each of the variables BASENAME##0 ... BASENAME##(N-1)
 *
 * N is forwarded through ADD_BLOCK_BROADCAST_STR so that a macro passed as N is
 * replaced by its value before it is pasted into the ADD_ROW_BROADCAST_n name.
 *
 * Supported cases N=1,2,3..16, i.e. rows 0 to N-1. Rows 10 to 15 use the
 * hexadecimal suffixes A to F.
 *
 * @param[in] N        Number of rows to update
 * @param[in] BASENAME Common prefix of the destination variables
 * @param[in] BIAS     Value added to every row
 */
#define ADD_BLOCK_BROADCAST(N, BASENAME, BIAS) ADD_BLOCK_BROADCAST_STR(N, BASENAME, BIAS)

// ACTIVATION_ROW_n overwrites each of the variables BASENAME##0 ... BASENAME##(n-1)
// with ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, <that variable>, A_VAL, B_VAL).
// Each macro expands to ACTIVATION_ROW_(n-1) followed by the assignment for one
// more row; rows 10 to 15 use the hexadecimal suffixes A to F.
// ACTIVATION itself is not defined in this section (presumably it comes from
// activation_float_helpers.h - confirm); all arguments except BASENAME are
// forwarded to it unchanged.
#define ACTIVATION_ROW_1(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    BASENAME##0 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##0, A_VAL, B_VAL);

#define ACTIVATION_ROW_2(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_1(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    BASENAME##1 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##1, A_VAL, B_VAL);

#define ACTIVATION_ROW_3(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_2(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    BASENAME##2 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##2, A_VAL, B_VAL);

#define ACTIVATION_ROW_4(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_3(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    BASENAME##3 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##3, A_VAL, B_VAL);

#define ACTIVATION_ROW_5(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_4(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    BASENAME##4 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##4, A_VAL, B_VAL);

#define ACTIVATION_ROW_6(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_5(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    BASENAME##5 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##5, A_VAL, B_VAL);

#define ACTIVATION_ROW_7(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_6(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    BASENAME##6 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##6, A_VAL, B_VAL);

#define ACTIVATION_ROW_8(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_7(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    BASENAME##7 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##7, A_VAL, B_VAL);

#define ACTIVATION_ROW_9(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_8(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    BASENAME##8 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##8, A_VAL, B_VAL);

#define ACTIVATION_ROW_10(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_9(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    BASENAME##9 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##9, A_VAL, B_VAL);

#define ACTIVATION_ROW_11(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_10(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    BASENAME##A = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##A, A_VAL, B_VAL);

#define ACTIVATION_ROW_12(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_11(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    BASENAME##B = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##B, A_VAL, B_VAL);

#define ACTIVATION_ROW_13(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_12(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    BASENAME##C = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##C, A_VAL, B_VAL);

#define ACTIVATION_ROW_14(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_13(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    BASENAME##D = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##D, A_VAL, B_VAL);

#define ACTIVATION_ROW_15(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_14(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    BASENAME##E = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##E, A_VAL, B_VAL);

#define ACTIVATION_ROW_16(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_15(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    BASENAME##F = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##F, A_VAL, B_VAL);

// ACTIVATION_BLOCK_STR pastes N onto "ACTIVATION_ROW_" to select the ACTIVATION_ROW_n
// macro, which applies the activation to the variables BASENAME##0 ... BASENAME##(n-1).
#define ACTIVATION_BLOCK_STR(N, ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) ACTIVATION_ROW_##N(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)
/** Apply the activation to the variables BASENAME##0 ... BASENAME##(N-1)
 *
 * N is forwarded through ACTIVATION_BLOCK_STR so that a macro passed as N is
 * replaced by its value before it is pasted into the ACTIVATION_ROW_n name.
 *
 * Supported cases N=1,2,3..16, i.e. rows 0 to N-1. Rows 10 to 15 use the
 * hexadecimal suffixes A to F.
 *
 * @param[in] N               Number of rows to update
 * @param[in] ACTIVATION_TYPE Forwarded unchanged to ACTIVATION
 * @param[in] DATA_TYPE       Forwarded unchanged to ACTIVATION
 * @param[in] BASENAME        Common prefix of the variables that are read and overwritten
 * @param[in] A_VAL           Forwarded unchanged to ACTIVATION
 * @param[in] B_VAL           Forwarded unchanged to ACTIVATION
 */
#define ACTIVATION_BLOCK(N, ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) ACTIVATION_BLOCK_STR(N, ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)