/*
 * Copyright (c) 2019-2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "activation_float_helpers.h"
#include "helpers.h"

/** Loads the rows from 0 to n-1 in the given variables (BASENAME0 to BASENAMEn-1).
 * @name LOAD_ROW_n
 *
 * LOAD_ROW_n first expands LOAD_ROW_(n-1) and then declares and initialises one
 * more variable, so a single invocation declares BASENAME0 to BASENAMEn-1.
 * Row indices 10 to 15 use the suffixes A to F, both for BASENAME and for Z.
 * Row i is read from PTR + OFFSET + i * STRIDE_Y + Z<i>.
 *
 * Arguments that are not token-pasted are parenthesised in the expansion, so a
 * compound expression (for instance a stride written as "a + b") keeps its value.
 *
 * @param[in] N0        The vector size forwarded to VEC_DATA_TYPE() and VLOAD()
 * @param[in] DATA_TYPE The data type of variables
 * @param[in] BASENAME  The basename of the destination variables for the loaded rows
 * @param[in] PTR       The base pointer
 * @param[in] OFFSET    The offset within a row
 * @param[in] STRIDE_Y  The stride value in y-axis direction
 * @param[in] Z         The basename of the per-row z-axis offsets (Z0 to Zn-1)
 * @{
 */
#define LOAD_ROW_1(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##0 = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 0 * (STRIDE_Y) + Z##0));

#define LOAD_ROW_2(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    LOAD_ROW_1(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##1 = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 1 * (STRIDE_Y) + Z##1));

#define LOAD_ROW_3(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    LOAD_ROW_2(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##2 = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 2 * (STRIDE_Y) + Z##2));

#define LOAD_ROW_4(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    LOAD_ROW_3(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##3 = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 3 * (STRIDE_Y) + Z##3));

#define LOAD_ROW_5(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    LOAD_ROW_4(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##4 = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 4 * (STRIDE_Y) + Z##4));

#define LOAD_ROW_6(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    LOAD_ROW_5(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##5 = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 5 * (STRIDE_Y) + Z##5));

#define LOAD_ROW_7(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    LOAD_ROW_6(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##6 = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 6 * (STRIDE_Y) + Z##6));

#define LOAD_ROW_8(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    LOAD_ROW_7(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##7 = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 7 * (STRIDE_Y) + Z##7));

#define LOAD_ROW_9(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    LOAD_ROW_8(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##8 = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 8 * (STRIDE_Y) + Z##8));

#define LOAD_ROW_10(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    LOAD_ROW_9(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##9 = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 9 * (STRIDE_Y) + Z##9));

#define LOAD_ROW_11(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    LOAD_ROW_10(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##A = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 10 * (STRIDE_Y) + Z##A));

#define LOAD_ROW_12(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    LOAD_ROW_11(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##B = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 11 * (STRIDE_Y) + Z##B));

#define LOAD_ROW_13(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    LOAD_ROW_12(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##C = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 12 * (STRIDE_Y) + Z##C));

#define LOAD_ROW_14(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    LOAD_ROW_13(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##D = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 13 * (STRIDE_Y) + Z##D));

#define LOAD_ROW_15(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    LOAD_ROW_14(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##E = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 14 * (STRIDE_Y) + Z##E));

#define LOAD_ROW_16(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    LOAD_ROW_15(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##F = VLOAD(N0)(0, (__global DATA_TYPE *)((PTR) + (OFFSET) + 15 * (STRIDE_Y) + Z##F));

/** @}*/ // end of group LOAD_ROW_n

/** Load Blocks (consecutive rows and columns) with Z offset.
 * @name LOAD_BLOCK
 *
 * Supported cases are M0=1,2,3,...,16 and N0=1,2,3,4,8,16
 * The data to load is expected to have consecutive names for each row.
 * E.g., for M0=3, and BASENAME=c, the expected data is c0, c1 and c2.
 * The Z offset is expected to have consecutive names.
 * E.g., for M0=3, and Z=zin, the expected Z offsets are zin0, zin1 and zin2.
 *
 * LOAD_BLOCK forwards to LOAD_BLOCK_STR so that M0 is macro-expanded before it is
 * pasted onto LOAD_ROW_; M0 may therefore itself be a macro (e.g. a build-time define).
 *
 * @param[in] M0        The number of consecutive rows
 * @param[in] N0        The number of consecutive columns
 * @param[in] DATA_TYPE The data type of the target
 * @param[in] BASENAME  The basename of the result variables
 * @param[in] PTR       The base pointer for the data
 * @param[in] OFFSET    The offset within a row
 * @param[in] STRIDE_Y  The stride in y-axis direction
 * @param[in] Z         The basename of the z-axis offsets
 * @{
 */
#define LOAD_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) LOAD_ROW_##M0(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
#define LOAD_BLOCK(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z) LOAD_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y, Z)
/** @} */ // end of group LOAD_BLOCK

/** Loads the rows from 0 to n-1 in the given variables (BASENAME0 to BASENAMEn-1).
 * @name LOAD_TEXTURE2D_ROW_n
 *
 * LOAD_TEXTURE2D_ROW_n first expands LOAD_TEXTURE2D_ROW_(n-1) and then assigns one
 * more row. Unlike LOAD_ROW_n the destination variables are only assigned, not
 * declared, so BASENAME0 to BASENAMEn-1 must already exist at the point of use.
 * Row i is read at (X_COORD + i * X_STEP_ROW, Y_COORD + i * Y_STEP_ROW).
 * Row indices 10 to 15 use the suffixes A to F.
 *
 * The coordinate and step arguments are parenthesised in the expansion, so compound
 * expressions keep their value.
 *
 * NOTE(review): no ';' is emitted between consecutive rows here; presumably the
 * expansion of READ_IMAGE2D terminates each statement - confirm where it is defined.
 *
 * @param[in] N0         The number of pixels to read
 * @param[in] DATA_TYPE  The data type of variables
 * @param[in] BASENAME   The basename of the destination variables for the loaded rows
 * @param[in] IMG        The 2D OpenCL image object
 * @param[in] X_COORD    The x coordinate for the top-left pixel
 * @param[in] Y_COORD    The y coordinate for the top-left pixel
 * @param[in] X_STEP_ROW The incremental step row for the x coordinate (in pixels)
 * @param[in] Y_STEP_ROW The incremental step row for the y coordinate (in pixels)
 * @{
 */
#define LOAD_TEXTURE2D_ROW_1(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
    BASENAME##0 = READ_IMAGE2D(DATA_TYPE, N0, IMG, ((X_COORD) + 0 * (X_STEP_ROW)), ((Y_COORD) + 0 * (Y_STEP_ROW)))

#define LOAD_TEXTURE2D_ROW_2(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
    LOAD_TEXTURE2D_ROW_1(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
    BASENAME##1 = READ_IMAGE2D(DATA_TYPE, N0, IMG, ((X_COORD) + 1 * (X_STEP_ROW)), ((Y_COORD) + 1 * (Y_STEP_ROW)))

#define LOAD_TEXTURE2D_ROW_3(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
    LOAD_TEXTURE2D_ROW_2(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
    BASENAME##2 = READ_IMAGE2D(DATA_TYPE, N0, IMG, ((X_COORD) + 2 * (X_STEP_ROW)), ((Y_COORD) + 2 * (Y_STEP_ROW)))

#define LOAD_TEXTURE2D_ROW_4(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
    LOAD_TEXTURE2D_ROW_3(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
    BASENAME##3 = READ_IMAGE2D(DATA_TYPE, N0, IMG, ((X_COORD) + 3 * (X_STEP_ROW)), ((Y_COORD) + 3 * (Y_STEP_ROW)))

#define LOAD_TEXTURE2D_ROW_5(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
    LOAD_TEXTURE2D_ROW_4(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
    BASENAME##4 = READ_IMAGE2D(DATA_TYPE, N0, IMG, ((X_COORD) + 4 * (X_STEP_ROW)), ((Y_COORD) + 4 * (Y_STEP_ROW)))

#define LOAD_TEXTURE2D_ROW_6(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
    LOAD_TEXTURE2D_ROW_5(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
    BASENAME##5 = READ_IMAGE2D(DATA_TYPE, N0, IMG, ((X_COORD) + 5 * (X_STEP_ROW)), ((Y_COORD) + 5 * (Y_STEP_ROW)))

#define LOAD_TEXTURE2D_ROW_7(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
    LOAD_TEXTURE2D_ROW_6(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
    BASENAME##6 = READ_IMAGE2D(DATA_TYPE, N0, IMG, ((X_COORD) + 6 * (X_STEP_ROW)), ((Y_COORD) + 6 * (Y_STEP_ROW)))

#define LOAD_TEXTURE2D_ROW_8(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
    LOAD_TEXTURE2D_ROW_7(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
    BASENAME##7 = READ_IMAGE2D(DATA_TYPE, N0, IMG, ((X_COORD) + 7 * (X_STEP_ROW)), ((Y_COORD) + 7 * (Y_STEP_ROW)))

#define LOAD_TEXTURE2D_ROW_9(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
    LOAD_TEXTURE2D_ROW_8(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
    BASENAME##8 = READ_IMAGE2D(DATA_TYPE, N0, IMG, ((X_COORD) + 8 * (X_STEP_ROW)), ((Y_COORD) + 8 * (Y_STEP_ROW)))

#define LOAD_TEXTURE2D_ROW_10(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
    LOAD_TEXTURE2D_ROW_9(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
    BASENAME##9 = READ_IMAGE2D(DATA_TYPE, N0, IMG, ((X_COORD) + 9 * (X_STEP_ROW)), ((Y_COORD) + 9 * (Y_STEP_ROW)))

#define LOAD_TEXTURE2D_ROW_11(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
    LOAD_TEXTURE2D_ROW_10(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
    BASENAME##A = READ_IMAGE2D(DATA_TYPE, N0, IMG, ((X_COORD) + 10 * (X_STEP_ROW)), ((Y_COORD) + 10 * (Y_STEP_ROW)))

#define LOAD_TEXTURE2D_ROW_12(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
    LOAD_TEXTURE2D_ROW_11(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
    BASENAME##B = READ_IMAGE2D(DATA_TYPE, N0, IMG, ((X_COORD) + 11 * (X_STEP_ROW)), ((Y_COORD) + 11 * (Y_STEP_ROW)))

#define LOAD_TEXTURE2D_ROW_13(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
    LOAD_TEXTURE2D_ROW_12(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
    BASENAME##C = READ_IMAGE2D(DATA_TYPE, N0, IMG, ((X_COORD) + 12 * (X_STEP_ROW)), ((Y_COORD) + 12 * (Y_STEP_ROW)))

#define LOAD_TEXTURE2D_ROW_14(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
    LOAD_TEXTURE2D_ROW_13(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
    BASENAME##D = READ_IMAGE2D(DATA_TYPE, N0, IMG, ((X_COORD) + 13 * (X_STEP_ROW)), ((Y_COORD) + 13 * (Y_STEP_ROW)))

#define LOAD_TEXTURE2D_ROW_15(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
    LOAD_TEXTURE2D_ROW_14(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
    BASENAME##E = READ_IMAGE2D(DATA_TYPE, N0, IMG, ((X_COORD) + 14 * (X_STEP_ROW)), ((Y_COORD) + 14 * (Y_STEP_ROW)))

#define LOAD_TEXTURE2D_ROW_16(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
    LOAD_TEXTURE2D_ROW_15(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) \
    BASENAME##F = READ_IMAGE2D(DATA_TYPE, N0, IMG, ((X_COORD) + 15 * (X_STEP_ROW)), ((Y_COORD) + 15 * (Y_STEP_ROW)))
/** @} */ // end of group LOAD_TEXTURE2D_ROW_n

/** Load a 2D texture in unit of pixel. A pixel is made of 4 floating point values
 * @name LOAD_TEXTURE2D
 *
 * Supported cases are M0=1,2,3,...,16.
 * The data to load is expected to have consecutive names for each row.
 * E.g., for M0=3, and BASENAME=c, the expected data is c0, c1 and c2.
 *
 * LOAD_TEXTURE2D forwards to LOAD_TEXTURE2D_STR so that M0 is macro-expanded before
 * it is pasted onto LOAD_TEXTURE2D_ROW_.
 *
 * @param[in] M0         The number of consecutive rows
 * @param[in] N0         The number of consecutive pixels. Only 1, 2 and 4 are supported
 *                       (TODO confirm: the accepted values are determined by READ_IMAGE2D,
 *                       which is defined outside this section)
 * @param[in] DATA_TYPE  The data type of the target
 * @param[in] BASENAME   The basename of the result variables
 * @param[in] IMG        The 2D OpenCL image object
 * @param[in] X_COORD    The x coordinate for the top-left pixel
 * @param[in] Y_COORD    The y coordinate for the top-left pixel
 * @param[in] X_STEP_ROW The incremental step row for the x coordinate (in pixels)
 * @param[in] Y_STEP_ROW The incremental step row for the y coordinate (in pixels)
 * @{
 */
#define LOAD_TEXTURE2D_STR(M0, N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) LOAD_TEXTURE2D_ROW_##M0(N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW)
#define LOAD_TEXTURE2D(M0, N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW) LOAD_TEXTURE2D_STR(M0, N0, DATA_TYPE, BASENAME, IMG, X_COORD, Y_COORD, X_STEP_ROW, Y_STEP_ROW)
/** @} */ // end of group LOAD_TEXTURE2D

/** Loads the elements from 0 to n-1 in the given variables (BASENAME0 to BASENAMEn-1).
 * @name LOAD_ELEMENT_n
 *
 * LOAD_ELEMENT_n first expands LOAD_ELEMENT_(n-1) and then declares one more
 * variable of type VEC_DATA_TYPE(DATA_TYPE, N0), initialised from the single
 * DATA_TYPE element found at PTR + OFFSET + i * STRIDE_Y.
 * Element indices 10 to 15 use the suffixes A to F.
 *
 * PTR, OFFSET and STRIDE_Y are parenthesised in the expansion, so compound
 * expressions keep their value.
 *
 * @param[in] N0        The vector size forwarded to VEC_DATA_TYPE()
 * @param[in] DATA_TYPE The data type of variables
 * @param[in] BASENAME  The basename of the destination variables for the loaded rows
 * @param[in] PTR       The base pointer
 * @param[in] OFFSET    The offset within a row
 * @param[in] STRIDE_Y  The stride value in y-axis direction
 * @{
 */
#define LOAD_ELEMENT_1(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##0 = *((__global DATA_TYPE *)((PTR) + (OFFSET) + 0 * (STRIDE_Y)));

#define LOAD_ELEMENT_2(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
    LOAD_ELEMENT_1(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##1 = *((__global DATA_TYPE *)((PTR) + (OFFSET) + 1 * (STRIDE_Y)));

#define LOAD_ELEMENT_3(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
    LOAD_ELEMENT_2(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##2 = *((__global DATA_TYPE *)((PTR) + (OFFSET) + 2 * (STRIDE_Y)));

#define LOAD_ELEMENT_4(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
    LOAD_ELEMENT_3(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##3 = *((__global DATA_TYPE *)((PTR) + (OFFSET) + 3 * (STRIDE_Y)));

#define LOAD_ELEMENT_5(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
    LOAD_ELEMENT_4(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##4 = *((__global DATA_TYPE *)((PTR) + (OFFSET) + 4 * (STRIDE_Y)));

#define LOAD_ELEMENT_6(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
    LOAD_ELEMENT_5(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##5 = *((__global DATA_TYPE *)((PTR) + (OFFSET) + 5 * (STRIDE_Y)));

#define LOAD_ELEMENT_7(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
    LOAD_ELEMENT_6(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##6 = *((__global DATA_TYPE *)((PTR) + (OFFSET) + 6 * (STRIDE_Y)));

#define LOAD_ELEMENT_8(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
    LOAD_ELEMENT_7(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##7 = *((__global DATA_TYPE *)((PTR) + (OFFSET) + 7 * (STRIDE_Y)));

#define LOAD_ELEMENT_9(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
    LOAD_ELEMENT_8(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##8 = *((__global DATA_TYPE *)((PTR) + (OFFSET) + 8 * (STRIDE_Y)));

#define LOAD_ELEMENT_10(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
    LOAD_ELEMENT_9(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##9 = *((__global DATA_TYPE *)((PTR) + (OFFSET) + 9 * (STRIDE_Y)));

#define LOAD_ELEMENT_11(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
    LOAD_ELEMENT_10(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##A = *((__global DATA_TYPE *)((PTR) + (OFFSET) + 10 * (STRIDE_Y)));

#define LOAD_ELEMENT_12(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
    LOAD_ELEMENT_11(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##B = *((__global DATA_TYPE *)((PTR) + (OFFSET) + 11 * (STRIDE_Y)));

#define LOAD_ELEMENT_13(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
    LOAD_ELEMENT_12(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##C = *((__global DATA_TYPE *)((PTR) + (OFFSET) + 12 * (STRIDE_Y)));

#define LOAD_ELEMENT_14(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
    LOAD_ELEMENT_13(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##D = *((__global DATA_TYPE *)((PTR) + (OFFSET) + 13 * (STRIDE_Y)));

#define LOAD_ELEMENT_15(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
    LOAD_ELEMENT_14(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##E = *((__global DATA_TYPE *)((PTR) + (OFFSET) + 14 * (STRIDE_Y)));

#define LOAD_ELEMENT_16(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
    LOAD_ELEMENT_15(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) \
    VEC_DATA_TYPE(DATA_TYPE, N0) \
    BASENAME##F = *((__global DATA_TYPE *)((PTR) + (OFFSET) + 15 * (STRIDE_Y)));

/** @}*/ // end of group LOAD_ELEMENT_n

/** Load Scalar as Vector (consecutive elements).
 * @name LOAD_SCALAR_AS_VECTOR
 *
 * Supported cases are M0=1,2,3,...,16 and N0=1,2,3,4,8,16
 * The data to load is expected to have consecutive names for each row.
 * E.g., for M0=3, and BASENAME=c, the expected data is c0, c1 and c2.
 *
 * LOAD_SCALAR_AS_VECTOR forwards to LOAD_SCALAR_AS_VECTOR_STR so that M0 is
 * macro-expanded before it is pasted onto LOAD_ELEMENT_.
 *
 * @param[in] M0        The number of consecutive rows
 * @param[in] N0        The number of consecutive columns
 * @param[in] DATA_TYPE The data type of the target
 * @param[in] BASENAME  The basename of the result variables
 * @param[in] PTR       The base pointer for the data
 * @param[in] OFFSET    The offset within a row
 * @param[in] STRIDE_Y  The stride in y-axis direction
 * @{
 */
#define LOAD_SCALAR_AS_VECTOR_STR(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) LOAD_ELEMENT_##M0(N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)
#define LOAD_SCALAR_AS_VECTOR(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y) LOAD_SCALAR_AS_VECTOR_STR(M0, N0, DATA_TYPE, BASENAME, PTR, OFFSET, STRIDE_Y)
/** @} */ // end of group LOAD_SCALAR_AS_VECTOR

/** Basic macros to calculate Z offset values from Z0 to Zn-1
 * @name CALCULATE_Z_OFFSET_n
 *
 * CALCULATE_Z_OFFSET_n first expands CALCULATE_Z_OFFSET_(n-1) and then computes one
 * more offset. The variables Z0 to Zn-1 are assigned, not declared. For index i:
 *
 *   Z<i> = min(DEPTH_GEMM3D - 1, (i + Y * M0) / HEIGHT_GEMM3D) * (CROSS_PLANE_PAD * STRIDE_Y)
 *
 * with the intermediate values cast to DATA_TYPE. Only n = 1 to 8 are defined in
 * this group.
 *
 * Every value argument is parenthesised in the expansion, so compound expressions
 * (e.g. a height written as "a + b") keep their value under the casts and products.
 *
 * @param[in] M0              The number of offset values to calculate
 * @param[in] DATA_TYPE       The data type of the results
 * @param[in] Z               The basename of the result variables
 * @param[in] Y               The work-item ID of y-axis
 * @param[in] HEIGHT_GEMM3D   The height of GEMM3D
 * @param[in] DEPTH_GEMM3D    The depth of GEMM3D
 * @param[in] CROSS_PLANE_PAD The padding required for plane changes across the z-dimension
 * @param[in] STRIDE_Y        The stride value in y-axis direction
 *
 * @{
 */
#define CALCULATE_Z_OFFSET_1(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y) \
    Z##0 = (0 + (DATA_TYPE)((Y) * (DATA_TYPE)(M0))) / (DATA_TYPE)(HEIGHT_GEMM3D); \
    Z##0 = min((DATA_TYPE)((DEPTH_GEMM3D) - 1), Z##0); \
    Z##0 *= ((CROSS_PLANE_PAD) * (STRIDE_Y));

#define CALCULATE_Z_OFFSET_2(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y) \
    CALCULATE_Z_OFFSET_1(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y) \
    Z##1 = (1 + (DATA_TYPE)((Y) * (DATA_TYPE)(M0))) / (DATA_TYPE)(HEIGHT_GEMM3D); \
    Z##1 = min((DATA_TYPE)((DEPTH_GEMM3D) - 1), Z##1); \
    Z##1 *= ((CROSS_PLANE_PAD) * (STRIDE_Y));

#define CALCULATE_Z_OFFSET_3(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y) \
    CALCULATE_Z_OFFSET_2(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y) \
    Z##2 = (2 + (DATA_TYPE)((Y) * (DATA_TYPE)(M0))) / (DATA_TYPE)(HEIGHT_GEMM3D); \
    Z##2 = min((DATA_TYPE)((DEPTH_GEMM3D) - 1), Z##2); \
    Z##2 *= ((CROSS_PLANE_PAD) * (STRIDE_Y));

#define CALCULATE_Z_OFFSET_4(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y) \
    CALCULATE_Z_OFFSET_3(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y) \
    Z##3 = (3 + (DATA_TYPE)((Y) * (DATA_TYPE)(M0))) / (DATA_TYPE)(HEIGHT_GEMM3D); \
    Z##3 = min((DATA_TYPE)((DEPTH_GEMM3D) - 1), Z##3); \
    Z##3 *= ((CROSS_PLANE_PAD) * (STRIDE_Y));

#define CALCULATE_Z_OFFSET_5(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y) \
    CALCULATE_Z_OFFSET_4(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y) \
    Z##4 = (4 + (DATA_TYPE)((Y) * (DATA_TYPE)(M0))) / (DATA_TYPE)(HEIGHT_GEMM3D); \
    Z##4 = min((DATA_TYPE)((DEPTH_GEMM3D) - 1), Z##4); \
    Z##4 *= ((CROSS_PLANE_PAD) * (STRIDE_Y));

#define CALCULATE_Z_OFFSET_6(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y) \
    CALCULATE_Z_OFFSET_5(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y) \
    Z##5 = (5 + (DATA_TYPE)((Y) * (DATA_TYPE)(M0))) / (DATA_TYPE)(HEIGHT_GEMM3D); \
    Z##5 = min((DATA_TYPE)((DEPTH_GEMM3D) - 1), Z##5); \
    Z##5 *= ((CROSS_PLANE_PAD) * (STRIDE_Y));

#define CALCULATE_Z_OFFSET_7(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y) \
    CALCULATE_Z_OFFSET_6(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y) \
    Z##6 = (6 + (DATA_TYPE)((Y) * (DATA_TYPE)(M0))) / (DATA_TYPE)(HEIGHT_GEMM3D); \
    Z##6 = min((DATA_TYPE)((DEPTH_GEMM3D) - 1), Z##6); \
    Z##6 *= ((CROSS_PLANE_PAD) * (STRIDE_Y));

#define CALCULATE_Z_OFFSET_8(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y) \
    CALCULATE_Z_OFFSET_7(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y) \
    Z##7 = (7 + (DATA_TYPE)((Y) * (DATA_TYPE)(M0))) / (DATA_TYPE)(HEIGHT_GEMM3D); \
    Z##7 = min((DATA_TYPE)((DEPTH_GEMM3D) - 1), Z##7); \
    Z##7 *= ((CROSS_PLANE_PAD) * (STRIDE_Y));

/** @} */ // end of group CALCULATE_Z_OFFSET_n

/** Calculate Z offset values from Z0 to Zn-1
 * @name CALCULATE_Z_OFFSET
 *
 * The Z offsets are expected to have consecutive names.
 * E.g., for M0=3 and Z=zin, the expected names of Z offsets are zin0, zin1, zin2.
 * Note that, CROSS_PLANE_PAD (cross plane padding) is required to take into account
 * the possible cross plane paddings in case of the plane changes across the z-dimension.
 *
 * <!--
 * |                  |
 * |      plane0      |
 * |                  |
 * |__________________|
 * |******************|
 * |  cross_plane_pad |
 * |******************|
 * |                  |
 * |      plane1      |
 * |                  |
 * |__________________|
 * -->
 *
 * CALCULATE_Z_OFFSET forwards to CALCULATE_Z_OFFSET_STR so that M0 is macro-expanded
 * before it is pasted onto CALCULATE_Z_OFFSET_.
 *
 * @param[in] M0              The number of offset values to calculate
 * @param[in] DATA_TYPE       The data type of the results
 * @param[in] Z               The basename of the result variables
 * @param[in] Y               The work-item ID of y-axis
 * @param[in] HEIGHT_GEMM3D   The height of GEMM3D
 * @param[in] DEPTH_GEMM3D    The depth of GEMM3D
 * @param[in] CROSS_PLANE_PAD The padding required for plane changes across the z-dimension
 * @param[in] STRIDE_Y        The stride value in y-axis direction
 * @{
 */
#define CALCULATE_Z_OFFSET_STR(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y) CALCULATE_Z_OFFSET_##M0(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y)
#define CALCULATE_Z_OFFSET(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y) CALCULATE_Z_OFFSET_STR(M0, DATA_TYPE, Z, Y, HEIGHT_GEMM3D, DEPTH_GEMM3D, CROSS_PLANE_PAD, STRIDE_Y)
/** @} */ // end of group CALCULATE_Z_OFFSET

/** Store the 0 to (n-1)th rows of the given variables
 * @name STORE_ROW_n
 *
 * STORE_ROW_n first expands STORE_ROW_(n-1) and then stores one more row.
 * Row i (BASENAME<i>) is written to PTR + i * STRIDE_Y + Z<i>.
 * Row indices 10 to 15 use the suffixes A to F, both for BASENAME and for Z.
 *
 * PTR and STRIDE_Y are parenthesised in the expansion, so compound expressions
 * keep their value.
 *
 * @param[in] N0        The size of the vectors
 * @param[in] DATA_TYPE The data type of the vectors
 * @param[in] BASENAME  The basename of the variables
 * @param[in] PTR       The base pointer
 * @param[in] STRIDE_Y  The stride value in y-axis direction
 * @param[in] Z         The basename of the offsets in z-axis direction (Z0 to Zn-1)
 * @{
 */
#define STORE_ROW_1(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (BASENAME##0, 0, (__global DATA_TYPE *)((PTR) + 0 * (STRIDE_Y) + Z##0));

#define STORE_ROW_2(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    STORE_ROW_1(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (BASENAME##1, 0, (__global DATA_TYPE *)((PTR) + 1 * (STRIDE_Y) + Z##1));

#define STORE_ROW_3(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    STORE_ROW_2(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (BASENAME##2, 0, (__global DATA_TYPE *)((PTR) + 2 * (STRIDE_Y) + Z##2));

#define STORE_ROW_4(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    STORE_ROW_3(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (BASENAME##3, 0, (__global DATA_TYPE *)((PTR) + 3 * (STRIDE_Y) + Z##3));

#define STORE_ROW_5(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    STORE_ROW_4(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (BASENAME##4, 0, (__global DATA_TYPE *)((PTR) + 4 * (STRIDE_Y) + Z##4));

#define STORE_ROW_6(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    STORE_ROW_5(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (BASENAME##5, 0, (__global DATA_TYPE *)((PTR) + 5 * (STRIDE_Y) + Z##5));

#define STORE_ROW_7(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    STORE_ROW_6(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (BASENAME##6, 0, (__global DATA_TYPE *)((PTR) + 6 * (STRIDE_Y) + Z##6));

#define STORE_ROW_8(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    STORE_ROW_7(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (BASENAME##7, 0, (__global DATA_TYPE *)((PTR) + 7 * (STRIDE_Y) + Z##7));

#define STORE_ROW_9(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    STORE_ROW_8(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (BASENAME##8, 0, (__global DATA_TYPE *)((PTR) + 8 * (STRIDE_Y) + Z##8));

#define STORE_ROW_10(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    STORE_ROW_9(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (BASENAME##9, 0, (__global DATA_TYPE *)((PTR) + 9 * (STRIDE_Y) + Z##9));

#define STORE_ROW_11(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    STORE_ROW_10(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (BASENAME##A, 0, (__global DATA_TYPE *)((PTR) + 10 * (STRIDE_Y) + Z##A));

#define STORE_ROW_12(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    STORE_ROW_11(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (BASENAME##B, 0, (__global DATA_TYPE *)((PTR) + 11 * (STRIDE_Y) + Z##B));

#define STORE_ROW_13(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    STORE_ROW_12(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (BASENAME##C, 0, (__global DATA_TYPE *)((PTR) + 12 * (STRIDE_Y) + Z##C));

#define STORE_ROW_14(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    STORE_ROW_13(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (BASENAME##D, 0, (__global DATA_TYPE *)((PTR) + 13 * (STRIDE_Y) + Z##D));

#define STORE_ROW_15(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    STORE_ROW_14(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (BASENAME##E, 0, (__global DATA_TYPE *)((PTR) + 14 * (STRIDE_Y) + Z##E));

#define STORE_ROW_16(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    STORE_ROW_15(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (BASENAME##F, 0, (__global DATA_TYPE *)((PTR) + 15 * (STRIDE_Y) + Z##F));
/** @} */ // end of group STORE_ROW_n

/** Convert and store the 0th to (n-1)th rows of the given variables
 * @name CONVERT_STORE_ROW_n
 *
 * Each macro first expands the macro for n-1 rows and then stores one more row:
 * row i (BASENAME##i) is converted with CONVERT_SAT to VEC_DATA_TYPE(DATA_TYPE, N0)
 * and written with VSTORE(N0) to (PTR + i * STRIDE_Y + Z##i).
 *
 * @param[in] N0        The size of the vectors
 * @param[in] DATA_TYPE The data type of the vectors
 * @param[in] BASENAME  The basename of the variables
 * @param[in] PTR       The base pointer
 * @param[in] STRIDE_Y  The stride value in y-axis direction
 * @param[in] Z         The basename of the per-row offsets in z-axis direction (Z##0, Z##1, ...)
 * @{
 */
#define CONVERT_STORE_ROW_1(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (CONVERT_SAT((BASENAME##0), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, (__global DATA_TYPE *)(PTR + 0 * STRIDE_Y + Z##0));

#define CONVERT_STORE_ROW_2(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    CONVERT_STORE_ROW_1(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (CONVERT_SAT((BASENAME##1), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, (__global DATA_TYPE *)(PTR + 1 * STRIDE_Y + Z##1));

#define CONVERT_STORE_ROW_3(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    CONVERT_STORE_ROW_2(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (CONVERT_SAT((BASENAME##2), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, (__global DATA_TYPE *)(PTR + 2 * STRIDE_Y + Z##2));

#define CONVERT_STORE_ROW_4(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    CONVERT_STORE_ROW_3(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (CONVERT_SAT((BASENAME##3), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, (__global DATA_TYPE *)(PTR + 3 * STRIDE_Y + Z##3));

#define CONVERT_STORE_ROW_5(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    CONVERT_STORE_ROW_4(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (CONVERT_SAT((BASENAME##4), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, (__global DATA_TYPE *)(PTR + 4 * STRIDE_Y + Z##4));

#define CONVERT_STORE_ROW_6(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    CONVERT_STORE_ROW_5(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (CONVERT_SAT((BASENAME##5), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, (__global DATA_TYPE *)(PTR + 5 * STRIDE_Y + Z##5));

#define CONVERT_STORE_ROW_7(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    CONVERT_STORE_ROW_6(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (CONVERT_SAT((BASENAME##6), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, (__global DATA_TYPE *)(PTR + 6 * STRIDE_Y + Z##6));

#define CONVERT_STORE_ROW_8(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    CONVERT_STORE_ROW_7(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (CONVERT_SAT((BASENAME##7), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, (__global DATA_TYPE *)(PTR + 7 * STRIDE_Y + Z##7));

#define CONVERT_STORE_ROW_9(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    CONVERT_STORE_ROW_8(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (CONVERT_SAT((BASENAME##8), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, (__global DATA_TYPE *)(PTR + 8 * STRIDE_Y + Z##8));
598
/* Rows 0..9: expands CONVERT_STORE_ROW_9, then converts and stores BASENAME##9.
 * The second parameter must be named DATA_TYPE: the body refers to DATA_TYPE, so with
 * any other parameter name the caller's argument would be ignored and the expansion
 * would silently depend on whatever DATA_TYPE means at the point of use. */
#define CONVERT_STORE_ROW_10(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    CONVERT_STORE_ROW_9(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (CONVERT_SAT((BASENAME##9), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, (__global DATA_TYPE *)(PTR + 9 * STRIDE_Y + Z##9));
603
/* Rows 10..15 use the hexadecimal suffixes A..F for both BASENAME and Z. */
#define CONVERT_STORE_ROW_11(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    CONVERT_STORE_ROW_10(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (CONVERT_SAT((BASENAME##A), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, (__global DATA_TYPE *)(PTR + 10 * STRIDE_Y + Z##A));

#define CONVERT_STORE_ROW_12(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    CONVERT_STORE_ROW_11(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (CONVERT_SAT((BASENAME##B), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, (__global DATA_TYPE *)(PTR + 11 * STRIDE_Y + Z##B));

#define CONVERT_STORE_ROW_13(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    CONVERT_STORE_ROW_12(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (CONVERT_SAT((BASENAME##C), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, (__global DATA_TYPE *)(PTR + 12 * STRIDE_Y + Z##C));

#define CONVERT_STORE_ROW_14(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    CONVERT_STORE_ROW_13(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (CONVERT_SAT((BASENAME##D), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, (__global DATA_TYPE *)(PTR + 13 * STRIDE_Y + Z##D));

#define CONVERT_STORE_ROW_15(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    CONVERT_STORE_ROW_14(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (CONVERT_SAT((BASENAME##E), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, (__global DATA_TYPE *)(PTR + 14 * STRIDE_Y + Z##E));

#define CONVERT_STORE_ROW_16(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    CONVERT_STORE_ROW_15(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) \
    VSTORE(N0) \
    (CONVERT_SAT((BASENAME##F), VEC_DATA_TYPE(DATA_TYPE, N0)), 0, (__global DATA_TYPE *)(PTR + 15 * STRIDE_Y + Z##F));

/** @} */ // end of group CONVERT_STORE_ROW_n
635
/** Store a block of the given size M0xN0
 * @name STORE_BLOCK
 *
 * Supported cases are M0=1,2,3,...,16 and N0=2,3,4,8,16.
 * The data to store is expected to have consecutive names for each row.
 * E.g., for M0=3 and basename=c, the expected names are c0, c1 and c2.
 * The Z offset is expected to have consecutive names.
 * E.g., for M0=3 and Z=zin, the expected z offset names are zin0, zin1 and zin2.
 *
 * STORE_BLOCK forwards to STORE_BLOCK_STR so that M0 is macro-expanded before it is
 * pasted onto STORE_ROW_ (an argument that is an operand of ## is not expanded).
 *
 * @param[in] M0        The number of rows to store
 * @param[in] N0        The size of each vector
 * @param[in] DATA_TYPE The data type of the vectors
 * @param[in] BASENAME  The basename of the variables
 * @param[in] PTR       The base pointer
 * @param[in] STRIDE_Y  The stride value in y-axis direction
 * @param[in] Z         The offset in z-axis direction
 * @{
 */
#define STORE_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) STORE_ROW_##M0(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)
#define STORE_BLOCK(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) STORE_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)
/** @} */ // end of group STORE_BLOCK
Usama Arif0681e3b2019-04-25 14:28:07 +0100657
/** Convert and store a block of the given size M0xN0
 * @name CONVERT_STORE_BLOCK
 *
 * Supported cases are M0=1,2,3,...,16 and N0=2,3,4,8,16.
 * The data to store is expected to have consecutive names for each row.
 * E.g., for M0=3 and basename=c, the expected names are c0, c1 and c2.
 * The Z offset is expected to have consecutive names.
 * E.g., for M0=3 and Z=zin, the expected z offset names are zin0, zin1 and zin2.
 *
 * CONVERT_STORE_BLOCK forwards to CONVERT_STORE_BLOCK_STR so that M0 is macro-expanded
 * before it is pasted onto CONVERT_STORE_ROW_.
 *
 * @param[in] M0        The number of rows to store
 * @param[in] N0        The size of each vector
 * @param[in] DATA_TYPE The data type of the vectors
 * @param[in] BASENAME  The basename of the variables
 * @param[in] PTR       The base pointer
 * @param[in] STRIDE_Y  The stride value in y-axis direction
 * @param[in] Z         The offset in z-axis direction
 * @{
 */
#define CONVERT_STORE_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) CONVERT_STORE_ROW_##M0(N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)
#define CONVERT_STORE_BLOCK(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z) CONVERT_STORE_BLOCK_STR(M0, N0, DATA_TYPE, BASENAME, PTR, STRIDE_Y, Z)
/** @} */ // end of group CONVERT_STORE_BLOCK
Gian Marco Iodice43a129e2019-05-14 10:14:08 +0100679
/** Scale the rows in the given variables (BASENAME0 to BASENAMEn-1)
 * @name SCALE_ROW_n
 *
 * Each macro expands the macro for n-1 rows and then multiplies one more row in place
 * by (DATA_TYPE)SCALE. Rows 10..15 use the hexadecimal suffixes A..F.
 * Note that SCALE is not parenthesised in the expansion, so the cast binds to the
 * first operand of a compound SCALE expression.
 *
 * @param[in] DATA_TYPE The data type of the variables
 * @param[in] BASENAME  The basename of the variables
 * @param[in] SCALE     The scale factor
 * @{
 */
#define SCALE_ROW_1(DATA_TYPE, BASENAME, SCALE) \
    BASENAME##0 *= (DATA_TYPE)SCALE;

#define SCALE_ROW_2(DATA_TYPE, BASENAME, SCALE) \
    SCALE_ROW_1(DATA_TYPE, BASENAME, SCALE) \
    BASENAME##1 *= (DATA_TYPE)SCALE;

#define SCALE_ROW_3(DATA_TYPE, BASENAME, SCALE) \
    SCALE_ROW_2(DATA_TYPE, BASENAME, SCALE) \
    BASENAME##2 *= (DATA_TYPE)SCALE;

#define SCALE_ROW_4(DATA_TYPE, BASENAME, SCALE) \
    SCALE_ROW_3(DATA_TYPE, BASENAME, SCALE) \
    BASENAME##3 *= (DATA_TYPE)SCALE;

#define SCALE_ROW_5(DATA_TYPE, BASENAME, SCALE) \
    SCALE_ROW_4(DATA_TYPE, BASENAME, SCALE) \
    BASENAME##4 *= (DATA_TYPE)SCALE;

#define SCALE_ROW_6(DATA_TYPE, BASENAME, SCALE) \
    SCALE_ROW_5(DATA_TYPE, BASENAME, SCALE) \
    BASENAME##5 *= (DATA_TYPE)SCALE;

#define SCALE_ROW_7(DATA_TYPE, BASENAME, SCALE) \
    SCALE_ROW_6(DATA_TYPE, BASENAME, SCALE) \
    BASENAME##6 *= (DATA_TYPE)SCALE;

#define SCALE_ROW_8(DATA_TYPE, BASENAME, SCALE) \
    SCALE_ROW_7(DATA_TYPE, BASENAME, SCALE) \
    BASENAME##7 *= (DATA_TYPE)SCALE;

#define SCALE_ROW_9(DATA_TYPE, BASENAME, SCALE) \
    SCALE_ROW_8(DATA_TYPE, BASENAME, SCALE) \
    BASENAME##8 *= (DATA_TYPE)SCALE;

#define SCALE_ROW_10(DATA_TYPE, BASENAME, SCALE) \
    SCALE_ROW_9(DATA_TYPE, BASENAME, SCALE) \
    BASENAME##9 *= (DATA_TYPE)SCALE;

#define SCALE_ROW_11(DATA_TYPE, BASENAME, SCALE) \
    SCALE_ROW_10(DATA_TYPE, BASENAME, SCALE) \
    BASENAME##A *= (DATA_TYPE)SCALE;

#define SCALE_ROW_12(DATA_TYPE, BASENAME, SCALE) \
    SCALE_ROW_11(DATA_TYPE, BASENAME, SCALE) \
    BASENAME##B *= (DATA_TYPE)SCALE;

#define SCALE_ROW_13(DATA_TYPE, BASENAME, SCALE) \
    SCALE_ROW_12(DATA_TYPE, BASENAME, SCALE) \
    BASENAME##C *= (DATA_TYPE)SCALE;

#define SCALE_ROW_14(DATA_TYPE, BASENAME, SCALE) \
    SCALE_ROW_13(DATA_TYPE, BASENAME, SCALE) \
    BASENAME##D *= (DATA_TYPE)SCALE;

#define SCALE_ROW_15(DATA_TYPE, BASENAME, SCALE) \
    SCALE_ROW_14(DATA_TYPE, BASENAME, SCALE) \
    BASENAME##E *= (DATA_TYPE)SCALE;

#define SCALE_ROW_16(DATA_TYPE, BASENAME, SCALE) \
    SCALE_ROW_15(DATA_TYPE, BASENAME, SCALE) \
    BASENAME##F *= (DATA_TYPE)SCALE;
/** @} */ // end of group SCALE_ROW_n
Usama Arif0681e3b2019-04-25 14:28:07 +0100751
/** Scale elements stored in a block (BASENAME)
 * @name SCALE_BLOCK
 *
 * Supported cases are N=1,2,3,...,16
 *
 * SCALE_BLOCK forwards to SCALE_BLOCK_STR so that N is macro-expanded before it is
 * pasted onto SCALE_ROW_.
 *
 * @param[in] N         The number of rows in the block
 * @param[in] DATA_TYPE The data type of the block
 * @param[in] BASENAME  The basename of the block
 * @param[in] SCALE     The scale factor
 * @{
 */
#define SCALE_BLOCK_STR(N, DATA_TYPE, BASENAME, SCALE) SCALE_ROW_##N(DATA_TYPE, BASENAME, SCALE)
#define SCALE_BLOCK(N, DATA_TYPE, BASENAME, SCALE) SCALE_BLOCK_STR(N, DATA_TYPE, BASENAME, SCALE)
/** @} */ // end of group SCALE_BLOCK
Gian Marco Iodice43a129e2019-05-14 10:14:08 +0100766
/** Create a new vector containing the values at the given index for a set of given vectors
 * @name COLUMN_VECTORn
 *
 * COLUMN_VECTORn declares a variable named BASENAME##IDX_COL holding component
 * .s##IDX_COL of each of the n source vectors X##0 ... X##(n-1), in that order
 * (source suffixes 10..15 are A..F). For n=1 the result is a scalar of type TYPE,
 * otherwise it is VEC_DATA_TYPE(TYPE, n).
 *
 * @param[in] IDX_COL  The index value
 * @param[in] BASENAME The basename of the destination vectors
 * @param[in] X        The basename of the source vectors
 * @param[in] TYPE     The data type of the destination vectors
 * @{
 */
#define COLUMN_VECTOR1(IDX_COL, BASENAME, X, TYPE) \
    TYPE BASENAME##IDX_COL = (TYPE)((X##0).s##IDX_COL);
#define COLUMN_VECTOR2(IDX_COL, BASENAME, X, TYPE) \
    VEC_DATA_TYPE(TYPE, 2) \
    BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 2))((X##0).s##IDX_COL, (X##1).s##IDX_COL);
#define COLUMN_VECTOR3(IDX_COL, BASENAME, X, TYPE) \
    VEC_DATA_TYPE(TYPE, 3) \
    BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 3))((X##0).s##IDX_COL, (X##1).s##IDX_COL, (X##2).s##IDX_COL);
#define COLUMN_VECTOR4(IDX_COL, BASENAME, X, TYPE) \
    VEC_DATA_TYPE(TYPE, 4) \
    BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 4))((X##0).s##IDX_COL, (X##1).s##IDX_COL, (X##2).s##IDX_COL, (X##3).s##IDX_COL);
#define COLUMN_VECTOR8(IDX_COL, BASENAME, X, TYPE) \
    VEC_DATA_TYPE(TYPE, 8) \
    BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 8))((X##0).s##IDX_COL, (X##1).s##IDX_COL, (X##2).s##IDX_COL, (X##3).s##IDX_COL, \
                                                 (X##4).s##IDX_COL, (X##5).s##IDX_COL, (X##6).s##IDX_COL, (X##7).s##IDX_COL);
#define COLUMN_VECTOR16(IDX_COL, BASENAME, X, TYPE) \
    VEC_DATA_TYPE(TYPE, 16) \
    BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 16))((X##0).s##IDX_COL, (X##1).s##IDX_COL, (X##2).s##IDX_COL, (X##3).s##IDX_COL, \
                                                  (X##4).s##IDX_COL, (X##5).s##IDX_COL, (X##6).s##IDX_COL, (X##7).s##IDX_COL, \
                                                  (X##8).s##IDX_COL, (X##9).s##IDX_COL, (X##A).s##IDX_COL, (X##B).s##IDX_COL, \
                                                  (X##C).s##IDX_COL, (X##D).s##IDX_COL, (X##E).s##IDX_COL, (X##F).s##IDX_COL);
/** @} */ // end of group COLUMN_VECTORn
Gian Marco Iodice43a129e2019-05-14 10:14:08 +0100794
/** Create a new vector containing the values at the given index. Utility macros for transposing a column-vector
 * @name COLUMN_VECTOR_SCALARn
 *
 * Same as COLUMN_VECTORn, except that the sources X##0 ... X##(n-1) are used directly
 * (no .s##IDX_COL component is selected). IDX_COL is only used to name the
 * destination variable BASENAME##IDX_COL.
 *
 * @param[in] IDX_COL  The index value
 * @param[in] BASENAME The basename of the destination vectors
 * @param[in] X        The basename of the source vectors
 * @param[in] TYPE     The data type of the destination vectors
 * @{
 */
#define COLUMN_VECTOR_SCALAR1(IDX_COL, BASENAME, X, TYPE) \
    TYPE BASENAME##IDX_COL = (TYPE)((X##0));
#define COLUMN_VECTOR_SCALAR2(IDX_COL, BASENAME, X, TYPE) \
    VEC_DATA_TYPE(TYPE, 2) \
    BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 2))((X##0), (X##1));
#define COLUMN_VECTOR_SCALAR3(IDX_COL, BASENAME, X, TYPE) \
    VEC_DATA_TYPE(TYPE, 3) \
    BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 3))((X##0), (X##1), (X##2));
#define COLUMN_VECTOR_SCALAR4(IDX_COL, BASENAME, X, TYPE) \
    VEC_DATA_TYPE(TYPE, 4) \
    BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 4))((X##0), (X##1), (X##2), (X##3));
#define COLUMN_VECTOR_SCALAR8(IDX_COL, BASENAME, X, TYPE) \
    VEC_DATA_TYPE(TYPE, 8) \
    BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 8))((X##0), (X##1), (X##2), (X##3), (X##4), (X##5), (X##6), (X##7));
#define COLUMN_VECTOR_SCALAR16(IDX_COL, BASENAME, X, TYPE) \
    VEC_DATA_TYPE(TYPE, 16) \
    BASENAME##IDX_COL = (VEC_DATA_TYPE(TYPE, 16))((X##0), (X##1), (X##2), (X##3), (X##4), (X##5), (X##6), (X##7), \
                                                  (X##8), (X##9), (X##A), (X##B), (X##C), (X##D), (X##E), (X##F));
/** @} */ // end of group COLUMN_VECTOR_SCALARn
822
Sang-Hoon Park11b0b8a2019-11-05 13:29:19 +0000823/** Create transposed vectors of the given vectors
824 * @name TRANSPOSE_K0Xn
825 *
826 * @param[in] K0 The size of the source vectors
827 * @param[in] BASENAME The basename of transposed vectors
828 * @param[in] B The basename of source vectors for transposition
Michele Di Giorgiof9179d32019-11-27 16:17:30 +0000829 * @param[in] TYPE The data type of the transposed vectors
Sang-Hoon Park11b0b8a2019-11-05 13:29:19 +0000830 * @{
831 */
Michele Di Giorgiof9179d32019-11-27 16:17:30 +0000832#define TRANSPOSE_K0X1(K0, BASENAME, B, TYPE) \
Gian Marco Iodice061eefd2020-04-23 13:40:00 +0100833 COLUMN_VECTOR_SCALAR(K0, 0, BASENAME, B, TYPE);
Michele Di Giorgiof9179d32019-11-27 16:17:30 +0000834#define TRANSPOSE_K0X2(K0, BASENAME, B, TYPE) \
Gian Marco Iodice061eefd2020-04-23 13:40:00 +0100835 COLUMN_VECTOR(K0, 0, BASENAME, B, TYPE); \
Michele Di Giorgiof9179d32019-11-27 16:17:30 +0000836 COLUMN_VECTOR(K0, 1, BASENAME, B, TYPE);
837#define TRANSPOSE_K0X3(K0, BASENAME, B, TYPE) \
838 TRANSPOSE_K0X2(K0, BASENAME, B, TYPE); \
839 COLUMN_VECTOR(K0, 2, BASENAME, B, TYPE);
840#define TRANSPOSE_K0X4(K0, BASENAME, B, TYPE) \
841 TRANSPOSE_K0X3(K0, BASENAME, B, TYPE); \
842 COLUMN_VECTOR(K0, 3, BASENAME, B, TYPE);
843#define TRANSPOSE_K0X8(K0, BASENAME, B, TYPE) \
844 TRANSPOSE_K0X4(K0, BASENAME, B, TYPE); \
845 COLUMN_VECTOR(K0, 4, BASENAME, B, TYPE); \
846 COLUMN_VECTOR(K0, 5, BASENAME, B, TYPE); \
847 COLUMN_VECTOR(K0, 6, BASENAME, B, TYPE); \
848 COLUMN_VECTOR(K0, 7, BASENAME, B, TYPE);
849#define TRANSPOSE_K0X16(K0, BASENAME, B, TYPE) \
850 TRANSPOSE_K0X8(K0, BASENAME, B, TYPE); \
851 COLUMN_VECTOR(K0, 8, BASENAME, B, TYPE); \
852 COLUMN_VECTOR(K0, 9, BASENAME, B, TYPE); \
853 COLUMN_VECTOR(K0, A, BASENAME, B, TYPE); \
854 COLUMN_VECTOR(K0, B, BASENAME, B, TYPE); \
855 COLUMN_VECTOR(K0, C, BASENAME, B, TYPE); \
856 COLUMN_VECTOR(K0, D, BASENAME, B, TYPE); \
857 COLUMN_VECTOR(K0, E, BASENAME, B, TYPE); \
858 COLUMN_VECTOR(K0, F, BASENAME, B, TYPE);
Gian Marco Iodice43a129e2019-05-14 10:14:08 +0100859
Sang-Hoon Park11b0b8a2019-11-05 13:29:19 +0000860/** @} */ // end of group TRANSPOSE_K0Xn
861
/** Create column vectors to contain the values at the given index for a set of given vectors
 *
 * Dispatches to COLUMN_VECTOR<K0> (e.g. COLUMN_VECTOR4 for K0=4) by concatenating
 * the macro name with K0 through CONCAT.
 *
 * @param[in] K0       The number of source vectors
 * @param[in] IDX_COL  The index value
 * @param[in] BASENAME The basename of the destination vectors
 * @param[in] B        The basename of the source vectors
 * @param[in] TYPE     The data type of the destination vectors
 */
#define COLUMN_VECTOR(K0, IDX_COL, BASENAME, B, TYPE) \
    CONCAT(COLUMN_VECTOR, K0) \
    (IDX_COL, BASENAME, B, TYPE);
Gian Marco Iodice43a129e2019-05-14 10:14:08 +0100873
/** Create column vectors to contain the values at the given index. Utility macro for transposing a column-vector
 *
 * Dispatches to COLUMN_VECTOR_SCALAR<K0> (e.g. COLUMN_VECTOR_SCALAR4 for K0=4) by
 * concatenating the macro name with K0 through CONCAT.
 *
 * @param[in] K0       The number of source vectors
 * @param[in] IDX_COL  The index value
 * @param[in] BASENAME The basename of the destination vectors
 * @param[in] B        The basename of the source vectors
 * @param[in] TYPE     The data type of the destination vectors
 */
#define COLUMN_VECTOR_SCALAR(K0, IDX_COL, BASENAME, B, TYPE) \
    CONCAT(COLUMN_VECTOR_SCALAR, K0) \
    (IDX_COL, BASENAME, B, TYPE);
885
/** Create transposed vectors from the given source vectors
 *
 * Dispatches to TRANSPOSE_K0X<N0> (e.g. TRANSPOSE_K0X4 for N0=4) by concatenating
 * the macro name with N0 through CONCAT.
 *
 * @param[in] K0       The size of source vectors
 * @param[in] N0       The number of source vectors
 * @param[in] BASENAME The basename of transposed vectors
 * @param[in] B        The basename of source vectors for transposition
 * @param[in] TYPE     The data type of the transposed vectors
 *
 */
#define TRANSPOSE_K0XN0(K0, N0, BASENAME, B, TYPE) \
    CONCAT(TRANSPOSE_K0X, N0) \
    (K0, BASENAME, B, TYPE);
Georgios Pinitasb0f342e2019-05-21 13:32:43 +0100898
/** Add the variables (BIAS0 to BIASn-1) to the others (BASENAME0 to BASENAMEn-1)
 * @name ADD_ROW_n
 *
 * Each macro expands the macro for n-1 rows and then adds BIAS##i to BASENAME##i
 * in place for one more row. Rows 10..15 use the hexadecimal suffixes A..F.
 *
 * @param[in] BASENAME The basename of the destination variables
 * @param[in] BIAS     The basename of the added variables
 * @{
 */
#define ADD_ROW_1(BASENAME, BIAS) \
    BASENAME##0 += BIAS##0;

#define ADD_ROW_2(BASENAME, BIAS) \
    ADD_ROW_1(BASENAME, BIAS) \
    BASENAME##1 += BIAS##1;

#define ADD_ROW_3(BASENAME, BIAS) \
    ADD_ROW_2(BASENAME, BIAS) \
    BASENAME##2 += BIAS##2;

#define ADD_ROW_4(BASENAME, BIAS) \
    ADD_ROW_3(BASENAME, BIAS) \
    BASENAME##3 += BIAS##3;

#define ADD_ROW_5(BASENAME, BIAS) \
    ADD_ROW_4(BASENAME, BIAS) \
    BASENAME##4 += BIAS##4;

#define ADD_ROW_6(BASENAME, BIAS) \
    ADD_ROW_5(BASENAME, BIAS) \
    BASENAME##5 += BIAS##5;

#define ADD_ROW_7(BASENAME, BIAS) \
    ADD_ROW_6(BASENAME, BIAS) \
    BASENAME##6 += BIAS##6;

#define ADD_ROW_8(BASENAME, BIAS) \
    ADD_ROW_7(BASENAME, BIAS) \
    BASENAME##7 += BIAS##7;

#define ADD_ROW_9(BASENAME, BIAS) \
    ADD_ROW_8(BASENAME, BIAS) \
    BASENAME##8 += BIAS##8;

#define ADD_ROW_10(BASENAME, BIAS) \
    ADD_ROW_9(BASENAME, BIAS) \
    BASENAME##9 += BIAS##9;

#define ADD_ROW_11(BASENAME, BIAS) \
    ADD_ROW_10(BASENAME, BIAS) \
    BASENAME##A += BIAS##A;

#define ADD_ROW_12(BASENAME, BIAS) \
    ADD_ROW_11(BASENAME, BIAS) \
    BASENAME##B += BIAS##B;

#define ADD_ROW_13(BASENAME, BIAS) \
    ADD_ROW_12(BASENAME, BIAS) \
    BASENAME##C += BIAS##C;

#define ADD_ROW_14(BASENAME, BIAS) \
    ADD_ROW_13(BASENAME, BIAS) \
    BASENAME##D += BIAS##D;

#define ADD_ROW_15(BASENAME, BIAS) \
    ADD_ROW_14(BASENAME, BIAS) \
    BASENAME##E += BIAS##E;

#define ADD_ROW_16(BASENAME, BIAS) \
    ADD_ROW_15(BASENAME, BIAS) \
    BASENAME##F += BIAS##F;

/** @} */ // end of group ADD_ROW_n
Georgios Pinitasb0f342e2019-05-21 13:32:43 +0100970
/** Add the block (BIAS) to another block (BASENAME)
 * @name ADD_BLOCK
 *
 * Supported cases are N=1,2,3,...,16
 *
 * ADD_BLOCK forwards to ADD_BLOCK_STR so that N is macro-expanded before it is
 * pasted onto ADD_ROW_.
 *
 * @param[in] N        The number of vectors in the block
 * @param[in] BASENAME The basename of the destination variables
 * @param[in] BIAS     The basename of the added variables
 * @{
 */
#define ADD_BLOCK_STR(N, BASENAME, BIAS) ADD_ROW_##N(BASENAME, BIAS)
#define ADD_BLOCK(N, BASENAME, BIAS) ADD_BLOCK_STR(N, BASENAME, BIAS)
/** @} */ // end of group ADD_BLOCK
984
/** Broadcast (add a single value) to each element of the destination variables
 * @name ADD_ROW_BROADCAST_n
 *
 * Each macro expands the macro for n-1 rows and then adds the same BIAS expression
 * to BASENAME##i in place for one more row. Rows 10..15 use the hexadecimal
 * suffixes A..F.
 *
 * @param[in] BASENAME The basename of the destination variables
 * @param[in] BIAS     The variable containing the value to add
 * @{
 */
#define ADD_ROW_BROADCAST_1(BASENAME, BIAS) \
    BASENAME##0 += BIAS;

#define ADD_ROW_BROADCAST_2(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_1(BASENAME, BIAS) \
    BASENAME##1 += BIAS;

#define ADD_ROW_BROADCAST_3(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_2(BASENAME, BIAS) \
    BASENAME##2 += BIAS;

#define ADD_ROW_BROADCAST_4(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_3(BASENAME, BIAS) \
    BASENAME##3 += BIAS;

#define ADD_ROW_BROADCAST_5(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_4(BASENAME, BIAS) \
    BASENAME##4 += BIAS;

#define ADD_ROW_BROADCAST_6(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_5(BASENAME, BIAS) \
    BASENAME##5 += BIAS;

#define ADD_ROW_BROADCAST_7(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_6(BASENAME, BIAS) \
    BASENAME##6 += BIAS;

#define ADD_ROW_BROADCAST_8(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_7(BASENAME, BIAS) \
    BASENAME##7 += BIAS;

#define ADD_ROW_BROADCAST_9(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_8(BASENAME, BIAS) \
    BASENAME##8 += BIAS;

#define ADD_ROW_BROADCAST_10(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_9(BASENAME, BIAS) \
    BASENAME##9 += BIAS;

#define ADD_ROW_BROADCAST_11(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_10(BASENAME, BIAS) \
    BASENAME##A += BIAS;

#define ADD_ROW_BROADCAST_12(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_11(BASENAME, BIAS) \
    BASENAME##B += BIAS;

#define ADD_ROW_BROADCAST_13(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_12(BASENAME, BIAS) \
    BASENAME##C += BIAS;

#define ADD_ROW_BROADCAST_14(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_13(BASENAME, BIAS) \
    BASENAME##D += BIAS;

#define ADD_ROW_BROADCAST_15(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_14(BASENAME, BIAS) \
    BASENAME##E += BIAS;

#define ADD_ROW_BROADCAST_16(BASENAME, BIAS) \
    ADD_ROW_BROADCAST_15(BASENAME, BIAS) \
    BASENAME##F += BIAS;
/** @} */ // end of group ADD_ROW_BROADCAST_n
1054
/** Broadcast (add a value) to each element of the destination block (BASENAME)
 * @name ADD_BLOCK_BROADCAST
 *
 * Supported cases are N=1,2,3,...,16.
 *
 * ADD_BLOCK_BROADCAST forwards to ADD_BLOCK_BROADCAST_STR so that N is macro-expanded
 * before it is pasted onto ADD_ROW_BROADCAST_.
 *
 * @param[in] N        The number of vectors in the block
 * @param[in] BASENAME The basename of the destination variables
 * @param[in] BIAS     The variable containing the value to add
 * @{
 */
#define ADD_BLOCK_BROADCAST_STR(N, BASENAME, BIAS) ADD_ROW_BROADCAST_##N(BASENAME, BIAS)
#define ADD_BLOCK_BROADCAST(N, BASENAME, BIAS) ADD_BLOCK_BROADCAST_STR(N, BASENAME, BIAS)
/** @} */ // end of group ADD_BLOCK_BROADCAST
Gian Marco Iodiceca1f4602019-07-16 15:46:48 +01001068
/** Apply activation to the given variables
 * @name ACTIVATION_ROW_n
 *
 * Each macro expands the macro for n-1 rows and then overwrites one more row
 * BASENAME##i with ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##i, A_VAL, B_VAL).
 * Rows 10..15 use the hexadecimal suffixes A..F.
 *
 * @param[in] ACTIVATION_TYPE The type of the activation
 * @param[in] DATA_TYPE       The data type of the vectors
 * @param[in] BASENAME        The basename of the variables
 * @param[in] A_VAL           Additional value required by the activation
 * @param[in] B_VAL           Additional value required by the activation
 * @{
 */
#define ACTIVATION_ROW_1(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    BASENAME##0 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##0, A_VAL, B_VAL);

#define ACTIVATION_ROW_2(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_1(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    BASENAME##1 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##1, A_VAL, B_VAL);

#define ACTIVATION_ROW_3(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_2(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    BASENAME##2 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##2, A_VAL, B_VAL);

#define ACTIVATION_ROW_4(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_3(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    BASENAME##3 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##3, A_VAL, B_VAL);

#define ACTIVATION_ROW_5(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_4(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    BASENAME##4 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##4, A_VAL, B_VAL);

#define ACTIVATION_ROW_6(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_5(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    BASENAME##5 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##5, A_VAL, B_VAL);

#define ACTIVATION_ROW_7(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_6(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    BASENAME##6 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##6, A_VAL, B_VAL);

#define ACTIVATION_ROW_8(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_7(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    BASENAME##7 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##7, A_VAL, B_VAL);

#define ACTIVATION_ROW_9(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_8(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    BASENAME##8 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##8, A_VAL, B_VAL);

#define ACTIVATION_ROW_10(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_9(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    BASENAME##9 = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##9, A_VAL, B_VAL);

#define ACTIVATION_ROW_11(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_10(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    BASENAME##A = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##A, A_VAL, B_VAL);

#define ACTIVATION_ROW_12(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_11(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    BASENAME##B = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##B, A_VAL, B_VAL);

#define ACTIVATION_ROW_13(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_12(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    BASENAME##C = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##C, A_VAL, B_VAL);

#define ACTIVATION_ROW_14(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_13(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    BASENAME##D = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##D, A_VAL, B_VAL);

#define ACTIVATION_ROW_15(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_14(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    BASENAME##E = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##E, A_VAL, B_VAL);

#define ACTIVATION_ROW_16(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    ACTIVATION_ROW_15(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) \
    BASENAME##F = ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, BASENAME##F, A_VAL, B_VAL);
/** @} */ // end of group ACTIVATION_ROW_n
Gian Marco Iodiceca1f4602019-07-16 15:46:48 +01001142
/** Apply activation to a block (BASENAME)
 * @name ACTIVATION_BLOCK
 *
 * Supported cases are N=1,2,3,...,16.
 *
 * ACTIVATION_BLOCK forwards to ACTIVATION_BLOCK_STR so that N is macro-expanded before
 * it is pasted onto ACTIVATION_ROW_.
 *
 * @param[in] N               The number of vectors in the block
 * @param[in] ACTIVATION_TYPE The type of the activation
 * @param[in] DATA_TYPE       The data type of the vectors
 * @param[in] BASENAME        The basename of the variables
 * @param[in] A_VAL           Additional value required by the activation
 * @param[in] B_VAL           Additional value required by the activation
 * @{
 */
#define ACTIVATION_BLOCK_STR(N, ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) ACTIVATION_ROW_##N(ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)
#define ACTIVATION_BLOCK(N, ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL) ACTIVATION_BLOCK_STR(N, ACTIVATION_TYPE, DATA_TYPE, BASENAME, A_VAL, B_VAL)
/** @} */ // end of group ACTIVATION_BLOCK
Gian Marco Iodice0c17aa22019-09-27 09:23:15 +01001159
/** Apply convert_<data_type> to the given variables
 * @name CONVERT_ROW_n
 *
 * CONVERT_ROW_n converts the n source variables BASENAME_SRC0 .. BASENAME_SRC(n-1)
 * and stores each result in a newly declared destination variable
 * BASENAME_DST0 .. BASENAME_DST(n-1). Rows 10 to 15 use the suffixes A to F.
 *
 * Each macro expands the previous one and appends one more declaration, so
 * CONVERT_ROW_n emits n declarations. Because the destination variables are
 * declared by the expansion, they must not already exist in the enclosing
 * scope. Every emitted declaration already ends with a semicolon.
 *
 * VEC_DATA_TYPE and CONVERT are not defined in this section; they are
 * expected to be provided by the including environment.
 *
 * @param[in] N            The size of the vectors
 * @param[in] DATA_TYPE    The data type of the vectors
 * @param[in] BASENAME_SRC The basename of the source variables
 * @param[in] BASENAME_DST The basename of the destination variables
 * @{
 */
#define CONVERT_ROW_1(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    VEC_DATA_TYPE(DATA_TYPE, N)                                 \
    BASENAME_DST##0 = CONVERT(BASENAME_SRC##0, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_2(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_1(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)     \
    VEC_DATA_TYPE(DATA_TYPE, N)                                 \
    BASENAME_DST##1 = CONVERT(BASENAME_SRC##1, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_3(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_2(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)     \
    VEC_DATA_TYPE(DATA_TYPE, N)                                 \
    BASENAME_DST##2 = CONVERT(BASENAME_SRC##2, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_4(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_3(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)     \
    VEC_DATA_TYPE(DATA_TYPE, N)                                 \
    BASENAME_DST##3 = CONVERT(BASENAME_SRC##3, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_5(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_4(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)     \
    VEC_DATA_TYPE(DATA_TYPE, N)                                 \
    BASENAME_DST##4 = CONVERT(BASENAME_SRC##4, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_6(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_5(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)     \
    VEC_DATA_TYPE(DATA_TYPE, N)                                 \
    BASENAME_DST##5 = CONVERT(BASENAME_SRC##5, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_7(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_6(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)     \
    VEC_DATA_TYPE(DATA_TYPE, N)                                 \
    BASENAME_DST##6 = CONVERT(BASENAME_SRC##6, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_8(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_7(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)     \
    VEC_DATA_TYPE(DATA_TYPE, N)                                 \
    BASENAME_DST##7 = CONVERT(BASENAME_SRC##7, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_9(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_8(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)     \
    VEC_DATA_TYPE(DATA_TYPE, N)                                 \
    BASENAME_DST##8 = CONVERT(BASENAME_SRC##8, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_10(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_9(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)      \
    VEC_DATA_TYPE(DATA_TYPE, N)                                  \
    BASENAME_DST##9 = CONVERT(BASENAME_SRC##9, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_11(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_10(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)     \
    VEC_DATA_TYPE(DATA_TYPE, N)                                  \
    BASENAME_DST##A = CONVERT(BASENAME_SRC##A, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_12(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_11(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)     \
    VEC_DATA_TYPE(DATA_TYPE, N)                                  \
    BASENAME_DST##B = CONVERT(BASENAME_SRC##B, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_13(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_12(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)     \
    VEC_DATA_TYPE(DATA_TYPE, N)                                  \
    BASENAME_DST##C = CONVERT(BASENAME_SRC##C, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_14(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_13(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)     \
    VEC_DATA_TYPE(DATA_TYPE, N)                                  \
    BASENAME_DST##D = CONVERT(BASENAME_SRC##D, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_15(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_14(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)     \
    VEC_DATA_TYPE(DATA_TYPE, N)                                  \
    BASENAME_DST##E = CONVERT(BASENAME_SRC##E, VEC_DATA_TYPE(DATA_TYPE, N));

#define CONVERT_ROW_16(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) \
    CONVERT_ROW_15(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)     \
    VEC_DATA_TYPE(DATA_TYPE, N)                                  \
    BASENAME_DST##F = CONVERT(BASENAME_SRC##F, VEC_DATA_TYPE(DATA_TYPE, N));
/** @} */ // end of group CONVERT_ROW_n
Gian Marco Iodice0c17aa22019-09-27 09:23:15 +01001247
/** Apply convert_<data_type> to a block (BASENAME_SRC) and save to another block (BASENAME_DST)
 * @name CONVERT_BLOCK
 *
 * Supported cases M=1,2,3,...,16.
 *
 * CONVERT_BLOCK forwards to CONVERT_BLOCK_STR, which pastes M onto
 * CONVERT_ROW_ to select the row macro. The extra level of indirection lets
 * M itself be a macro: it is fully expanded while being passed through
 * CONVERT_BLOCK, before the ## operator in CONVERT_BLOCK_STR sees it.
 *
 * @param[in] M            The number of vectors to convert
 * @param[in] N            The size of the vectors
 * @param[in] DATA_TYPE    The data type of the vectors
 * @param[in] BASENAME_SRC The basename of the source variables
 * @param[in] BASENAME_DST The basename of the destination variables
 * @{
 */
#define CONVERT_BLOCK_STR(M, N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) CONVERT_ROW_##M(N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)
#define CONVERT_BLOCK(M, N, DATA_TYPE, BASENAME_SRC, BASENAME_DST) CONVERT_BLOCK_STR(M, N, DATA_TYPE, BASENAME_SRC, BASENAME_DST)
/** @} */ // end of group CONVERT_BLOCK