blob: eb9d7198a9fd5cf0aa4007207d35201189dcbba5 [file] [log] [blame]
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001/*
2 * Copyright (c) 2023 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
Viet-Hoa Doce3c48c2023-07-03 13:44:43 +010025#ifndef CKW_PROTOTYPE_SRC_PROTOTYPE_H
26#define CKW_PROTOTYPE_SRC_PROTOTYPE_H
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010027
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010028#include <algorithm>
29#include <array>
Nikolaj Jensenacea4072023-07-03 09:44:42 +010030#include <cassert> // assert (to be removed)
31#include <chrono>
32#include <cmath>
33#include <cstdint> // int32_t
Nikolaj Jensen5ff48022023-06-27 14:13:24 +010034#include <functional>
Nikolaj Jensenacea4072023-07-03 09:44:42 +010035#include <iostream> // cout (to be removed)
36#include <map>
37#include <memory>
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010038#include <stdexcept>
Nikolaj Jensenacea4072023-07-03 09:44:42 +010039#include <string>
40#include <unordered_map>
41#include <vector>
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010042
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010043#include "ckw/Error.h"
Nikolaj Jensenacea4072023-07-03 09:44:42 +010044#include "ckw/TensorInfo.h"
Nikolaj Jensen5ff48022023-06-27 14:13:24 +010045#include "ckw/types/ConvertPolicy.h"
46#include "ckw/types/DataType.h"
47#include "ckw/types/Functions.h"
48#include "ckw/types/GpuTargetLanguage.h"
49#include "ckw/types/Operators.h"
50#include "ckw/types/TensorSamplerTypes.h"
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010051
52namespace ckw
53{
Nikolaj Jensenacea4072023-07-03 09:44:42 +010054namespace prototype
55{
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010056
57// Dummy data structure for Size2D
58using Size2D = std::vector<int32_t>;
59
60// Dummy Status
61using Status = void;
62
63enum class ComponentType : int32_t
64{
65 Complex = 0,
66 Simple = 1,
67 Unfusable = 2
68};
69
70enum class GpuCompilationSpeed
71{
Nikolaj Jensenacea4072023-07-03 09:44:42 +010072 Fast = 0x00, // fast compilation may increase the latency of the network
73 Slow = 0x01 // slow compilation may decrease the latency of the network
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010074};
75
76enum class GpuExtensions
77{
78 Fp16,
79 Dot8,
80 Mmul,
81 FastMath
82};
83
84struct TensorInfo
85{
Nikolaj Jensenacea4072023-07-03 09:44:42 +010086 TensorShape shape{ { 0 } };
87 DataType data_type{ DataType::Unknown };
88 TensorDataLayout data_layout{ TensorDataLayout::Nhwc };
89 int32_t id{ -1 };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010090};
91
92struct ComponentAttribute
93{
Nikolaj Jensenacea4072023-07-03 09:44:42 +010094 GpuCompilationSpeed compilation_speed{ GpuCompilationSpeed::Fast };
95 bool overwrite_tile{ true };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010096};
97
98inline std::string data_type_to_cl_type(DataType dt)
99{
100 switch(dt)
101 {
102 case DataType::Fp32:
103 return "float";
104 case DataType::Fp16:
105 return "half";
106 case DataType::Int8:
107 return "char";
108 case DataType::Uint8:
109 return "uchar";
110 case DataType::Uint16:
111 return "ushort";
112 case DataType::Int16:
113 return "short";
114 case DataType::Uint32:
115 return "uint";
116 case DataType::Int32:
117 return "int";
118 case DataType::Bool:
119 return "bool";
120 default:
121 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +0100122 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100123 }
124}
125
126inline int32_t width_to_cl_vector_size(int32_t width)
127{
128 switch(width)
129 {
130 case 1:
131 return 1;
132 case 2:
133 return 2;
134 case 3:
135 return 3;
136 case 4:
137 return 4;
138 case 5:
139 case 6:
140 case 7:
141 case 8:
142 return 8;
143 case 9:
144 case 10:
145 case 11:
146 case 12:
147 case 13:
148 case 14:
149 case 15:
150 case 16:
151 return 16;
152 default:
153 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +0100154 return 0;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100155 }
156}
157
158inline std::string get_cl_data_type(DataType dt, int32_t width)
159{
160 std::string data_type;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100161 int32_t w = width_to_cl_vector_size(width);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100162 data_type += data_type_to_cl_type(dt);
163 if(w != 1)
164 {
165 data_type += std::to_string(w);
166 }
167 return data_type;
168}
169
170inline std::string to_opencl_store(int32_t vector_length)
171{
172 if(vector_length != 1)
173 {
174 return "vstore" + std::to_string(vector_length) + "(";
175 }
176 else
177 {
178 return "*(";
179 }
180}
181
182struct TileInfo
183{
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100184 TileInfo()
185 {
186 }
187
188 TileInfo(DataType dt)
189 : dt(dt), w(1), h(1)
190 {
191 }
192
193 TileInfo(DataType dt, int32_t width)
194 : dt(dt), w(width), h(1)
195 {
196 }
197
198 TileInfo(DataType dt, int32_t width, int32_t height)
199 : dt(dt), w(width), h(height)
200 {
201 }
202
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100203 DataType dt{ DataType::Unknown }; // Data type of the tile
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100204 int32_t w{ 0 }; // Width (i.e. c0 - portion of the channels)
205 int32_t h{ 0 }; // Height (i.e. s0 - portion of the spatial dimensions)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100206};
207
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100208inline std::ostream &operator<<(std::ostream &o, const TileInfo &a)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100209{
210 o << a.w << " x " << a.h;
211 return o;
212}
213
214struct DataTypeAsString
215{
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100216 std::string str{ "" };
217 DataType dt{ DataType::Unknown };
218 int32_t size{ 1 };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100219};
220
221struct ValueAsString
222{
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100223 std::string str{ "" };
224 DataTypeAsString type{};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100225};
226
227// https://stackoverflow.com/questions/51515378/storing-and-accessing-tile-properties-in-c
228// A Tile is a collection of variables used to express a 2D data.
229class IScalarTile
230{
231public:
232 virtual ~IScalarTile() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100233
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100234 /** Method to get the scalar variable from a tile
235 * @param[in] x X coordinate on the width of the tile. If out-of-bound, the coordinate is clamped to the nearest valid edge
236 * @param[in] y Y coordinate on the height of the tile. If out-of-bound, the coordinate is clamped to the nearest valid edge
237 *
238 * @return the scalar variable as a string
239 */
240 virtual ValueAsString scalar(int32_t x, int32_t y) const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100241
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100242 /** Method to get the list of underlying variable names used by the tile
243 *
244 * @return the list of variable names
245 */
246 virtual std::vector<ValueAsString> underlying_source_variables() const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100247
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100248 /** Method to get the name of the tile.
249 *
250 * @return the name of the tile
251 */
252 std::string name() const
253 {
254 return _basename;
255 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100256
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100257 /** Method to get the tile format
258 *
259 * @return the format
260 */
261 TileInfo format() const
262 {
263 return _format;
264 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100265
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100266 /** Method to know whether the tile is assignable or not (constant)
267 *
268 * @return true if the tile is assignable
269 */
270 virtual bool is_assignable() const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100271
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100272 /** Method to know whether the tile needs to be declared
273 *
274 * @return true if the tile needs to be declared in the code before being used
275 */
276 virtual bool need_declaration() const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100277
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100278protected:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100279 TileInfo _format{}; // Tile format
280 std::string _basename{ "" }; // Tile name
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100281};
282
283// A tile is a collection of variables used to express a 2D data. The variables are vectors in the GPU context.
284// The vector size is given by the width of the tile. The number of vectors height by depth defines the number of vectors
285class IVectorTile : public IScalarTile
286{
287public:
288 virtual ~IVectorTile() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100289
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100290 /** Method to get the vector variable from a tile. A vector is an ordered homogeneous collection of two or more scalars.
291 * The user can query the list of supported width for the vectors through preferred_vector_sizes().
292 *
293 * @param[in] y Y coordinate on the height of the tile. If out-of-bound, the coordinate is clamped to the nearest valid edge
294 *
295 * @return the vector variable as a string
296 */
297 virtual ValueAsString vector(int32_t y) const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100298
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100299 /** Method to get a vector variable from a tile. A vector is an ordered homogeneous collection of two or more scalars.
300 *
301 * @return the vector variable as a string
302 */
303 virtual ValueAsString vector(int32_t x_start, int32_t width, int32_t y) const = 0;
304 /** Method to get the preferred vector sizes.
305 *
306 * @return a vector with the preferred vector sizes
307 */
308 //virtual std::vector<int32_t> preferred_vector_sizes() const = 0;
309};
310
311class ClTile : public IVectorTile
312{
313public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100314 ClTile(const std::string &name, TileInfo format)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100315 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100316 _format = format;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100317 _basename = name;
318 }
319
320 ValueAsString scalar(int32_t x, int32_t y) const override
321 {
322 x = std::max(std::min(x, _format.w - 1), static_cast<int32_t>(0));
323 y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
324
325 ValueAsString t;
326 t.str = build_variable_name(y);
327 t.type.str = get_cl_data_type(_format.dt, 1);
328 t.type.dt = _format.dt;
329 t.type.size = 1;
330
331 // Check required because if the width has only one element, we cannot use .s0
332 if(_format.w != 1)
333 {
334 // Automatic broadcasting
335 t.str += ".s" + std::to_string(x);
336 }
337
338 return t;
339 }
340
341 ValueAsString vector(int32_t y) const override
342 {
343 y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
344
345 ValueAsString t;
346 t.str = build_variable_name(y);
347 t.type.str = get_cl_data_type(_format.dt, _format.w);
348 t.type.dt = _format.dt;
349 t.type.size = _format.w;
350 return t;
351 }
352
353 ValueAsString vector(int32_t x_start, int32_t width, int32_t y) const override
354 {
355 y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
356
357 ValueAsString t;
358 t.str = build_variable_name(y);
359 t.type.str = get_cl_data_type(_format.dt, width);
360 t.type.dt = _format.dt;
361 t.type.size = width;
362
363 if(_format.w != 1)
364 {
365 t.str += ".s";
366 for(int i = 0; i < width; ++i)
367 {
368 t.str += to_scalar_hex(x_start + i);
369 }
370 }
371 return t;
372 }
373
374 std::vector<ValueAsString> underlying_source_variables() const override
375 {
376 std::vector<ValueAsString> vars;
377 for(int32_t y = 0; y < _format.h; ++y)
378 {
379 ValueAsString t;
380 t.str = build_variable_name(y);
381 t.type.str = get_cl_data_type(_format.dt, _format.w);
382 t.type.dt = _format.dt;
383 t.type.size = _format.w;
384 vars.push_back(t);
385 }
386 return vars;
387 }
388
389 bool is_assignable() const override
390 {
391 return true;
392 }
393
394 bool need_declaration() const override
395 {
396 return true;
397 }
398
399private:
400 std::string build_variable_name(int32_t y) const
401 {
402 std::string var_name = _basename;
403
404 if(_format.h == 1)
405 {
406 return var_name;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100407 }
408 else
409 {
410 var_name += "_";
411 var_name += std::to_string(y);
412 }
413
414 return var_name;
415 }
416
417 std::string to_scalar_hex(int32_t x) const
418 {
419 switch(x)
420 {
421 case 0:
422 case 1:
423 case 2:
424 case 3:
425 case 4:
426 case 5:
427 case 6:
428 case 7:
429 case 8:
430 case 9:
431 return std::to_string(x);
432 case 10:
433 return "A";
434 case 11:
435 return "B";
436 case 12:
437 return "C";
438 case 13:
439 return "D";
440 case 14:
441 return "E";
442 case 15:
443 return "F";
444 default:
Viet-Hoa Doe1880f02023-06-28 10:25:35 +0100445 std::cout << "Unsupported hexadecimal value" << std::endl;
446 assert(false);
447 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100448 }
449 }
450};
451
452// Unique features: It contains values in the form of string. The name used for this object is misleading since the variables can change the value over time.
453class ClConstantTile : public IVectorTile
454{
455public:
456 ClConstantTile(const std::vector<std::vector<std::string>> &in, DataType dt)
457 {
458 _format.w = in[0].size();
459 _format.h = in.size();
460 _format.dt = dt;
461
462 _data = std::vector<std::vector<std::string>>(_format.h, std::vector<std::string>(_format.w));
463
464 for(int32_t y = 0; y < _format.h; ++y)
465 {
466 for(int32_t x = 0; x < _format.w; ++x)
467 {
468 _data[y][x] = in[y][x];
469 }
470 }
471 }
472
473 ValueAsString scalar(int32_t x, int32_t y) const override
474 {
475 x = std::max(std::min(x, _format.w - 1), static_cast<int32_t>(0));
476 y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
477
478 ValueAsString t;
479 t.str = _data[y][x];
480 t.type.str = get_cl_data_type(_format.dt, 1);
481 t.type.dt = _format.dt;
482 t.type.size = 1;
483
484 return t;
485 }
486
487 ValueAsString vector(int32_t y) const override
488 {
489 y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
490
491 return vector(0, _format.w, y);
492 }
493
494 ValueAsString vector(int32_t x_start, int32_t width, int32_t y) const override
495 {
496 y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
497
498 ValueAsString t;
499 t.str = "";
500 t.type.str = get_cl_data_type(_format.dt, width);
501 t.type.dt = _format.dt;
502 t.type.size = width;
503
504 if(width > 1)
505 {
506 t.str += "((" + get_cl_data_type(_format.dt, width) + ")(";
507 }
508
509 int32_t x = x_start;
510 for(; x < width - 1; ++x)
511 {
512 t.str += scalar(x, y).str;
513 t.str += ", ";
514 }
515 t.str += scalar(x, y).str;
516
517 if(width > 1)
518 {
519 t.str += "))";
520 }
521
522 return t;
523 }
524
525 std::vector<ValueAsString> underlying_source_variables() const override
526 {
527 std::vector<ValueAsString> vars;
528
529 for(int32_t y = 0; y < _format.h; ++y)
530 {
531 for(int32_t x = 0; x < _format.w; ++x)
532 {
533 ValueAsString t;
534 t.str = _data[y][x];
535 t.type.str = get_cl_data_type(_format.dt, 1);
536 t.type.dt = _format.dt;
537 t.type.size = 1;
538 vars.push_back(t);
539 }
540 }
541
542 return vars;
543 }
544
545 bool is_assignable() const override
546 {
547 return false;
548 }
549
550 bool need_declaration() const override
551 {
552 return false;
553 }
554
555private:
556 std::vector<std::vector<std::string>> _data{};
557};
558
559enum class TensorComponentIndex : int32_t
560{
561 IndexMask = 0x0000000f,
562};
563
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100564enum class TensorComponentGroup : int32_t
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100565{
566 OffsetFirstElement = 0x00000100,
567 Stride = 0x00001000,
568 Dimension = 0x00010000,
569 FoldedDimension = 0x00100000,
570 Constant = 0x01000000
571};
572
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100573inline std::string to_string(TensorComponentType x)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100574{
575 switch(x)
576 {
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100577 case TensorComponentType::Unknown:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100578 return "Unknown";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100579 case TensorComponentType::OffsetFirstElement:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100580 return "OffsetFirstElement";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100581 case TensorComponentType::Stride1:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100582 return "Stride1";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100583 case TensorComponentType::Stride2:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100584 return "Stride2";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100585 case TensorComponentType::Stride3:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100586 return "Stride3";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100587 case TensorComponentType::Stride4:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100588 return "Stride4";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100589 case TensorComponentType::Dim0:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100590 return "Dim0";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100591 case TensorComponentType::Dim1:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100592 return "Dim1";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100593 case TensorComponentType::Dim2:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100594 return "Dim2";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100595 case TensorComponentType::Dim3:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100596 return "Dim3";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100597 case TensorComponentType::Dim4:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100598 return "Dim4";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100599 case TensorComponentType::Dim1xDim2:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100600 return "Dim1xDim2";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100601 case TensorComponentType::Dim1xDim2xDim3:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100602 return "Dim1xDim2xDim3";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100603 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100604 assert(false);
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100605 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100606 }
607}
608
609class ITensorArgument
610{
611public:
612 virtual ~ITensorArgument() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100613
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100614 /** Method to get the tensor component as a string
615 *
616 * @param[in] x tensor component to query
617 *
618 * @return the tensor component as a string
619 */
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100620 virtual std::string component(TensorComponentType x) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100621
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100622 /** Method to get the tensor component type declaration as a string
623 *
624 * @return the tensor component type declaration as a string
625 */
626 virtual std::string component_type_declaration() const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100627
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100628 /** Method to get the tensor component data type
629 *
630 * @return the tensor component data type
631 */
632 virtual DataType component_data_type() const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100633
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100634 /** Method to get the tensor component declarations
635 *
636 * @return a vector containing the tensor component declarations
637 */
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100638 virtual std::vector<TensorComponentType> component_declarations() const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100639
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100640 /** Method to get the name of the tensor argument.
641 *
642 * @return the name of the tensor argument
643 */
644 std::string name() const
645 {
646 return _basename;
647 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100648
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100649 /** Method to get the tensor format
650 *
651 * @return the format
652 */
653 TensorInfo format() const
654 {
655 return _format;
656 }
657
658protected:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100659 TensorInfo _format{};
660 std::string _basename{};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100661};
662
663enum class GpuTensorStorage : int32_t
664{
665 Unknown = 0x0000,
666 BufferUint8Ptr = 0x0012,
667 Image2dReadOnly = 0x0020,
668 Image2dWriteOnly = 0x0021,
669 Image3dReadOnly = 0x0030,
670 Image3dWriteOnly = 0x0031
671};
672
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100673inline GpuTensorStorage to_gpu_tensor_storage(TensorStorageType s)
674{
675 switch(s)
676 {
677 case TensorStorageType::Unknown:
678 return GpuTensorStorage::Unknown;
679
680 case TensorStorageType::BufferUint8Ptr:
681 return GpuTensorStorage::BufferUint8Ptr;
682
683 case TensorStorageType::Texture2dReadOnly:
684 return GpuTensorStorage::Image2dReadOnly;
685
686 case TensorStorageType::Texture2dWriteOnly:
687 return GpuTensorStorage::Image2dWriteOnly;
688
689 default:
690 assert(false);
691 return GpuTensorStorage::Unknown;
692 }
693}
694
695inline TensorStorageType to_tensor_storage(GpuTensorStorage s)
696{
697 switch(s)
698 {
699 case GpuTensorStorage::Unknown:
700 return TensorStorageType::Unknown;
701
702 case GpuTensorStorage::BufferUint8Ptr:
703 return TensorStorageType::BufferUint8Ptr;
704
705 case GpuTensorStorage::Image2dReadOnly:
706 return TensorStorageType::Texture2dReadOnly;
707
708 case GpuTensorStorage::Image2dWriteOnly:
709 return TensorStorageType::Texture2dWriteOnly;
710
711 default:
712 assert(false);
713 return TensorStorageType::Unknown;
714 }
715}
716
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100717class IGpuTensorArgument : public ITensorArgument
718{
719public:
720 virtual ~IGpuTensorArgument() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100721
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100722 /** Method to get the tensor storage, which is the underlying storage used to keep the data memory
723 *
724 * @param[in] x tensor storage to query
725 *
726 * @return the tensor storage as a string
727 */
728 virtual std::string storage(GpuTensorStorage x) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100729
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100730 /** Method to get the tensor storage type declaration as a string
731 *
732 * @param[in] x tensor component to query
733 *
734 * @return the tensor storage type declaration as a string
735 */
736 virtual std::string storage_type_declaration(GpuTensorStorage x) const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100737
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100738 /** Method to get the tensor storage declarations
739 *
740 * @return a vector containing the tensor storage declarations
741 */
742 virtual std::vector<GpuTensorStorage> storage_declarations() const = 0;
743};
744
745class ClTensorArgument : public IGpuTensorArgument
746{
747public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100748 ClTensorArgument(const std::string &name, const TensorInfo &x, bool return_by_value_when_possible)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100749 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100750 _basename = name;
751 _format = x;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100752 _return_by_value_when_possible = return_by_value_when_possible;
753 }
754
755 // Methods to override
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100756 std::string component(TensorComponentType x) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100757 {
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100758 if((static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentGroup::Constant)))
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100759 {
760 int32_t idx = static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentIndex::IndexMask);
761 return std::to_string(idx - 1);
762 }
763
764 if(_return_by_value_when_possible)
765 {
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100766 if((static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentGroup::Dimension)))
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100767 {
768 int32_t idx = static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentIndex::IndexMask);
769 return std::to_string(_format.shape[idx]);
770 }
771
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100772 if((static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentGroup::FoldedDimension)))
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100773 {
774 switch(x)
775 {
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100776 case TensorComponentType::Dim1xDim2:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100777 return std::to_string(_format.shape[1] * _format.shape[2]);
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100778 case TensorComponentType::Dim1xDim2xDim3:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100779 return std::to_string(_format.shape[1] * _format.shape[2] * _format.shape[2]);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100780 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100781 std::cout << "Unsupported folded dimension" << std::endl;
782 assert(false);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100783 }
784 }
785 }
786
787 if(std::find(_components_required.begin(), _components_required.end(), x) == _components_required.end())
788 {
789 _components_required.push_back(x);
790 }
791
792 return build_component_name(x);
793 }
794
795 std::string component_type_declaration() const override
796 {
797 return "int";
798 };
799
800 DataType component_data_type() const override
801 {
802 return DataType::Int32;
803 }
804
805 std::string storage(GpuTensorStorage x) override
806 {
807 if(std::find(_storage_required.begin(), _storage_required.end(), x) == _storage_required.end())
808 {
809 _storage_required.push_back(x);
810 }
811
812 return build_storage_name(x);
813 }
814
815 std::string storage_type_declaration(GpuTensorStorage x) const override
816 {
817 switch(x)
818 {
819 case GpuTensorStorage::BufferUint8Ptr:
820 return "__global uchar*";
821 case GpuTensorStorage::Image2dReadOnly:
822 return "__read_only image2d_t";
823 case GpuTensorStorage::Image2dWriteOnly:
824 return "__write_only image2d_t";
825 case GpuTensorStorage::Image3dReadOnly:
826 return "__read_only image3d_t ";
827 case GpuTensorStorage::Image3dWriteOnly:
828 return "__write_only image3d_t ";
829 default:
830 std::cout << "Unsupported storage" << std::endl;
831 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +0100832 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100833 }
834 };
835
836 std::vector<GpuTensorStorage> storage_declarations() const override
837 {
838 return _storage_required;
839 }
840
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100841 std::vector<TensorComponentType> component_declarations() const override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100842 {
843 return _components_required;
844 }
845
846private:
847 std::string build_storage_name(GpuTensorStorage x) const
848 {
849 std::string var_name = _basename;
850
851 switch(x)
852 {
853 case GpuTensorStorage::BufferUint8Ptr:
854 return var_name + "_ptr";
855 case GpuTensorStorage::Image2dReadOnly:
856 case GpuTensorStorage::Image2dWriteOnly:
857 return var_name + "_img2d";
858 case GpuTensorStorage::Image3dReadOnly:
859 case GpuTensorStorage::Image3dWriteOnly:
860 return var_name + "_img3d";
861 default:
862 std::cout << "Unsupported storage" << std::endl;
863 assert(false);
864 }
865
866 return var_name;
867 }
868
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100869 std::string build_component_name(TensorComponentType x) const
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100870 {
871 std::string var_name = _basename;
872
873 switch(x)
874 {
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100875 case TensorComponentType::OffsetFirstElement:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100876 return var_name + "_offset_first_element";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100877 case TensorComponentType::Stride1:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100878 return var_name + "_stride1";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100879 case TensorComponentType::Stride2:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100880 return var_name + "_stride2";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100881 case TensorComponentType::Stride3:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100882 return var_name + "_stride3";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100883 case TensorComponentType::Dim0:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100884 return var_name + "_dim0";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100885 case TensorComponentType::Dim1:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100886 return var_name + "_dim1";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100887 case TensorComponentType::Dim2:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100888 return var_name + "_dim2";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100889 case TensorComponentType::Dim3:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100890 return var_name + "_dim3";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100891 case TensorComponentType::Dim1xDim2:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100892 return var_name + "_dim1xdim2";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100893 case TensorComponentType::Dim1xDim2xDim3:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100894 return var_name + "_dim1xdim2xdim3";
895 default:
896 std::cout << "Unsupported component" << std::endl;
897 assert(false);
898 }
899
900 return var_name;
901 }
902
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100903 bool _return_by_value_when_possible{ false };
904 std::vector<GpuTensorStorage> _storage_required{};
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100905 std::vector<TensorComponentType> _components_required{};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100906};
907
908/**
909 * @brief Data structure that contains the declared tiles by the components.
910 * The registry is a linear data structure that follows the similar principle of the stack. The user can use the @p increment_registry_level() method to
911 * increase the level of the stack (0 when it starts). When the user uses the @p decrement_registry_level() method, the registry decreases the level of the stack
912 * and remove (pop) all the tiles from the level above.
913 * When a tile is declared on the level 0, it is a global tile. A global tile is visible in all parts of the code.
914 * Since different components may use the same name to define a tile, the registry adopts the IdSpace concept, an @p id to prevent name collisions
915 * when declaring tiles among different components.
916 *
917 */
918class GpuTileRegistry
919{
920public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100921 enum class RegistryTileType
922 {
923 Tile,
924 Link
925 };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100926
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100927 using RegistryIdSpace = int32_t;
928 using RegistryLevel = int32_t;
929 using RegistryTileName = std::string;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100930
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100931 struct RegistryTileTableEntry
932 {
933 RegistryLevel registry_level{ 0 };
934 std::unique_ptr<IVectorTile> tile_object{ nullptr };
935 };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100936
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100937 struct RegistryTileTypeTableEntry
938 {
939 RegistryTileType tile_type{ RegistryTileType::Tile };
940 RegistryTileName tile_name{};
941 RegistryIdSpace registry_idspace{ 0 };
942 RegistryLevel registry_level{ 0 };
943 };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100944
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100945 using RegistryTileTable = std::map<RegistryIdSpace, std::map<RegistryTileName, RegistryTileTableEntry>>;
946 using RegistryTileTypeTable = std::map<RegistryIdSpace, std::map<RegistryTileName, RegistryTileTypeTableEntry>>;
947
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100948 /**
949 * @brief Construct a new Gpu Tile Registry object
950 *
951 */
952 GpuTileRegistry()
953 {
954 _language = GpuTargetLanguage::Unknown;
955 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100956
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100957 /**
958 * @brief Construct a new Gpu Tile Registry object providing the Gpu programming language
959 *
960 * @param[in] language Gpu programming language to use
961 */
962 GpuTileRegistry(GpuTargetLanguage language)
963 {
964 _language = language;
965 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100966
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100967 /**
968 * @brief Default destructor. Destroy the Gpu Tile Registry object
969 *
970 */
971 ~GpuTileRegistry() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100972
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100973 /**
974 * @brief Set the working IdSpace for the tile registry. IdSpace is used to prevent name collisions when declaring tiles.
975 * Therefore, the IdSpace should be set before declaring any tiles.
976 *
977 * @param[in] id The IdSpace id
978 */
979 void set_IdSpace(int32_t id)
980 {
981 _IdSpace = id;
982 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100983
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100984 /**
985 * @brief Get the current working IdSpace for the tile registry. IdSpace is used to prevent name collisions when declaring tiles
986 *
987 * @return The IdSpace id
988 */
989 int32_t IdSpace() const
990 {
991 return _IdSpace;
992 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100993
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100994 /**
995 * @brief Gets all the IdSpace declarations defined in the tile registry.
996 *
997 * @return all the IdSpace declarations defined in the tile registry as std::vector<int32_t>. It returns an empty vector if there are no IdSpace declarations.
998 */
999 std::vector<int32_t> IdSpace_declarations() const
1000 {
1001 std::vector<int32_t> x;
1002
1003 auto it = _frags.begin();
1004
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001005 while(it != _frags.end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001006 {
1007 x.push_back(it->first);
1008
1009 it++;
1010 }
1011
1012 return x;
1013 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001014
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001015 /**
1016 * @brief Declare a tile from a previously created tile
1017 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001018 void insert(const std::string &name, const IVectorTile *frag)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001019 {
1020 assert(_language == GpuTargetLanguage::OpenCL);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001021 const int32_t key_IdSpace = _IdSpace;
1022 const std::string key_var_name = name;
1023 const std::string var_name = frag->name();
1024 TileInfo format = frag->format();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001025
1026 // First check whether a tile with the same name exists
1027 IVectorTile *result = (*this)[key_var_name];
1028 assert(result == nullptr);
1029 if(result == nullptr)
1030 {
1031 std::unique_ptr<ClTile> tile = std::make_unique<ClTile>(var_name, format);
1032
1033 _frags[key_IdSpace][key_var_name].tile_object = std::move(tile);
1034 _frags[key_IdSpace][key_var_name].registry_level = _registry_level;
1035
1036 _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Link;
1037 _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name;
1038 _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace;
1039 _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level;
1040 }
1041 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001042
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001043 /**
1044 * @brief Declare a tile with TileInfo. The tile will be stored in the IdSpace set with @p set_IdSpace()
1045 *
1046 * @note The reference name used for declaring the tile should not be previously used in the IdSpace
1047 *
1048 * @param[in] name Reference name for the tile. The reference name can be used to retrieve the tile stored in the registry.
1049 * @param[in] format Tile format use to use
1050 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001051 void insert(const std::string &name, const TileInfo &format)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001052 {
1053 assert(_language == GpuTargetLanguage::OpenCL);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001054 const int32_t key_IdSpace = _IdSpace;
1055 const std::string key_var_name = name;
1056 const std::string var_name = generate_tile_name(name);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001057
1058 // First check whether a tile with the same name exists
1059 IVectorTile *result = (*this)[key_var_name];
1060 assert(result == nullptr);
1061 if(result == nullptr)
1062 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001063 std::unique_ptr<ClTile> tile = std::make_unique<ClTile>(var_name, format);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001064 _frags[key_IdSpace][key_var_name].tile_object = std::move(tile);
1065 _frags[key_IdSpace][key_var_name].registry_level = _registry_level;
1066
1067 _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Tile;
1068 _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name;
1069 _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace;
1070 _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level;
1071 }
1072 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001073
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001074 /**
1075 * @brief Declare a constant tile. The content of the tile is passed as a vector of std::string
1076 *
1077 * @note The reference name used for declaring the tile should not be previously used in the IdSpace
1078 *
1079 * @param[in] name Reference name for the tile. The reference name can be used to retrieve the tile stored in the registry.
1080 * @param[in] in A 3D std::vector of std::string. From the 3D std::vector we can know the dimensions for the tile
1081 * @param[in] dt The data type for the elements stored in the 3D std::vector as std::string. It is user's responsibilty to ensure
1082 * that the data type is aligned with the content of the std::string.
1083 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001084 void insert(const std::string &name, const std::vector<std::vector<std::string>> &in, DataType dt)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001085 {
1086 assert(_language == GpuTargetLanguage::OpenCL);
1087 const int32_t key_IdSpace = _IdSpace;
1088 const std::string key_var_name = name;
1089
1090 // First check whether a tile with the same name exists
1091 IVectorTile *result = (*this)[key_var_name];
1092 assert(result == nullptr);
1093 if(result == nullptr)
1094 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001095 std::unique_ptr<ClConstantTile> tile = std::make_unique<ClConstantTile>(in, dt);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001096 _frags[key_IdSpace][key_var_name].tile_object = std::move(tile);
1097 _frags[key_IdSpace][key_var_name].registry_level = _registry_level;
1098
1099 _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Tile;
1100 _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name;
1101 _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace;
1102 _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level;
1103 }
1104 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001105
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001106 /**
1107 * @brief Declare an anonymous constant tile. The content of the tile is passed as a vector of std::string
1108 *
1109 * @note This method can be used to declare temporary tiles that need to be accessed only once.
1110 *
1111 * @param[in] in A 3D std::vector of std::string. From the 3D std::vector we can know the dimensions for the tile
1112 * @param[in] dt The data type for the elements stored in the 3D std::vector as std::string. It is user responsibilty to ensure
1113 * that the data type is aligned with what passed with the std::string.
1114 *
1115 * @return IVectorTile* the anonymous constant tile
1116 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001117 IVectorTile *insert(const std::vector<std::vector<std::string>> &in, DataType dt)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001118 {
1119 assert(_language == GpuTargetLanguage::OpenCL);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001120 const int32_t key_IdSpace = _IdSpace;
1121 const std::string key_var_name = "_" + std::to_string(_anonymous_frag_count++);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001122
1123 // First check whether a tile with the same name exists
1124 IVectorTile *result = (*this)[key_var_name];
1125 assert(result == nullptr);
1126 if(result == nullptr)
1127 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001128 std::unique_ptr<ClConstantTile> tile = std::make_unique<ClConstantTile>(in, dt);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001129 _frags[key_IdSpace][key_var_name].tile_object = std::move(tile);
1130 _frags[key_IdSpace][key_var_name].registry_level = _registry_level;
1131
1132 _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Tile;
1133 _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name;
1134 _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace;
1135 _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level;
1136 }
1137
1138 return (*this)[key_var_name];
1139 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001140
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001141 /**
1142 * @brief Get the tile from the registry. This method searches the tile in the IdSpace provided by the user
1143 *
1144 * @param[in] name The name of the tile to retrieve
1145 * @param[in] IdSpace The IdSpace id where to search the tile
1146 *
1147 * @return IVectorTile* The tile
1148 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001149 IVectorTile *get(const std::string &name, int32_t IdSpace)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001150 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001151 const int32_t key_IdSpace = IdSpace;
1152 const std::string key_var_name = name;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001153
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001154 IVectorTile *result = nullptr;
1155 auto search_IdSpace = _frags.find(key_IdSpace);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001156 if(search_IdSpace != _frags.end())
1157 {
1158 auto search_tile = _frags[key_IdSpace].find(key_var_name);
1159 if(search_tile != _frags[key_IdSpace].end())
1160 {
1161 result = search_tile->second.tile_object.get();
1162 assert(result != nullptr);
1163 }
1164 }
1165
1166 return result;
1167 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001168
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001169 /**
1170 * @brief Get the tile from the registry. This method searches the tile in the IdSpace set with @p set_IdSpace()
1171 *
1172 * @param[in] name The name of the tile to retrieve
1173 *
1174 * @return IVectorTile* The tile
1175 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001176 IVectorTile *operator[](const std::string &name)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001177 {
1178 return get(name, _IdSpace);
1179 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001180
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001181 /**
1182 * @brief Check whether the tile in the in the IdSpace provided by the user exists
1183 *
1184 * @param[in] name Name of the tile to search for
1185 * @param[in] IdSpace The IdSpace id where to search the tile
1186 *
1187 * @return true if the tile exists
1188 * @return false if the tile does not exist
1189 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001190 bool has_tile(const std::string &name, int32_t IdSpace) const
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001191 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001192 const int32_t key_IdSpace = IdSpace;
1193 const std::string key_var_name = name;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001194
1195 // IVectorTile* result = nullptr;
1196 auto search_IdSpace = _frags.find(key_IdSpace);
1197
1198 return search_IdSpace != _frags.end();
1199 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001200
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001201 /**
1202 * @brief Check whether the tile within the current IdSpace exists
1203 *
1204 * @param[in] name Name of the tile to search for
1205 *
1206 * @return true if the tile exists
1207 * @return false if the tile does not exist
1208 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001209 bool has_tile(const std::string &name) const
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001210 {
1211 return has_tile(name, _IdSpace);
1212 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001213
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001214 /**
1215 * @brief Get all the tiles declared within the IdSpace provided by the user
1216 *
1217 * @param[in] IdSpace IdSpace where to retrieve all the declared tiles
1218 *
1219 * @return std::vector<IVectorTile*> A vector with all the declared tiles in the IdSpace provided by the user
1220 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001221 std::vector<IVectorTile *> tile_declarations(int32_t IdSpace)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001222 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001223 std::vector<IVectorTile *> tiles;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001224
1225 std::map<RegistryTileName, RegistryTileTypeTableEntry>::iterator it = _frag_types[IdSpace].begin();
1226
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001227 while(it != _frag_types[IdSpace].end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001228 {
1229 // The following line should be enabled. However, we cannot at this stage
1230 // because it used to retrieve the output tile produced by each component.
1231 // However, this method should NOT be used to retrieve the output tile
1232 //if(it->second.tile_type == RegistryTileType::Tile)
1233 {
1234 tiles.push_back(get(it->second.tile_name, it->second.registry_idspace));
1235 }
1236 it++;
1237 }
1238
1239 return tiles;
1240 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001241
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001242 /**
1243 * @brief Increase the level of stack.
1244 *
1245 */
1246 void increment_registry_level()
1247 {
1248 _registry_level++;
1249 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001250
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001251 /**
1252 * @brief Remove all the tiles declared at the current stack level and decrease the level of the stack.
1253 *
1254 */
1255 void decrement_registry_level()
1256 {
1257 assert(_registry_level >= 0);
1258
1259 // Remove all variables in the local scope
1260 std::map<RegistryTileName, RegistryTileTableEntry>::iterator it = _frags[_IdSpace].begin();
1261
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001262 while(it != _frags[_IdSpace].end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001263 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001264 if(it->second.registry_level == _registry_level)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001265 {
1266 it = _frags[_IdSpace].erase(it);
1267 }
1268 else
1269 {
1270 it++;
1271 }
1272 }
1273
1274 std::map<RegistryTileName, RegistryTileTypeTableEntry>::iterator it_type = _frag_types[_IdSpace].begin();
1275
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001276 while(it_type != _frag_types[_IdSpace].end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001277 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001278 if(it_type->second.registry_level == _registry_level)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001279 {
1280 it_type = _frag_types[_IdSpace].erase(it_type);
1281 }
1282 else
1283 {
1284 it_type++;
1285 }
1286 }
1287
1288 _registry_level--;
1289 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001290
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001291 /**
1292 * @brief Get the level of the stack
1293 *
1294 */
1295 int32_t level() const
1296 {
1297 return _registry_level;
1298 }
1299
1300private:
1301 // This method ensures that the key is unique among different components
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001302 std::string generate_tile_name(const std::string &name)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001303 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001304 assert(_IdSpace >= 0);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001305 if(_registry_level == 0)
1306 {
1307 return "_G" + std::to_string(_IdSpace) + "_" + name;
1308 }
1309 else
1310 {
1311 return name;
1312 }
1313 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001314
1315 RegistryTileTable _frags{};
1316 RegistryTileTypeTable _frag_types{};
1317 RegistryLevel _registry_level{ 0 };
1318 RegistryIdSpace _IdSpace{ -1 };
1319 int32_t _anonymous_frag_count{ 0 }; // Counter used to create the anonymous tiles
1320 GpuTargetLanguage _language{ GpuTargetLanguage::Unknown }; // Gpu programming language
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001321};
1322
1323using TensorEntry = std::unique_ptr<IGpuTensorArgument>;
1324
1325/**
1326 * @brief Data structure that contains the tensors consumed by the components.
1327 * Since different components may use the same name as reference for a tensor, the registry adopts the IdSpace concept, an @p id to prevent name collisions
1328 * when declaring tensors among different components.
1329 *
1330 */
1331class GpuTensorArgumentRegistry
1332{
1333public:
1334 /**
1335 * @brief Construct a new Gpu Tensor Registry object
1336 *
1337 */
1338 GpuTensorArgumentRegistry()
1339 {
1340 _language = GpuTargetLanguage::Unknown;
1341 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001342
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001343 /**
1344 * @brief Construct a new Gpu Tensor Registry object
1345 *
1346 * @param[in] language Gpu programming language to use
1347 */
1348 GpuTensorArgumentRegistry(GpuTargetLanguage language)
1349 {
1350 _language = language;
1351 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001352
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001353 /**
1354 * @brief Default destructor. Destroy the Gpu Tensor Registry object
1355 *
1356 */
1357 ~GpuTensorArgumentRegistry() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001358
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001359 /**
1360 * @brief Set the working IdSpace for the tensor registry. IdSpace is used to prevent name collisions when declaring tensors.
1361 * Therefore, the IdSpace should be set before declaring any tensors.
1362 *
1363 * @param[in] id The IdSpace id
1364 */
1365 void set_IdSpace(int32_t id)
1366 {
1367 _IdSpace = id;
1368 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001369
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001370 /**
1371 * @brief Get the current working IdSpace for the tensor registry. IdSpace is used to prevent name collisions when declaring tensors
1372 *
1373 * @return The IdSpace id
1374 */
1375 int32_t IdSpace() const
1376 {
1377 return _IdSpace;
1378 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001379
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001380 /**
1381 * @brief Gets all the IdSpace declarations defined in the tensor registry.
1382 *
1383 * @return all the IdSpace declarations defined in the tensor registry as std::vector<int32_t>. It returns an empty vector if there are no IdSpace declarations.
1384 */
1385 std::vector<int32_t> IdSpace_declarations() const
1386 {
1387 std::vector<int32_t> x;
1388
1389 auto it = _refs.begin();
1390
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001391 while(it != _refs.end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001392 {
1393 x.push_back(it->first);
1394
1395 it++;
1396 }
1397
1398 return x;
1399 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001400
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001401 /**
1402 * @brief Declare a tensor with TensorInfo. The tensor will be stored in the IdSpace set with @p set_IdSpace()
1403 *
1404 * @note The reference name used for declaring the tensor should not be previously used in the IdSpace
1405 *
1406 * @param[in] name Reference name for the tensor. The reference name can be used to retrieve the tensor stored in the registry.
1407 * @param[in] x Pair of tensor info and tensor id
1408 * @param[in] return_by_value_when_possible True if we want the value stored in the tensor components
1409 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001410 void insert(const std::string &name, const TensorInfo &x, bool return_by_value_when_possible)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001411 {
1412 assert(_language == GpuTargetLanguage::OpenCL);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001413 const int32_t key_IdSpace = _IdSpace;
1414 const int32_t tensor_id = x.id;
1415 const std::string key_var_name = name;
1416 const std::string var_name = generate_tensor_name(name, tensor_id);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001417
1418 // First, check whether the tensor has already a reference. If so, trigger an assert
1419 assert(!has_tensor_argument(name));
1420
1421 // Check whether a tensor with that tensorID exists
1422 auto result = _tensor_arguments.find(tensor_id);
1423 if(result == _tensor_arguments.end())
1424 {
1425 // It means that we haven't added a tensor with that tensor_id yet. Create a IGpuTensorArgument before creating the reference
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001426 std::unique_ptr<ClTensorArgument> arg = std::make_unique<ClTensorArgument>(var_name, x,
1427 return_by_value_when_possible);
1428 _tensor_arguments[tensor_id] = std::move(arg);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001429 }
1430
1431 _refs[key_IdSpace][key_var_name] = tensor_id;
1432 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001433
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001434 /**
1435 * @brief Get the tensor from the registry. This method searches the tensor in the IdSpace set with @p set_IdSpace()
1436 *
1437 * @param[in] name The name of the tensor to retrieve
1438 *
1439 * @return IGpuTensor* The tensor
1440 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001441 IGpuTensorArgument *operator[](const std::string &name)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001442 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001443 const int32_t key_IdSpace = _IdSpace;
1444 const std::string key_var_name = name;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001445
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001446 IGpuTensorArgument *result = nullptr;
1447 auto search_IdSpace = _refs.find(key_IdSpace);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001448 if(search_IdSpace != _refs.end())
1449 {
1450 auto search_tensor_id = _refs[key_IdSpace].find(key_var_name);
1451
1452 if(search_tensor_id != _refs[key_IdSpace].end())
1453 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001454 const int32_t tensor_id = search_tensor_id->second;
1455 auto search_tensor_argument = _tensor_arguments.find(tensor_id);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001456 if(search_tensor_argument != _tensor_arguments.end())
1457 {
1458 result = search_tensor_argument->second.get();
1459 }
1460 assert(result != nullptr);
1461 }
1462 }
1463
1464 return result;
1465 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001466
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001467 /**
1468 * @brief Get all the tensors declared in the IdSpace provided by the user
1469 *
1470 * @return std::vector<IGpuTensorArgument*> A vector with all the declared tensors
1471 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001472 std::vector<IGpuTensorArgument *> tensor_argument_declarations()
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001473 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001474 std::vector<IGpuTensorArgument *> args;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001475
1476 auto it = _tensor_arguments.begin();
1477
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001478 while(it != _tensor_arguments.end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001479 {
1480 args.push_back(it->second.get());
1481 it++;
1482 }
1483
1484 return args;
1485 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001486
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001487 /**
1488 * @brief Check whether the tensor argument in the IdSpace set with @p set_IdSpace() exists
1489 *
1490 * @param[in] name Name of the tensor argument to search for
1491 *
1492 * @return true if the tensor argument exists
1493 * @return false if the tensor argument does not exist
1494 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001495 bool has_tensor_argument(const std::string &name)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001496 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001497 const int32_t key_IdSpace = _IdSpace;
1498 const std::string key_var_name = name;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001499
1500 auto search_IdSpace = _refs.find(key_IdSpace);
1501
1502 if(search_IdSpace != _refs.end())
1503 {
1504 auto search_tensor_id = _refs[key_IdSpace].find(key_var_name);
1505
1506 return search_tensor_id != _refs[key_IdSpace].end();
1507 }
1508 else
1509 {
1510 return false;
1511 }
1512 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001513
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001514 /**
1515 * @brief Check whether the tensor argument is in the the IdSpace provided by the user
1516 *
1517 * @param[in] name Name of the tensor argument to search for
1518 * @param[in] IdSpace The IdSpace id where to search the tensor argument
1519 *
1520 * @return true if the tile exists
1521 * @return false if the tile does not exist
1522 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001523 bool has_tensor_argument(const std::string &name, int32_t IdSpace)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001524 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001525 const int32_t key_IdSpace = IdSpace;
1526 const std::string key_var_name = name;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001527
1528 auto search_IdSpace = _refs.find(key_IdSpace);
1529
1530 if(search_IdSpace != _refs.end())
1531 {
1532 auto search_tensor_id = _refs[key_IdSpace].find(key_var_name);
1533
1534 return search_tensor_id != _refs[key_IdSpace].end();
1535 }
1536 else
1537 {
1538 return false;
1539 }
1540 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001541
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001542private:
1543 // This method ensures that the key is unique among different components
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001544 std::string generate_tensor_name(const std::string &name, int32_t tensor_id)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001545 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001546 assert(tensor_id >= 0);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001547
1548 return name + std::to_string(tensor_id);
1549 }
1550
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001551 std::map<int32_t, TensorEntry> _tensor_arguments{};
1552 std::map<int32_t, std::map<std::string, int32_t>> _refs{};
1553 int32_t _IdSpace{ -1 };
1554 GpuTargetLanguage _language{ GpuTargetLanguage::Unknown }; // Gpu programming language
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001555};
1556
1557enum class OpType : int32_t
1558{
1559 Elementwise = 0x0000,
1560 Relational = 0x1000,
1561 Algebra = 0x2000
1562};
1563
1564inline std::string to_string(AssignmentOp op)
1565{
1566 switch(op)
1567 {
1568 case AssignmentOp::Decrement:
1569 return "-=";
1570 case AssignmentOp::Increment:
1571 return "+=";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001572 default:
1573 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01001574 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001575 }
1576}
1577
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01001578inline std::string to_string(UnaryOp op)
1579{
1580 switch(op)
1581 {
1582 case UnaryOp::LogicalNot:
1583 return "!";
Adnan AlSinan66f3d382023-07-10 15:07:45 +01001584 case UnaryOp::BitwiseNot:
1585 return "~";
Adnan AlSinan2e6d6592023-08-21 13:54:27 +01001586 case UnaryOp::Negate:
1587 return "-";
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01001588 default:
1589 assert(false);
1590 return "";
1591 }
1592}
1593
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001594inline std::string to_string(BinaryOp op)
1595{
1596 switch(op)
1597 {
1598 case BinaryOp::Add:
1599 return "+";
1600 case BinaryOp::Sub:
1601 return "-";
1602 case BinaryOp::Mul:
1603 return "*";
1604 case BinaryOp::Div:
1605 return "/";
1606 case BinaryOp::Mod:
1607 return "%";
1608 case BinaryOp::Equal:
1609 return "==";
1610 case BinaryOp::Less:
1611 return "<";
1612 case BinaryOp::LessEqual:
1613 return "<=";
1614 case BinaryOp::Greater:
1615 return ">";
1616 case BinaryOp::GreaterEqual:
1617 return ">=";
1618 case BinaryOp::LogicalAnd:
1619 return "&&";
1620 case BinaryOp::LogicalOr:
1621 return "||";
Adnan AlSinan66f3d382023-07-10 15:07:45 +01001622 case BinaryOp::BitwiseXOR:
1623 return "^";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001624 default:
1625 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01001626 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001627 }
1628}
1629
1630inline std::string binary_op_string(BinaryOp op)
1631{
1632 switch(op)
1633 {
1634 case BinaryOp::Add:
1635 return "add";
1636 case BinaryOp::Sub:
1637 return "sub";
1638 case BinaryOp::Mul:
1639 return "mul";
1640 case BinaryOp::Div:
1641 return "div";
1642 case BinaryOp::Mod:
1643 return "mod";
1644 case BinaryOp::Equal:
1645 return "eq";
1646 case BinaryOp::Less:
1647 return "gt";
1648 case BinaryOp::LessEqual:
1649 return "gteq";
1650 case BinaryOp::Greater:
1651 return "lt";
1652 case BinaryOp::GreaterEqual:
1653 return "lte";
1654 default:
1655 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01001656 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001657 }
1658}
1659
1660enum class OperandType : int32_t
1661{
1662 Unknown = 0x00000000,
1663 ScalarFp32 = 0x00001011, // Immediate scalar tile
1664 ScalarFp16 = 0x00001012, // Immediate scalar tile
1665 ScalarInt32 = 0x00001021, // Immediate scalar tile
1666 ScalarInt16 = 0x00001022, // Immediate scalar tile
1667 ScalarInt8 = 0x00001024, // Immediate scalar tile
1668 ScalarUInt32 = 0x00001031, // Immediate scalar tile
1669 ScalarUInt16 = 0x00001032, // Immediate scalar tile
1670 ScalarUInt8 = 0x00001034, // Immediate scalar tile
1671 ScalarBool = 0x00001041, // Immediate scalar tile
1672 ScalarTile = 0x00001050, // Scalar from a tile
1673 Tile = 0x00010000, // Tile
1674 TensorStride1 = 0x00100001, // Tensor component
1675 TensorStride2 = 0x00100002, // Tensor component
1676 TensorStride3 = 0x00100003, // Tensor component
1677 TensorStride4 = 0x00100004, // Tensor component
1678 TensorDim0 = 0x00100010, // Tensor component
1679 TensorDim1 = 0x00100020, // Tensor component
1680 TensorDim2 = 0x00100030, // Tensor component
1681 TensorDim3 = 0x00100040, // Tensor component
1682 TensorDim4 = 0x00100050, // Tensor component
1683 TensorC = 0x00100010, // Tensor component
1684 TensorW = 0x00100020, // Tensor component
1685 TensorH = 0x00100030, // Tensor component
1686 TensorD = 0x00100040, // Tensor component
1687 TensorN = 0x00100050, // Tensor component
1688 TensorDim1xDim2 = 0x00100100, // Tensor component
1689 TensorDim1xDim2xDim3 = 0x00100200, // Tensor component
1690 TensorWxH = 0x00100300, // Tensor component
1691 TensorWxHxD = 0x00100400, // Tensor component
1692 TensorDataOffset = 0x00100500, // Tensor component
1693};
1694
1695struct ScalarTileCoord
1696{
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001697 ScalarTileCoord()
1698 {
1699 }
1700
1701 ScalarTileCoord(int32_t x0, int32_t y0)
1702 : x(x0), y(y0)
1703 {
1704 }
1705
1706 int32_t x{ -1 };
1707 int32_t y{ -1 };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001708};
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001709
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001710/**
1711 * @brief Operand class. This object is used to pass the operands to the operations performed by the writer.
1712 * Operand can be of three types:
1713 * -# Scalar immediate: constant expression
1714 * -# Tile: A tile
1715 * -# Tensor component: A component (scalar) of a tensor
1716 *
1717 */
1718class Operand
1719{
1720public:
1721 Operand(const std::string &val)
1722 {
1723 _str = val;
1724 _type = OperandType::Tile;
1725 }
1726
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001727 Operand(const std::string &val, const ScalarTileCoord &coord)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001728 {
1729 _str = val;
1730 _type = OperandType::ScalarTile;
1731 _coord = coord;
1732 }
1733
1734 Operand(const std::string &val, OperandType type)
1735 {
1736 _str = val;
1737 _type = type;
1738 }
1739
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001740 Operand(const Operand &t)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001741 {
1742 _str = t.value();
1743 _type = t.type();
1744 }
1745
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001746 Operand &operator=(const Operand &t)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001747 {
1748 _str = t.value();
1749 _type = t.type();
1750 _coord = t.scalar_tile_coordinate();
1751 return *this;
1752 }
1753
1754 std::string value() const
1755 {
1756 return _str;
1757 }
1758
1759 OperandType type() const
1760 {
1761 return _type;
1762 }
1763
1764 ScalarTileCoord scalar_tile_coordinate() const
1765 {
1766 return _coord;
1767 }
1768
1769private:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001770 std::string _str{};
1771 OperandType _type{ OperandType::Unknown };
1772 ScalarTileCoord _coord{};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001773};
1774
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01001775using GpuSamplerTensorStorage = GpuTensorStorage;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001776
1777struct GpuSampler
1778{
1779 GpuSampler() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001780
1781 TensorSamplerFormat format{ TensorSamplerFormat::Unknown };
1782 GpuSamplerTensorStorage storage{ GpuSamplerTensorStorage::Unknown };
1783 TensorSamplerAddressModeX address_mode_x{ TensorSamplerAddressModeX::Unknown };
1784 TensorSamplerAddressModeY address_mode_y{ TensorSamplerAddressModeY::Unknown };
1785 TensorSamplerAddressModeZ address_mode_z{ TensorSamplerAddressModeZ::Unknown };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001786};
1787
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001788inline GpuSampler
1789create_simple_sampler(const TensorInfo *tensor_info_id, GpuSampler sampler, int32_t step_x, int32_t step_y,
1790 int32_t step_z)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001791{
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01001792 CKW_UNUSED(step_x, step_y, step_z);
1793
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001794 auto tensor = tensor_info_id->shape;
1795
1796 GpuSampler dst_sampler;
1797 dst_sampler.format = sampler.format;
1798 dst_sampler.storage = GpuSamplerTensorStorage::BufferUint8Ptr;
1799 dst_sampler.address_mode_x = sampler.address_mode_x;
1800 dst_sampler.address_mode_y = sampler.address_mode_y;
1801 dst_sampler.address_mode_z = sampler.address_mode_z;
1802
1803 int32_t dim_x = 0;
1804 int32_t dim_y = 0;
1805 int32_t dim_z = 0;
1806
1807 switch(sampler.format)
1808 {
1809 case TensorSamplerFormat::C_W_H:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001810 dim_x = tensor[0];
1811 dim_y = tensor[1];
1812 dim_z = tensor[2];
1813 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001814 case TensorSamplerFormat::C_WH_1:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001815 dim_x = tensor[0];
1816 dim_y = tensor[1] * tensor[2];
1817 dim_z = 1;
1818 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001819 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001820 std::cout << "Unsupported tensor format" << std::endl;
1821 assert(false);
1822 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001823 }
1824
1825 if(dim_x == 1)
1826 {
1827 assert(step_x == 1);
1828 dst_sampler.address_mode_x = TensorSamplerAddressModeX::None;
1829 }
1830
1831 if(dim_y == 1)
1832 {
1833 assert(step_y == 1);
1834 dst_sampler.address_mode_y = TensorSamplerAddressModeY::None;
1835 }
1836
1837 if(dim_z == 1)
1838 {
1839 assert(step_z == 1);
1840 dst_sampler.address_mode_z = TensorSamplerAddressModeZ::None;
1841 }
1842
1843 return dst_sampler;
1844}
1845
1846class GpuOutputSampler
1847{
1848public:
1849 GpuOutputSampler() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001850
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001851 /**
1852 * @brief Method used to initialize the GpuOutputSampler. The GpuOutputSampler can be initialized only once
1853 * by the root component. Once initialized, all simpler components will need to used this sampler
1854 * or a broadcasted version of it
1855 *
1856 * @param[in] sampler GpuSampler
1857 * @param[in] step_x Increment step in the X direction. Not necessarily it is the same of n0 of tile!
1858 * @param[in] step_y Increment step in the Y direction. Not necessarily it is the same of m0 of tile!
1859 * @param[in] step_z Increment step in the Z direction. Not necessarily it is the same of d0 of tile!
1860 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001861 void initialize(const TensorInfo *tensor_info_id, GpuSamplerTensorStorage tensor_storage,
1862 TensorSamplerFormat tensor_format, int32_t step_x, int32_t step_y, int32_t step_z)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001863 {
1864 assert(_is_initialized == false);
1865
1866 _step_x = step_x;
1867 _step_y = step_y;
1868 _step_z = step_z;
1869 _tensor_info_id = tensor_info_id;
1870 _sampler = create_sampler(tensor_storage, tensor_format);
1871 _is_initialized = true;
1872 };
1873
1874 GpuSampler sampler() const
1875 {
1876 return _sampler;
1877 };
1878
1879 int32_t step_x() const
1880 {
1881 return _step_x;
1882 };
1883
1884 int32_t step_y() const
1885 {
1886 return _step_y;
1887 };
1888
1889 int32_t step_z() const
1890 {
1891 return _step_z;
1892 };
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001893
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001894private:
1895 GpuSampler create_sampler(GpuSamplerTensorStorage tensor_storage, TensorSamplerFormat tensor_format)
1896 {
1897 // Output can only be in output mode
1898 assert(tensor_storage != GpuSamplerTensorStorage::Image2dReadOnly);
1899 assert(tensor_storage != GpuSamplerTensorStorage::Image3dReadOnly);
1900
1901 auto tensor = _tensor_info_id->shape;
1902
1903 GpuSampler sampler;
1904 sampler.format = tensor_format;
1905 sampler.storage = tensor_storage;
1906 sampler.address_mode_x = TensorSamplerAddressModeX::None;
1907 sampler.address_mode_y = TensorSamplerAddressModeY::None;
1908 sampler.address_mode_z = TensorSamplerAddressModeZ::None;
1909
1910 // In the case of texture, we do not need any special checks at the border
1911 if(tensor_storage == GpuSamplerTensorStorage::BufferUint8Ptr)
1912 {
1913 int32_t dim_x = 0;
1914 int32_t dim_y = 0;
1915 int32_t dim_z = 0;
1916
1917 switch(tensor_format)
1918 {
1919 case TensorSamplerFormat::C_W_H:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001920 dim_x = tensor[0];
1921 dim_y = tensor[1];
1922 dim_z = tensor[2];
1923 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001924 case TensorSamplerFormat::C_WH_1:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001925 dim_x = tensor[0];
1926 dim_y = tensor[1] * tensor[2];
1927 dim_z = 1;
1928 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001929 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001930 std::cout << "Unsupported tensor format" << std::endl;
1931 assert(false);
1932 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001933 }
1934
1935 if((dim_x % _step_x) != 0 && dim_x != 1)
1936 {
1937 sampler.address_mode_x = TensorSamplerAddressModeX::OverlappingMin;
1938 }
1939
1940 if((dim_y % _step_y) != 0 && dim_y != 1)
1941 {
1942 sampler.address_mode_y = TensorSamplerAddressModeY::ClampToMaxEdgeOnly;
1943 }
1944
1945 if((dim_z % _step_z) != 0 && dim_z != 1)
1946 {
1947 sampler.address_mode_z = TensorSamplerAddressModeZ::ClampToMaxEdgeOnly;
1948 }
1949 }
1950
1951 return sampler;
1952 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001953
1954 GpuSampler _sampler{}; // GpuSampler
1955 int32_t _step_x{ 1 };
1956 int32_t _step_y{ 1 };
1957 int32_t _step_z{ 1 };
1958 const TensorInfo *_tensor_info_id{ nullptr };
1959 bool _is_initialized{ false };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001960};
1961
1962/**
1963 * @brief Tensor operand class. This object is used to pass the operands as tensor to the operations performed by the writer.
1964 */
1965class TensorOperand
1966{
1967public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001968 TensorOperand(const std::string &val, GpuSampler sampler)
1969 : _str(val), _sampler(sampler)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001970 {
1971 }
1972
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001973 TensorOperand &operator=(const TensorOperand &t)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001974 {
1975 _str = t.value();
1976 _sampler = t.sampler();
1977 return *this;
1978 }
1979
1980 std::string value() const
1981 {
1982 return _str;
1983 }
1984
1985 GpuSampler sampler() const
1986 {
1987 return _sampler;
1988 }
1989
1990private:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001991 std::string _str{};
1992 GpuSampler _sampler{};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001993};
1994
1995/**
1996 * @brief Data structure that contains all the necessary information to write the Gpu kernel with the Gpu kernel Writer
1997 * This data structure must be initialized before being passed to the Gpu Kernel Writer
1998 *
1999 */
2000class GpuKernelWriterDataHolder
2001{
2002public:
2003 /**
2004 * @brief Construct a new Gpu Kernel Data object. In this phase, we should also store
2005 * the GPU target and target specific capabilities (extensions). For now, we just initialize the
2006 * programming language
2007 *
2008 * @param[in] language Gpu programming language to use
2009 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002010 GpuKernelWriterDataHolder(GpuTargetLanguage language)
2011 : tiles(language), arguments(language), code(""), _language(language)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002012 {
2013 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002014
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002015 /**
2016 * @brief Get the Gpu programming language used
2017 *
2018 * @return GpuTargetLanguage the Gpu programming language
2019 */
2020 GpuTargetLanguage programming_language() const
2021 {
2022 return _language;
2023 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002024
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002025 /**
2026 * @brief @ref GpuTileRegistry
2027 *
2028 */
2029 GpuTileRegistry tiles{};
2030 /**
2031 * @brief @ref GpuTensorArgumentRegistry
2032 *
2033 */
2034 GpuTensorArgumentRegistry arguments{};
2035 /**
2036 * @brief @ref GpuOutputSampler.
2037 *
2038 */
2039 GpuOutputSampler output_sampler{};
2040 /**
2041 * @brief Source code
2042 *
2043 */
2044 std::string code{};
2045
2046 // GpuExtensionRegistry extensions{};
2047private:
2048 GpuTargetLanguage _language;
2049};
2050
2051struct LWS
2052{
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002053 int32_t x{ 1 };
2054 int32_t y{ 1 };
2055 int32_t z{ 1 };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002056};
2057
2058/**
2059 * @brief Utility class used to get the tile from the operand. If the operand is not a tile, @ref OperandUnpacker
2060 * declare an anonymous tile in the tile registry.
2061 */
2062class OperandUnpacker
2063{
2064public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002065 OperandUnpacker(GpuTileRegistry &tiles, GpuTensorArgumentRegistry &arguments)
2066 : _tiles(tiles), _arguments(arguments)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002067 {
2068 // Increase the level of the stack to allocate possible temporary tiles
2069 _tiles.increment_registry_level();
2070 };
2071
2072 ~OperandUnpacker()
2073 {
2074 // Decrease the level of the stack to deallocate any temporary tiles
2075 _tiles.decrement_registry_level();
2076 }
2077
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002078 IVectorTile *unpack(const Operand &src)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002079 {
2080 // Get the tile
2081 if(src.type() == OperandType::Tile)
2082 {
2083 assert(_tiles.has_tile(src.value()));
2084 return _tiles[src.value()];
2085 }
2086 // Create an anonymous tile with a constant
2087 else if(static_cast<int32_t>(src.type()) & 0x00001000)
2088 {
2089 if(src.type() == OperandType::ScalarTile)
2090 {
2091 ScalarTileCoord coord = src.scalar_tile_coordinate();
2092 assert(_tiles.has_tile(src.value()));
2093 assert(coord.x >= 0);
2094 assert(coord.y >= 0);
2095 auto val = _tiles[src.value()]->scalar(coord.x, coord.y);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002096 return _tiles.insert({ { { val.str } } }, val.type.dt);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002097 }
2098 else
2099 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002100 return _tiles.insert({ { { src.value() } } }, to_tile_data_type(src.type()));
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002101 }
2102 }
2103 // Create an anonymous tile with the tensor component
2104 else
2105 {
2106 assert(_arguments.has_tensor_argument(src.value()));
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002107 auto x = _arguments[src.value()];
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002108 const std::string val = x->component(to_tensor_component(src.type()));
2109 const DataType dt = x->component_data_type();
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002110 return _tiles.insert({ { { val } } }, dt);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002111 }
2112 }
2113
2114private:
2115 DataType to_tile_data_type(OperandType x)
2116 {
2117 return static_cast<DataType>(static_cast<int32_t>(x) & 0x00ff);
2118 }
2119
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002120 TensorComponentType to_tensor_component(OperandType x)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002121 {
2122 switch(x)
2123 {
2124 case OperandType::TensorDim0:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002125 return TensorComponentType::Dim0;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002126 case OperandType::TensorDim1:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002127 return TensorComponentType::Dim1;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002128 case OperandType::TensorDim2:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002129 return TensorComponentType::Dim2;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002130 case OperandType::TensorDim3:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002131 return TensorComponentType::Dim3;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002132 case OperandType::TensorDim4:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002133 return TensorComponentType::Dim4;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002134 case OperandType::TensorStride1:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002135 return TensorComponentType::Stride1;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002136 case OperandType::TensorStride2:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002137 return TensorComponentType::Stride2;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002138 case OperandType::TensorStride3:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002139 return TensorComponentType::Stride3;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002140 case OperandType::TensorStride4:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002141 return TensorComponentType::Stride4;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002142 case OperandType::TensorDim1xDim2:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002143 return TensorComponentType::Dim1xDim2;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002144 case OperandType::TensorDim1xDim2xDim3:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002145 return TensorComponentType::Dim1xDim2xDim3;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002146 case OperandType::TensorDataOffset:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002147 return TensorComponentType::OffsetFirstElement;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002148 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002149 assert(false);
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002150 return TensorComponentType::Unknown;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002151 }
2152 }
2153
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002154 GpuTileRegistry &_tiles;
2155 GpuTensorArgumentRegistry &_arguments;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002156};
2157
2158/**
2159 * @brief Utility class used to get the tensor argument from the operand. If the operand is not a tile, @ref OperandUnpacker
2160 * declare an anonymous tile in the tile registry.
2161 * Tensor dimension reduction aims for reducing the tensor data dimension while keeping data's tensor structure.
2162 */
2163class TensorOperandUnpacker
2164{
2165public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002166 TensorOperandUnpacker(GpuTensorArgumentRegistry &arguments)
2167 : _arguments(arguments){};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002168
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002169 IGpuTensorArgument *unpack(const TensorOperand &src)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002170 {
2171 assert(_arguments.has_tensor_argument(src.value()));
2172 return _arguments[src.value()];
2173 }
2174
2175private:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002176 GpuTensorArgumentRegistry &_arguments;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002177};
2178
2179/**
2180 * @brief The GpuKernel will be used in three occasions (stages):
2181 * #- Compilation stage
2182 * #- Tuning stage
2183 * #- Dispatch stage
2184 */
2185struct GpuKernel
2186{
2187 // Compilation stage
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002188 std::string code{}; // Source code, required for the compilation stage
2189 std::vector<GpuExtensions> list_extensions{}; // Extensions, required for the compilation stage
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002190 // Tuning stage
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002191 std::string config_id{}; // Unique id, required for the tuning stage
2192 std::vector<LWS> list_lws{}; // LWS to test, required for the tuning stage
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002193 // Dispatch stage
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002194 GpuOutputSampler output_sampler{}; // GpuOutputSampler, required for the dispatch stage
2195 std::vector<std::pair<int32_t, GpuTensorStorage>> list_tensor_storages; // List of tensor storages, required for the dispatch stage
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002196 std::vector<std::pair<int32_t, TensorComponentType>> list_tensor_components; // List of tensor components (width, stride,..), required for the dispatch stage)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002197};
2198
SiCong Li16b37522023-07-18 17:56:49 +01002199// Generate all extension pragmas (hardcoded for now)
2200inline std::string generate_extensions()
2201{
2202 std::string ext = R"(
2203#if defined(cl_khr_fp16)
2204#pragma OPENCL EXTENSION cl_khr_fp16 : enable
2205#endif // defined(cl_khr_fp16)
2206
2207#if defined(cl_arm_integer_dot_product_int8)
2208#pragma OPENCL EXTENSION cl_arm_integer_dot_product_int8 : enable
2209#endif // defined(cl_arm_integer_dot_product_int8)
2210
2211#if defined(cl_arm_integer_dot_product_accumulate_int8)
2212#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int8 : enable
2213#endif // defined(cl_arm_integer_dot_product_accumulate_int8)
2214
2215#if defined(cl_arm_printf)
2216#pragma OPENCL EXTENSION cl_arm_printf : enable
2217#endif // defined(cl_arm_printf);
2218)";
2219 return ext;
2220}
2221
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002222// This function should produce an object with the source
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002223inline std::string generate_code(GpuKernelWriterDataHolder &in, const std::string &name)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002224{
2225 std::string code;
SiCong Li16b37522023-07-18 17:56:49 +01002226 code += generate_extensions();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002227 code += "__kernel void ";
2228 code += name;
2229 code += "(\n";
2230
2231 auto IdSpaces = in.arguments.IdSpace_declarations();
2232
2233 std::vector<std::string> arg_str;
2234
2235 auto tensor_args = in.arguments.tensor_argument_declarations();
2236
2237 for(auto &i : tensor_args)
2238 {
2239 // For each tensor used, get the storage and tensor components
2240 auto storages = i->storage_declarations();
2241 auto components = i->component_declarations();
2242
2243 for(auto &y : storages)
2244 {
2245 std::string str;
2246 str += i->storage_type_declaration(y);
2247 str += " ";
2248 str += i->storage(y);
2249 arg_str.push_back(str);
2250 }
2251
2252 for(auto &y : components)
2253 {
2254 std::string str;
2255 str += i->component_type_declaration();
2256 str += " ";
2257 str += i->component(y);
2258 arg_str.push_back(str);
2259 }
2260 }
2261
2262 for(size_t i = 0; i < arg_str.size(); ++i)
2263 {
2264 code += arg_str[i];
2265 if(i + 1 < arg_str.size())
2266 {
2267 code += ",\n";
2268 }
2269 }
2270
2271 code += ")\n";
2272 code += "{\n";
2273 code += in.code;
2274 code += "}\n";
2275
2276 return code;
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002277}
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002278
2279/**
2280 * @brief This class is responsible to map a N-Tensor to a 3d tensor. The mapper needs the GpuSampler to know
2281 * how to reduce the dimensionality of a tensor
2282 *
2283 */
2284class GpuTensor3dMapper
2285{
2286public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002287 GpuTensor3dMapper(IGpuTensorArgument *tensor, GpuSampler sampler)
2288 : _sampler(sampler), _tensor(tensor){};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002289
2290 std::string tensor_component_x() const
2291 {
2292 const auto format = _sampler.format;
2293 switch(format)
2294 {
2295 case TensorSamplerFormat::C_WH_1:
2296 case TensorSamplerFormat::C_W_H:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002297 return _tensor->component(TensorComponentType::Dim0);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002298 default:
2299 std::cout << "Unsupported tensor format" << std::endl;
2300 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002301 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002302 }
2303 }
2304
2305 std::string tensor_component_y() const
2306 {
2307 const auto format = _sampler.format;
2308 switch(format)
2309 {
2310 case TensorSamplerFormat::C_WH_1:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002311 return _tensor->component(TensorComponentType::Dim1xDim2);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002312 case TensorSamplerFormat::C_W_H:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002313 return _tensor->component(TensorComponentType::Dim1);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002314 default:
2315 std::cout << "Unsupported tensor format" << std::endl;
2316 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002317 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002318 }
2319 }
2320
2321 std::string tensor_component_z() const
2322 {
2323 const auto format = _sampler.format;
2324 switch(format)
2325 {
2326 case TensorSamplerFormat::C_WH_1:
2327 return "1";
2328 case TensorSamplerFormat::C_W_H:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002329 return _tensor->component(TensorComponentType::Dim2);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002330 default:
2331 std::cout << "Unsupported tensor format" << std::endl;
2332 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002333 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002334 }
2335 }
2336
2337 std::string tensor_component_stride_y() const
2338 {
2339 const auto format = _sampler.format;
2340 switch(format)
2341 {
2342 case TensorSamplerFormat::C_WH_1:
2343 case TensorSamplerFormat::C_W_H:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002344 return _tensor->component(TensorComponentType::Stride1);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002345 default:
2346 std::cout << "Unsupported tensor format" << std::endl;
2347 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002348 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002349 }
2350 }
2351
2352 std::string tensor_component_stride_z() const
2353 {
2354 const auto format = _sampler.format;
2355 switch(format)
2356 {
2357 case TensorSamplerFormat::C_WH_1:
2358 return "0";
2359 case TensorSamplerFormat::C_W_H:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002360 return _tensor->component(TensorComponentType::Stride2);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002361 default:
2362 std::cout << "Unsupported tensor format" << std::endl;
2363 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002364 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002365 }
2366 }
2367
2368 std::string tensor_component_stride_batch() const
2369 {
2370 const auto format = _sampler.format;
2371 switch(format)
2372 {
2373 case TensorSamplerFormat::C_WH_1:
2374 case TensorSamplerFormat::C_W_H:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002375 return _tensor->component(TensorComponentType::Stride3);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002376 default:
2377 std::cout << "Unsupported tensor format" << std::endl;
2378 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002379 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002380 }
2381 }
2382
2383 bool is_one_component_x() const
2384 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002385 auto t = _tensor->format();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002386 const auto format = _sampler.format;
2387 switch(format)
2388 {
2389 case TensorSamplerFormat::C_WH_1:
2390 case TensorSamplerFormat::C_W_H:
2391 return t.shape[0] == 1;
2392 default:
2393 std::cout << "Unsupported tensor format" << std::endl;
2394 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002395 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002396 }
2397 }
2398
2399 bool is_one_component_y() const
2400 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002401 auto t = _tensor->format();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002402 const auto format = _sampler.format;
2403 switch(format)
2404 {
2405 case TensorSamplerFormat::C_WH_1:
2406 return (t.shape[1] * t.shape[2]) == 1;
2407 case TensorSamplerFormat::C_W_H:
2408 return t.shape[1] == 1;
2409 default:
2410 std::cout << "Unsupported tensor format" << std::endl;
2411 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002412 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002413 }
2414 }
2415
2416 bool is_one_component_z() const
2417 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002418 auto t = _tensor->format();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002419 const auto format = _sampler.format;
2420 switch(format)
2421 {
2422 case TensorSamplerFormat::C_WH_1:
2423 return true;
2424 case TensorSamplerFormat::C_W_H:
2425 return t.shape[2] == 1;
2426 default:
2427 std::cout << "Unsupported tensor format" << std::endl;
2428 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002429 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002430 }
2431 }
2432
2433 bool is_one_component_batch() const
2434 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002435 auto t = _tensor->format();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002436 const auto format = _sampler.format;
2437 switch(format)
2438 {
2439 case TensorSamplerFormat::C_WH_1:
2440 case TensorSamplerFormat::C_W_H:
2441 return t.shape[3] == 1;
2442 default:
2443 std::cout << "Unsupported tensor format" << std::endl;
2444 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002445 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002446 }
2447 }
2448
2449 GpuSampler gpu_sampler() const
2450 {
2451 return _sampler;
2452 }
2453
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002454 IGpuTensorArgument *tensor_argument() const
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002455 {
2456 return _tensor;
2457 }
2458
2459private:
2460 GpuSampler _sampler;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002461 IGpuTensorArgument *_tensor;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002462};
2463
2464struct GpuKernelWriterAttribute
2465{
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002466 bool return_tensor_component_by_value{ false };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002467};
2468
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002469enum class RoundingMode
2470{
2471 None,
2472 Rte,
2473 Rtz,
2474 Rtp,
2475 Rtn
2476};
2477
2478// https://llvm.org/docs/tutorial/MyFirstLanguageFrontend/LangImpl05.html
2479class IGpuKernelWriter
2480{
2481public:
2482 virtual ~IGpuKernelWriter() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002483
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002484 virtual void set_IdSpace(int32_t id) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002485
2486 virtual void import_tile(const std::string &dst, const IVectorTile *src) = 0;
2487
2488 virtual void declare_argument(const std::string &name, const TensorInfo &tensor) = 0;
2489
2490 virtual void declare_tile(const std::string &name, const TileInfo &info) = 0;
2491
2492 virtual void declare_const_tile(const std::string &name, const std::vector<std::vector<std::string>> &in, DataType dt) = 0;
2493
2494 virtual void write_text(const std::string &x) = 0;
2495
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002496 virtual void compound_statement_begin() = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002497
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002498 virtual void compound_statement_end() = 0;
2499
2500 // Operations
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002501 virtual void op_get_global_id(const Operand &dst_var, int32_t dim) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002502
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002503 virtual void op_get_global_coord(const Operand &dst, const Operand &step, const TensorOperand &tensor, int32_t dim) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002504
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002505 virtual void op_get_global_batch(const Operand &dst, const TensorOperand &tensor) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002506
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002507 virtual void op_get_global_size(const Operand &dst_var, int32_t dim) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002508
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002509 virtual void op_unary_expression(const Operand &dst, UnaryOp op, const Operand &src) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002510
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002511 virtual void op_binary_expression(const Operand &dst, const Operand &lhs, BinaryOp op, const Operand &rhs) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002512
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002513 virtual void op_assign(const Operand &dst_name, const Operand &src_name) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002514
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002515 virtual void op_unary_elementwise_function(const Operand &dst_name, UnaryFunction func, const Operand &src_name) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002516
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002517 virtual void op_binary_elementwise_function(const Operand &dst_name, BinaryFunction func, const Operand &first_name, const Operand &second_name) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002518
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002519 virtual void op_ternary_elementwise_function(const Operand &dst_name, TernaryFunction func, const Operand &first_name, const Operand &second_name, const Operand &third_name) = 0;
2520
2521 virtual void op_if_header(const Operand &lhs, BinaryOp op, const Operand &rhs) = 0;
2522
2523 virtual void op_else_if_header(const Operand &lhs, BinaryOp op, const Operand &rhs) = 0;
2524
2525 virtual void op_else_header() = 0;
2526
Nikolaj Jensenfab6c212023-06-27 14:13:24 +01002527 virtual void op_for_loop_header(const Operand &var_name, BinaryOp cond_op, const Operand &cond_value, const Operand &update_var, AssignmentOp update_op, const Operand &update_value) = 0;
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002528
2529 virtual void op_load_indirect(const TensorOperand &tensor, const Operand &dst, const Operand &x, const Operand &y_indirect, const Operand &z, const Operand &b = Operand("0", OperandType::ScalarInt32)) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002530
2531 virtual void op_load_immediate(const TensorOperand &tensor, const Operand &dst, const Operand &x, const Operand &y, const Operand &z, const Operand &b = Operand("0", OperandType::ScalarInt32), const Operand &dilation_y = Operand("1", OperandType::ScalarInt32)) = 0;
2532
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002533 virtual void op_store_immediate(const TensorOperand &tensor, const Operand &src, const Operand &x, const Operand &y, const Operand &z, const Operand &b = Operand("0", OperandType::ScalarInt32)) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002534
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002535 virtual void op_cast_expression(const Operand &dst, const Operand &src, ConvertPolicy policy) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002536
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002537 virtual void op_return() = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002538
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002539 // Utils
2540 // It is the process of converting
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002541 virtual void util_get_indirect_buffer(const Operand &dst, const TensorOperand &tensor, const Operand &x,
2542 const Operand &y, const Operand &x_off, const Operand &y_off) = 0;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002543};
2544
2545enum class GpuLoadStoreType
2546{
2547 Load = 1,
2548 Store = 2
2549};
2550
2551class IGpuLoadStoreHelperWriter
2552{
2553public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002554 IGpuLoadStoreHelperWriter(IGpuKernelWriter *x, GpuTensor3dMapper mapper, GpuLoadStoreType type)
2555 : _writer(x), _mapper(mapper), _type(type)
2556 {
2557 }
2558
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002559 IGpuLoadStoreHelperWriter(const IGpuLoadStoreHelperWriter &) = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002560
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002561 IGpuLoadStoreHelperWriter &operator=(const IGpuLoadStoreHelperWriter &) = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002562
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002563 virtual ~IGpuLoadStoreHelperWriter() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002564
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002565 virtual void initialize(IVectorTile *dst, IVectorTile *x, IVectorTile *z, IVectorTile *b) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002566
2567 virtual void write(const std::pair<int32_t, std::string> &y) = 0;
2568
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002569 virtual void finalize() = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002570
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002571protected:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002572 IGpuKernelWriter *_writer;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002573 GpuTensor3dMapper _mapper;
2574 GpuLoadStoreType _type;
2575};
2576
2577class ClLoadStoreBufferHelperWriter : public IGpuLoadStoreHelperWriter
2578{
2579public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002580 ClLoadStoreBufferHelperWriter(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type)
2581 : IGpuLoadStoreHelperWriter(x, mapper, type)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002582 {
2583 }
2584
2585 ClLoadStoreBufferHelperWriter(const ClLoadStoreBufferHelperWriter &) = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002586
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002587 ClLoadStoreBufferHelperWriter &operator=(const ClLoadStoreBufferHelperWriter &) = default;
2588
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002589 static bool
2590 validate(IGpuKernelWriter *x, GpuTensor3dMapper mapper, GpuLoadStoreType type, IVectorTile *dst)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002591 {
2592 CKW_UNUSED(x, type, dst);
2593
2594 if(mapper.gpu_sampler().storage != GpuSamplerTensorStorage::BufferUint8Ptr)
2595 {
2596 return false;
2597 }
2598 return true;
2599 }
2600
2601 void initialize(IVectorTile *dst, IVectorTile *x, IVectorTile *z, IVectorTile *b) override
2602 {
2603 assert(validate(_writer, _mapper, _type, dst));
2604
2605 _dst = dst;
2606 _ls_width_full = dst->format().w;
2607
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002608 _coord_x = x->scalar(0, 0).str;
2609 _coord_z = z->scalar(0, 0).str;
2610 _coord_b = b->scalar(0, 0).str;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002611 _coord_orig_z = _coord_z;
2612
2613 out_of_bound_initialize_x(_coord_x);
2614 out_of_bound_initialize_z(_coord_z);
2615
2616 /*
2617 meaning of else:
2618 - x: partial load/store
2619 - y: no load/store operation
2620 - z: no load/store operation
2621 if(x)
2622 {
2623 if(z)
2624 {
2625 if(y)
2626 {
2627 // full load/store width
2628 }
2629 else
2630 {
2631 // no load/store
2632 }
2633 }
2634 else
2635 {
2636 // no load/store
2637 }
2638 }
2639 else
2640 {
2641 if(z)
2642 {
2643 if(y)
2644 {
2645 // partial load/store width
2646 }
2647 else
2648 {
2649 // no load/store
2650 }
2651 }
2652 else
2653 {
2654 // no load/store
2655 }
2656 }
2657 */
2658 }
2659
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002660 void write(const std::pair<int32_t, std::string> &y) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002661 {
2662 int32_t idx_y = y.first;
2663 std::string coord_y = y.second;
2664
2665 // The only check required is on Y.
2666 out_of_bound_initialize_y(coord_y);
2667
2668 const std::string dst = _dst->vector(idx_y).str;
2669 const std::string address = to_ls_buffer_address(_coord_x, coord_y, _coord_z, _coord_b);
2670 const std::string ls_buf = to_ls_buffer(_type, _ls_width_full, dst, address);
2671
2672 _writer->write_text(ls_buf);
2673 _writer->write_text(";\n");
2674
2675 out_of_bound_finalize_y(dst);
2676
2677 // The left over load/store will be written in the finalize stage
2678 if(_ls_width_part.size() != 0)
2679 {
2680 int32_t w = 0;
2681 for(auto &p : _ls_width_part)
2682 {
2683 const std::string dst0 = _dst->vector(w, p, idx_y).str;
2684 const std::string coord_x = _coord_x + " + " + std::to_string(w);
2685 const std::string address = to_ls_buffer_address(coord_x, coord_y, _coord_z, _coord_b);
2686 const std::string ls_buf0 = to_ls_buffer(_type, p, dst0, address);
2687 _leftovers_x.push_back(std::make_pair(std::make_pair(dst0, coord_y), ls_buf0));
2688
2689 w += p;
2690 }
2691 }
2692 }
2693
2694 void finalize() override
2695 {
2696 out_of_bound_finalize_z();
2697 out_of_bound_finalize_x();
2698 }
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002699
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002700private:
2701 IVectorTile *_dst{ nullptr };
2702 int32_t _ls_width_full{ 0 };
2703 std::vector<int32_t> _ls_width_part{};
2704 std::vector<std::pair<std::pair<std::string, std::string>, std::string>> _leftovers_x{};
2705 std::string _coord_x{};
2706 std::string _coord_z{};
2707 std::string _coord_orig_z{};
2708 std::string _coord_b{};
2709
2710 void out_of_bound_initialize_x(std::string &coord)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002711 {
2712 if(_mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin)
2713 {
2714 auto tensor_format = _mapper.tensor_argument()->format();
2715 auto shape = tensor_format.shape;
2716
2717 _ls_width_part = decompose_leftover_ls_vector_width(shape[0] % _ls_width_full);
2718 if(_ls_width_part.size() != 0)
2719 {
2720 _writer->write_text("if(" + coord + " > 0)\n");
2721 _writer->compound_statement_begin();
2722 }
2723 }
2724 };
2725
2726 void out_of_bound_finalize_x()
2727 {
2728 if(_mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin)
2729 {
2730 if(_ls_width_part.size() != 0)
2731 {
2732 _writer->compound_statement_end();
2733 _writer->write_text("else\n");
2734 _writer->compound_statement_begin();
2735
2736 out_of_bound_initialize_z(_coord_orig_z);
2737 for(auto &i : _leftovers_x)
2738 {
2739 out_of_bound_initialize_y(i.first.second);
2740 _writer->write_text(i.second);
2741 _writer->write_text(";\n");
2742 out_of_bound_finalize_y(i.first.first);
2743 }
2744 out_of_bound_finalize_z();
2745 _writer->compound_statement_end();
2746 }
2747 }
2748 };
2749
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002750 void out_of_bound_initialize_y(std::string &coord)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002751 {
2752 std::string max = "";
2753
2754 const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
2755
2756 switch(address_mode_y)
2757 {
2758 case TensorSamplerAddressModeY::Skip:
2759 case TensorSamplerAddressModeY::ClampToBorder:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002760 // NOTE: This line should not be moved outside of the switch statement.
2761 // The reason for that is because when we query the component, the component is marked as used
2762 // and added to the list of arguments of the kernel. Since, not in all cases this component is required,
2763 // we should request the component only when used
2764 max = _mapper.tensor_component_y();
2765 _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n");
2766 _writer->compound_statement_begin();
2767 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002768 case TensorSamplerAddressModeY::SkipMinEdgeOnly:
2769 case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002770 _writer->write_text("if(" + coord + " >= 0)\n");
2771 _writer->compound_statement_begin();
2772 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002773 case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
2774 case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002775 max = _mapper.tensor_component_y();
2776 _writer->write_text("if(" + coord + " < " + max + ")\n");
2777 _writer->compound_statement_begin();
2778 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002779 case TensorSamplerAddressModeY::ClampToNearest:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002780 max = _mapper.tensor_component_y();
2781 coord = "clamp(" + coord + ", 0, " + max + " - 1)";
2782 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002783 case TensorSamplerAddressModeY::ClampToMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002784 max = _mapper.tensor_component_y();
2785 coord = "min(" + coord + ", " + max + " - 1)";
2786 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002787 case TensorSamplerAddressModeY::ClampToMinEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002788 coord = "max(" + coord + ", 0)";
2789 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002790 case TensorSamplerAddressModeY::None:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002791 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002792 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002793 std::cout << "Unsupported address mode for write_out_of_bound_check_yz" << std::endl;
2794 assert(false);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002795 }
2796 };
2797
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002798 void out_of_bound_finalize_y(const std::string &dst)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002799 {
2800 const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
2801
2802 switch(address_mode_y)
2803 {
2804 case TensorSamplerAddressModeY::ClampToBorder:
2805 case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
2806 case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
2807 case TensorSamplerAddressModeY::Skip:
2808 case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
2809 case TensorSamplerAddressModeY::SkipMinEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002810 _writer->compound_statement_end();
2811 break;
SiCong Li16b37522023-07-18 17:56:49 +01002812 case TensorSamplerAddressModeY::None:
2813 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002814
2815 default:
2816 assert(false);
2817 }
2818
2819 switch(address_mode_y)
2820 {
2821 case TensorSamplerAddressModeY::ClampToBorder:
2822 case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
2823 case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002824 _writer->write_text("else\n");
2825 _writer->compound_statement_begin();
2826 _writer->write_text(dst);
2827 _writer->write_text(" = 0.0f;\n");
2828 _writer->compound_statement_end();
2829 break;
SiCong Li16b37522023-07-18 17:56:49 +01002830 case TensorSamplerAddressModeY::None:
2831 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002832
2833 default:
2834 assert(false);
2835 }
2836 };
2837
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002838 void out_of_bound_initialize_z(std::string &coord)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002839 {
2840 std::string max = "";
2841
2842 const auto address_mode_z = _mapper.gpu_sampler().address_mode_z;
2843
2844 switch(address_mode_z)
2845 {
2846 case TensorSamplerAddressModeZ::Skip:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002847 max = _mapper.tensor_component_z();
2848 _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n");
2849 _writer->compound_statement_begin();
2850 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002851 case TensorSamplerAddressModeZ::SkipMinEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002852 _writer->write_text("if(" + coord + " >= 0)\n");
2853 _writer->compound_statement_begin();
2854 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002855 case TensorSamplerAddressModeZ::SkipMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002856 max = _mapper.tensor_component_z();
2857 _writer->write_text("if(" + coord + " < " + max + ")\n");
2858 _writer->compound_statement_begin();
2859 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002860 case TensorSamplerAddressModeZ::ClampToNearest:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002861 max = _mapper.tensor_component_z();
2862 coord = "clamp(" + coord + ", 0, " + max + " - 1)";
2863 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002864 case TensorSamplerAddressModeZ::ClampToMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002865 max = _mapper.tensor_component_z();
2866 coord = "min(" + coord + ", " + max + " - 1)";
2867 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002868 case TensorSamplerAddressModeZ::ClampToMinEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002869 coord = "max(" + coord + ", 0)";
2870 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002871 case TensorSamplerAddressModeZ::None:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002872 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002873 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002874 std::cout << "Unsupported address mode for write_out_of_bound_check_yz" << std::endl;
2875 assert(false);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002876 }
2877 };
2878
2879 void out_of_bound_finalize_z()
2880 {
2881 const auto address_mode_z = _mapper.gpu_sampler().address_mode_z;
2882
2883 switch(address_mode_z)
2884 {
2885 case TensorSamplerAddressModeZ::Skip:
2886 case TensorSamplerAddressModeZ::SkipMinEdgeOnly:
2887 case TensorSamplerAddressModeZ::SkipMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002888 _writer->compound_statement_end();
2889 break;
SiCong Li16b37522023-07-18 17:56:49 +01002890 case TensorSamplerAddressModeZ::None:
2891 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002892
2893 default:
2894 assert(false);
2895 }
2896 };
2897
2898 std::vector<int32_t> decompose_leftover_ls_vector_width(int32_t ls_leftover_vector_width) const
2899 {
2900 std::vector<int32_t> x;
2901
2902 switch(ls_leftover_vector_width)
2903 {
2904 case 0:
2905 break;
2906 case 1:
2907 case 2:
2908 case 3:
2909 case 4:
2910 case 8:
2911 case 16:
2912 x.push_back(ls_leftover_vector_width);
2913 break;
2914 case 5:
2915 x.push_back(4);
2916 x.push_back(1);
2917 break;
2918 case 6:
2919 x.push_back(4);
2920 x.push_back(2);
2921 break;
2922 case 7:
2923 x.push_back(4);
2924 x.push_back(3);
2925 break;
2926 case 9:
2927 x.push_back(8);
2928 x.push_back(1);
2929 break;
2930 case 10:
2931 x.push_back(8);
2932 x.push_back(2);
2933 break;
2934 case 11:
2935 x.push_back(8);
2936 x.push_back(3);
2937 break;
2938 case 12:
2939 x.push_back(8);
2940 x.push_back(4);
2941 break;
2942 case 13:
2943 x.push_back(8);
2944 x.push_back(4);
2945 x.push_back(1);
2946 break;
2947 case 14:
2948 x.push_back(8);
2949 x.push_back(4);
2950 x.push_back(2);
2951 break;
2952 case 15:
2953 x.push_back(8);
2954 x.push_back(4);
2955 x.push_back(3);
2956 break;
2957
2958 default:
2959 assert(false);
2960 }
2961 return x;
2962 }
2963
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002964 std::string to_ls_buffer(GpuLoadStoreType type, int32_t vector_width, const std::string &data,
2965 const std::string &address)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002966 {
2967 switch(type)
2968 {
2969 case GpuLoadStoreType::Load:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002970 if(vector_width != 1)
2971 {
2972 return data + " = vload" + std::to_string(vector_width) + "(0, " + address + ")";
2973 }
2974 else
2975 {
2976 return data + " = *(" + address + ")";
2977 }
2978 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002979 case GpuLoadStoreType::Store:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002980 if(vector_width != 1)
2981 {
2982 return "vstore" + std::to_string(vector_width) + "(" + data + ", 0, " + address + ")";
2983 }
2984 else
2985 {
2986 return "*(" + address + ") = " + data;
2987 }
2988 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002989 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002990 std::cout << "Unsupported GpuLoadStoreType" << std::endl;
2991 assert(false);
2992 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002993 }
2994 }
2995
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002996 std::string to_ls_buffer_address(const std::string &x, const std::string &y, const std::string &z,
2997 const std::string &b) const
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002998 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002999 auto tensor_storage = static_cast<GpuTensorStorage>(_mapper.gpu_sampler().storage);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003000 assert(tensor_storage == GpuTensorStorage::BufferUint8Ptr);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003001 const std::string ptr_buf = _mapper.tensor_argument()->storage(tensor_storage);
3002 const std::string dst_type = get_cl_data_type(_dst->format().dt, 1);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003003
3004 std::string address;
3005 address += "(__global ";
3006 address += dst_type;
3007 address += "*)(";
3008 address += ptr_buf;
3009 if(x != "0" && (_mapper.is_one_component_x() != true))
3010 {
3011 address += " + (";
3012 address += x + ") * sizeof(" + dst_type + ")";
3013 }
Jakub Sujake1c96e72023-07-31 13:36:58 +01003014 if(y != "0")
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003015 {
3016 const std::string stride_y = _mapper.tensor_component_stride_y();
3017 address += " + (";
3018 address += y + ")";
3019 address += " * ";
3020 address += stride_y;
3021 }
3022 if(z != "0" && (_mapper.is_one_component_z() != true))
3023 {
3024 const std::string stride_z = _mapper.tensor_component_stride_z();
3025 address += " + (";
3026 address += z + ")";
3027 address += " * ";
3028 address += stride_z;
3029 }
3030 if(b != "0" && (_mapper.is_one_component_batch() != true))
3031 {
3032 const std::string stride_b = _mapper.tensor_component_stride_batch();
3033 address += " + (";
3034 address += b + ")";
3035 address += " * ";
3036 address += stride_b;
3037 }
3038 address += ")";
3039 return address;
3040 }
3041};
3042
3043class ClLoadStoreImage2dHelperWriter : public IGpuLoadStoreHelperWriter
3044{
3045public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003046 static bool
3047 validate(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type, IVectorTile *dst)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003048 {
3049 CKW_UNUSED(x);
3050
3051 if(dst->format().w != 4)
3052 {
3053 return false;
3054 }
3055 if(mapper.gpu_sampler().address_mode_x != TensorSamplerAddressModeX::None)
3056 {
3057 return false;
3058 }
3059 if(mapper.gpu_sampler().address_mode_z != TensorSamplerAddressModeZ::None)
3060 {
3061 return false;
3062 }
3063 if(mapper.gpu_sampler().storage != GpuSamplerTensorStorage::Image2dReadOnly && type == GpuLoadStoreType::Load)
3064 {
3065 return false;
3066 }
3067 if(mapper.gpu_sampler().storage != GpuSamplerTensorStorage::Image2dWriteOnly && type == GpuLoadStoreType::Store)
3068 {
3069 return false;
3070 }
3071 if((dst->format().dt != DataType::Fp32) && (dst->format().dt != DataType::Fp16))
3072 {
3073 return false;
3074 }
3075 return true;
3076 /*
3077 - x: Only GpuSamplerAddressModeX::None is supported and vector length = 4
3078 - z: Only GpuSamplerAddressModeZ::None is supported
3079 */
3080 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003081
3082 ClLoadStoreImage2dHelperWriter(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type)
3083 : IGpuLoadStoreHelperWriter(x, mapper, type)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003084 {
3085 }
3086
3087 ClLoadStoreImage2dHelperWriter(const ClLoadStoreImage2dHelperWriter &) = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003088
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003089 ClLoadStoreImage2dHelperWriter &operator=(const ClLoadStoreImage2dHelperWriter &) = default;
3090
3091 void initialize(IVectorTile *dst, IVectorTile *x, IVectorTile *z, IVectorTile *b) override
3092 {
3093 assert(validate(_writer, _mapper, _type, dst));
3094
3095 _dst = dst;
3096 _ls_width_full = dst->format().w;
3097 _coord_x = x->scalar(0, 0).str;
3098 _coord_z = z->scalar(0, 0).str;
3099 _coord_b = b->scalar(0, 0).str;
3100
3101 /*
3102 if(y)
3103 {
3104 // full load/store width
3105 }
3106 else
3107 {
3108 // no load/store
3109 }
3110 */
3111 }
3112
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003113 void write(const std::pair<int32_t, std::string> &y) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003114 {
3115 int32_t idx_y = y.first;
3116 std::string coord_y = y.second;
3117
3118 // The only check required is on Y.
3119 out_of_bound_initialize_y(coord_y);
3120
3121 const std::string dst = _dst->vector(idx_y).str;
3122 const std::string sampler = to_ls_image2d_sampler();
3123 const std::string coord = to_ls_image2d_coord(_coord_x, coord_y, _coord_z, _coord_b);
3124 const std::string ls_buf = to_ls_image2d(_type, _ls_width_full, dst, sampler, coord);
3125
3126 _writer->write_text(ls_buf);
3127 _writer->write_text(";\n");
3128
3129 out_of_bound_finalize_y(dst);
3130 }
3131
3132 void finalize() override
3133 {
3134 }
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003135
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003136private:
3137 IVectorTile *_dst{ nullptr };
3138 int32_t _ls_width_full{ 0 };
3139 std::string _coord_x{};
3140 std::string _coord_z{};
3141 std::string _coord_b{};
3142
3143 void out_of_bound_initialize_y(std::string &coord)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003144 {
3145 std::string max = "";
3146
3147 const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
3148
3149 switch(address_mode_y)
3150 {
3151 case TensorSamplerAddressModeY::Skip:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003152 max = _mapper.tensor_component_y();
3153 _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n");
3154 _writer->compound_statement_begin();
3155 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003156 case TensorSamplerAddressModeY::SkipMinEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003157 _writer->write_text("if(" + coord + " >= 0)\n");
3158 _writer->compound_statement_begin();
3159 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003160 case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003161 max = _mapper.tensor_component_y();
3162 _writer->write_text("if(" + coord + " < " + max + ")\n");
3163 _writer->compound_statement_begin();
3164 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003165 case TensorSamplerAddressModeY::ClampToBorder:
3166 case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
3167 case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
3168 case TensorSamplerAddressModeY::ClampToNearest:
3169 case TensorSamplerAddressModeY::ClampToMaxEdgeOnly:
3170 case TensorSamplerAddressModeY::ClampToMinEdgeOnly:
3171 case TensorSamplerAddressModeY::None:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003172 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003173 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003174 std::cout << "Unsupported address mode for write_out_of_bound_check_y" << std::endl;
3175 assert(false);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003176 }
3177 };
3178
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003179 void out_of_bound_finalize_y(const std::string &dst)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003180 {
3181 CKW_UNUSED(dst);
3182
3183 const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
3184
3185 switch(address_mode_y)
3186 {
3187 case TensorSamplerAddressModeY::Skip:
3188 case TensorSamplerAddressModeY::SkipMinEdgeOnly:
3189 case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003190 _writer->compound_statement_end();
3191 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003192
3193 default:
3194 assert(false);
3195 }
3196 };
3197
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003198 std::string to_ls_image2d(GpuLoadStoreType type, int32_t vector_width, const std::string &data,
3199 const std::string &sampler, const std::string &coord)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003200 {
3201 CKW_UNUSED(vector_width);
3202
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003203 auto tensor_storage = static_cast<GpuTensorStorage>(_mapper.gpu_sampler().storage);
3204 const std::string image2d_obj = _mapper.tensor_argument()->storage(tensor_storage);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003205 const std::string post_fix = _dst->format().dt == DataType::Fp32 ? "f" : "h";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003206
3207 switch(type)
3208 {
3209 case GpuLoadStoreType::Load:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003210 return data + " = read_image" + post_fix + "(" + image2d_obj + ", " + sampler + ", " + coord + ")";
3211 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003212 case GpuLoadStoreType::Store:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003213 return "write_image" + post_fix + "(" + image2d_obj + ", " + coord + ", " + data + ")";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003214 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003215 assert(false);
3216 std::cout << "Unsupported GpuLoadStoreType" << std::endl;
3217 assert(false);
3218 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003219 }
3220 }
3221
3222 std::string to_ls_image2d_sampler() const
3223 {
3224 const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
3225
3226 switch(address_mode_y)
3227 {
3228 case TensorSamplerAddressModeY::None:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003229 return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003230 case TensorSamplerAddressModeY::Skip:
3231 case TensorSamplerAddressModeY::SkipMinEdgeOnly:
3232 case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
3233 case TensorSamplerAddressModeY::ClampToBorder:
3234 case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
3235 case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003236 return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003237 case TensorSamplerAddressModeY::ClampToNearest:
3238 case TensorSamplerAddressModeY::ClampToMaxEdgeOnly:
3239 case TensorSamplerAddressModeY::ClampToMinEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003240 return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003241 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003242 std::cout << "Unsupported address_mode_coord" << std::endl;
3243 assert(false);
3244 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003245 }
3246 }
3247
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003248 std::string to_ls_image2d_coord(const std::string &x, const std::string &y, const std::string &z,
3249 const std::string &b) const
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003250 {
3251 std::string coord_x = "(" + x + ") >> 2";
3252 std::string coord_y = "(";
3253
Jakub Sujake1c96e72023-07-31 13:36:58 +01003254 if(y != "0")
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003255 {
3256 coord_y += y;
3257 }
3258 if(z != "0" && (_mapper.is_one_component_z() != true))
3259 {
3260 const std::string dim = _mapper.tensor_component_y();
3261 coord_y += " + (";
3262 coord_y += z + ")";
3263 coord_y += " * ";
3264 coord_y += dim;
3265 }
3266 if(b != "0" && (_mapper.is_one_component_batch() != true))
3267 {
3268 const std::string dim0 = _mapper.tensor_component_y();
3269 const std::string dim1 = _mapper.tensor_component_z();
3270 coord_y += " + (";
3271 coord_y += b + ")";
3272 coord_y += " * ";
3273 coord_y += dim0;
3274 coord_y += " * ";
3275 coord_y += dim1;
3276 }
3277 coord_y += ")";
3278 return "(int2)(" + coord_x + ", " + coord_y + ")";
3279 }
3280};
3281
3282/** IGpuLoadStoreHelperWriter factory class */
3283class ClLoadStoreHelperWriterFactory final
3284{
3285public:
3286 /** Static method to call the IGpuLoadStoreHelperWriter class accordingly with the tensor storage set in the mapper
3287 *
3288 *
3289 * @return IGpuLoadStoreHelperWriter
3290 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003291 static std::unique_ptr<IGpuLoadStoreHelperWriter>
3292 create(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003293 {
3294 const auto tensor_storage = mapper.gpu_sampler().storage;
3295 switch(tensor_storage)
3296 {
3297 case GpuSamplerTensorStorage::BufferUint8Ptr:
3298 return std::make_unique<ClLoadStoreBufferHelperWriter>(x, mapper, type);
3299 case GpuSamplerTensorStorage::Image2dReadOnly:
3300 case GpuSamplerTensorStorage::Image2dWriteOnly:
3301 return std::make_unique<ClLoadStoreImage2dHelperWriter>(x, mapper, type);
3302 default:
3303 std::cout << "Unsupported Gpu tensor storage" << std::endl;
3304 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01003305 return nullptr;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003306 }
3307 }
3308};
3309
3310// This utility method needs to go in utils.h
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003311inline bool is_tile_scalar(const IVectorTile *x)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003312{
3313 return x->format().w == 1 && x->format().h == 1;
3314}
3315
3316class ClKernelWriter : public IGpuKernelWriter
3317{
3318public:
3319 ClKernelWriter(GpuKernelWriterAttribute *attr, GpuKernelWriterDataHolder *x)
3320 {
3321 _data = x;
3322 _attr = attr;
3323 }
3324
3325 ClKernelWriter(const ClKernelWriter &) = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003326
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003327 ClKernelWriter &operator=(const ClKernelWriter &) = default;
3328
3329 // A IdSpaced ID is a term used to describe a fragment that is registered in ICode to ensure
3330 // there are no conflicts or ambiguity in the code
3331 void set_IdSpace(int32_t id) override
3332 {
3333 _data->tiles.set_IdSpace(id);
3334 _data->arguments.set_IdSpace(id);
3335 }
3336
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003337 void import_tile(const std::string &dst_name, const IVectorTile *src) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003338 {
3339 _data->tiles.insert(dst_name, src);
3340 }
3341
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003342 void declare_argument(const std::string &name, const TensorInfo &tensor) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003343 {
3344 assert(_data->arguments[name] == nullptr);
3345 _data->arguments.insert(name, tensor, _attr->return_tensor_component_by_value);
3346 }
3347
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003348 void declare_tile(const std::string &name, const TileInfo &format) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003349 {
3350 assert(_data->tiles[name] == nullptr);
3351 _data->tiles.insert(name, format);
3352
3353 IVectorTile *x = _data->tiles[name];
3354
3355 for(auto &t : x->underlying_source_variables())
3356 {
3357 _data->code += t.type.str + " " + t.str + ";\n";
3358 }
3359 }
3360
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003361 void declare_const_tile(const std::string &name, const std::vector<std::vector<std::string>> &in,
3362 DataType dt) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003363 {
3364 assert(_data->tiles[name] == nullptr);
3365 _data->tiles.insert(name, in, dt);
3366 // Note: A constant does not need to be declared in the code
3367 }
3368
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003369 void write_text(const std::string &x) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003370 {
3371 _data->code += x;
3372 }
3373
3374 void compound_statement_begin() override
3375 {
3376 _data->tiles.increment_registry_level();
3377 _data->code += "{\n";
3378 }
3379
3380 void compound_statement_end() override
3381 {
3382 _data->tiles.decrement_registry_level();
3383 _data->code += "}\n";
3384 }
3385
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003386 void op_get_global_id(const Operand &dst_var, int32_t dim) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003387 {
3388 assert(dst_var.type() == OperandType::Tile);
3389 assert(_data->tiles.has_tile(dst_var.value()));
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003390 assert(_data->tiles[dst_var.value()]->format().w == 1 && _data->tiles[dst_var.value()]->format().h == 1); // It must be a scalar variable
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003391
3392 auto var = _data->tiles[dst_var.value()];
3393
3394 _data->code += var->scalar(0, 0).str;
3395 _data->code += " = get_global_id(";
3396 _data->code += std::to_string(dim);
3397 _data->code += ");\n";
3398 };
3399
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003400 void op_get_global_coord(const Operand &o_dst, const Operand &o_step, const TensorOperand &o_tensor,
3401 int32_t dim) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003402 {
3403 OperandUnpacker operands(_data->tiles, _data->arguments);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003404 auto dst = operands.unpack(o_dst);
3405 auto step = operands.unpack(o_step);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003406
3407 // Validation: Check that x, y and z are scalar
3408
3409 TensorOperandUnpacker tensor_operands(_data->arguments);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003410 auto tensor = tensor_operands.unpack(o_tensor);
3411 auto gpu_sampler = o_tensor.sampler();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003412
3413 GpuTensor3dMapper mapper(tensor, gpu_sampler);
3414
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003415 switch(dim)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003416 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003417 case 0:
3418 if(mapper.is_one_component_x())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003419 {
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003420 _data->code += dst->scalar(0, 0).str;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003421 _data->code += " = 0;\n";
3422 }
3423 else
3424 {
3425 if(mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin)
3426 {
3427 // Validation: Check: fixed tensor shape
3428 // TO BE CHANGED
3429 _data->code += dst->scalar(0, 0).str;
3430 _data->code += " = get_global_id(0) * ";
3431 _data->code += step->scalar(0, 0).str;
3432 _data->code += ";\n";
3433 }
3434 else
3435 {
3436 _data->code += dst->scalar(0, 0).str;
3437 _data->code += " = get_global_id(0) * ";
3438 _data->code += step->scalar(0, 0).str;
3439 _data->code += ";\n";
3440 }
3441 }
3442 break;
3443 case 1:
3444 if(mapper.is_one_component_y())
3445 {
3446 _data->code += dst->scalar(0, 0).str;
3447 _data->code += " = 0;\n";
3448 }
3449 else
3450 {
3451 if(mapper.gpu_sampler().address_mode_y == TensorSamplerAddressModeY::OverlappingMin)
3452 {
3453 }
3454 else
3455 {
3456 _data->code += dst->scalar(0, 0).str;
3457 _data->code += " = get_global_id(1) * ";
3458 _data->code += step->scalar(0, 0).str;
3459 _data->code += ";\n";
3460 }
3461 }
3462 break;
3463 case 2:
3464 if(mapper.is_one_component_z())
3465 {
3466 _data->code += dst->scalar(0, 0).str;
3467 _data->code += " = 0;\n";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003468 }
3469 else
3470 {
3471 _data->code += dst->scalar(0, 0).str;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003472 _data->code += " = get_global_id(2) * ";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003473 _data->code += step->scalar(0, 0).str;
3474 _data->code += ";\n";
3475 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003476 break;
3477 default:
3478 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003479 }
3480 };
3481
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003482 void op_get_global_batch(const Operand &o_dst, const TensorOperand &o_tensor) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003483 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003484 OperandUnpacker operands(_data->tiles, _data->arguments);
3485 const IVectorTile *dst = operands.unpack(o_dst);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003486
3487 TensorOperandUnpacker tensor_operands(_data->arguments);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003488 IGpuTensorArgument *tensor = tensor_operands.unpack(o_tensor);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003489 auto gpu_sampler = o_tensor.sampler();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003490
3491 GpuTensor3dMapper mapper(tensor, gpu_sampler);
3492
3493 if(mapper.is_one_component_batch())
3494 {
3495 _data->code += dst->scalar(0, 0).str;
3496 _data->code += " = 0;\n";
3497 }
3498 else
3499 {
3500 std::cout << "Unsupported batched computation" << std::endl;
3501 assert(false);
3502 }
3503 };
3504
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003505 void op_get_global_size(const Operand &dst_var, int32_t dim) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003506 {
3507 assert(dst_var.type() == OperandType::Tile);
3508 assert(_data->tiles.has_tile(dst_var.value()));
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003509 assert(_data->tiles[dst_var.value()]->format().w == 1 && _data->tiles[dst_var.value()]->format().h == 1); // It must be a scalar variable
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003510
3511 auto var = _data->tiles[dst_var.value()];
3512
3513 _data->code += var->scalar(0, 0).str;
3514 _data->code += " = get_global_size(";
3515 _data->code += std::to_string(dim);
3516 _data->code += ");\n";
3517 }
3518
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003519 void op_unary_expression(const Operand &dst_name, UnaryOp op, const Operand &src_name) override
3520 {
3521 OperandUnpacker operands(_data->tiles, _data->arguments);
3522 const IVectorTile *src = operands.unpack(src_name);
3523 const IVectorTile *dst = operands.unpack(dst_name);
3524
3525 const int32_t dst_w = dst->format().w;
3526 const int32_t dst_h = dst->format().h;
3527 const int32_t src_w = src->format().w;
3528 const std::string dt = dst->underlying_source_variables()[0].type.str;
3529
3530 const bool broadcast_src_x = dst_w != 1 && src_w == 1;
3531
3532 const std::string src_prefix = broadcast_src_x ? "(" + dt + ")" : "";
3533
3534 // Broadcasting on Y is automatic
3535 for(int32_t y = 0; y < dst_h; ++y)
3536 {
3537 _data->code += dst->vector(y).str;
3538 _data->code += " = ";
3539 _data->code += to_string(op);
3540 _data->code += src_prefix + src->vector(y).str;
3541 _data->code += ";\n";
3542 }
3543 }
3544
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003545 void op_binary_expression(const Operand &dst_name, const Operand &lhs_name, BinaryOp op,
3546 const Operand &rhs_name) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003547 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003548 OperandUnpacker operands(_data->tiles, _data->arguments);
3549 const IVectorTile *lhs = operands.unpack(lhs_name);
3550 const IVectorTile *rhs = operands.unpack(rhs_name);
3551 const IVectorTile *dst = operands.unpack(dst_name);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003552
3553 const int32_t dst_w = dst->format().w;
3554 const int32_t dst_h = dst->format().h;
3555 assert(lhs != nullptr);
3556 const int32_t lhs_w = lhs->format().w;
3557 const int32_t rhs_w = rhs->format().w;
3558
3559 if(op == BinaryOp::MatMul_Nt_T)
3560 {
3561 assert((dst->format().dt == DataType::Fp32) || (dst->format().dt == DataType::Fp16));
3562 for(int32_t y = 0; y < dst_h; ++y)
3563 {
3564 for(int32_t x = 0; x < dst_w; ++x)
3565 {
3566 for(int32_t k = 0; k < lhs_w; ++k)
3567 {
3568 _data->code += dst->scalar(x, y).str;
3569 _data->code += " = fma(";
3570 _data->code += lhs->scalar(k, y).str;
3571 _data->code += ", ";
3572 _data->code += rhs->scalar(k, x).str;
3573 _data->code += ", ";
3574 _data->code += dst->scalar(x, y).str;
3575 _data->code += ");\n";
3576 }
3577 }
3578 }
3579
3580 return;
3581 }
3582
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003583 const bool broadcast_lhs_x = dst_w != 1 && lhs_w == 1;
3584 const bool broadcast_rhs_x = dst_w != 1 && rhs_w == 1;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003585
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003586 const std::string lhs_prefix = broadcast_lhs_x ? "(" + dst->underlying_source_variables()[0].type.str + ")" : "";
3587 const std::string rhs_prefix = broadcast_rhs_x ? "(" + dst->underlying_source_variables()[0].type.str + ")" : "";
3588 const std::string op_str = to_string(op);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003589
3590 // Broadcasting on Y is automatic
3591 for(int32_t y = 0; y < dst_h; ++y)
3592 {
3593 _data->code += dst->vector(y).str;
3594 _data->code += " = ";
3595 _data->code += lhs_prefix + lhs->vector(y).str;
3596 _data->code += " ";
3597 _data->code += op_str;
3598 _data->code += " ";
3599 _data->code += rhs_prefix + rhs->vector(y).str;
3600 _data->code += ";\n";
3601 }
3602 };
3603
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003604 void op_cast_expression(const Operand &o_dst, const Operand &o_src, ConvertPolicy policy) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003605 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003606 OperandUnpacker operands(_data->tiles, _data->arguments);
3607 const IVectorTile *src = operands.unpack(o_src);
3608 const IVectorTile *dst = operands.unpack(o_dst);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003609 // const int32_t dst_w = dst->format().w;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003610 const int32_t dst_h = dst->format().h;
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003611 const std::string dt = dst->underlying_source_variables()[0].type.str;
Adnan AlSinan66f3d382023-07-10 15:07:45 +01003612 const bool is_float = (dst->format().dt == DataType::Fp32) || (dst->format().dt == DataType::Fp16);
3613 const std::string sat = ((policy == ConvertPolicy::Saturate && !is_float) ? "_sat" : "");
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003614
3615 // Broadcasting on Y is automatic
3616 for(int32_t y = 0; y < dst_h; ++y)
3617 {
3618 _data->code += dst->vector(y).str;
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003619 _data->code += " = convert_" + dt + sat + "(";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003620 _data->code += src->vector(y).str;
3621 _data->code += ");\n";
3622 }
3623 };
3624
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003625 void op_assign(const Operand &dst_name, const Operand &src_name) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003626 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003627 OperandUnpacker operands(_data->tiles, _data->arguments);
3628 const IVectorTile *src = operands.unpack(src_name);
3629 const IVectorTile *dst = operands.unpack(dst_name);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003630
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003631 const int32_t dst_w = dst->format().w;
3632 const int32_t dst_h = dst->format().h;
3633 const int32_t src_w = src->format().w;
3634 const std::string dt = dst->underlying_source_variables()[0].type.str;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003635
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003636 const bool broadcast_src_x = dst_w != 1 && src_w == 1;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003637
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003638 const std::string src_prefix = broadcast_src_x ? "(" + dt + ")" : "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003639
3640 // Broadcasting on Y is automatic
3641 for(int32_t y = 0; y < dst_h; ++y)
3642 {
3643 _data->code += dst->vector(y).str;
3644 _data->code += " = ";
3645 _data->code += src_prefix + src->vector(y).str;
3646 _data->code += ";\n";
3647 }
3648 }
3649
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003650 void
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003651 op_unary_elementwise_function(const Operand &dst_name, UnaryFunction func, const Operand &src_name) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003652 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003653 OperandUnpacker operands(_data->tiles, _data->arguments);
3654 const IVectorTile *src = operands.unpack(src_name);
3655 const IVectorTile *dst = operands.unpack(dst_name);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003656
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003657 const int32_t dst_h = dst->format().h;
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003658 const std::string dt = dst->underlying_source_variables()[0].type.str;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003659
SiCong Li16b37522023-07-18 17:56:49 +01003660 // Always perform an explicit cast. This automatically covers at least the 2 scenarios:
3661 // 1. Widen a scalar into a vector type. This enables scalar-vector broadcasting
3662 // 2. Ensure non-ambiguity over function overloads.
3663 // E.g. a constant tile may be accidentally initialized with a double literal. By casting it to single float,
3664 // it avoids ambiguous function calls
3665 const std::string src_prefix = "(" + dt + ")";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003666
3667 // Broadcasting on Y is automatic
3668 for(int32_t y = 0; y < dst_h; ++y)
3669 {
3670 _data->code += dst->vector(y).str;
3671 _data->code += " = ";
3672
3673 switch(func)
3674 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003675 case UnaryFunction::Exp:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003676 _data->code += "exp(";
3677 break;
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003678 case UnaryFunction::Tanh:
3679 _data->code += "tanh(";
3680 break;
3681 case UnaryFunction::Sqrt:
3682 _data->code += "sqrt(";
3683 break;
3684 case UnaryFunction::Erf:
3685 _data->code += "erf(";
3686 break;
3687 case UnaryFunction::Fabs:
3688 _data->code += "fabs(";
3689 break;
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003690 case UnaryFunction::Log:
3691 _data->code += "log(";
3692 break;
3693 case UnaryFunction::SizeOf:
3694 _data->code += "sizeof(";
3695 break;
3696 case UnaryFunction::Round:
3697 _data->code += "round(";
3698 break;
Gunes Bayir91cb7332023-07-25 17:00:33 +01003699 case UnaryFunction::Floor:
3700 _data->code += "floor(";
3701 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003702 default:
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003703 CKW_ASSERT_MSG(false, "Unexpected UnaryFunction used.");
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003704 }
3705
3706 _data->code += src_prefix + src->vector(y).str;
3707 _data->code += ");\n";
3708 }
3709 }
3710
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003711 void op_binary_elementwise_function(const Operand &dst_name, BinaryFunction func, const Operand &first_name, const Operand &second_name) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003712 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003713 OperandUnpacker operands(_data->tiles, _data->arguments);
3714 const IVectorTile *first = operands.unpack(first_name);
3715 const IVectorTile *second = operands.unpack(second_name);
3716 const IVectorTile *dst = operands.unpack(dst_name);
3717
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003718 const int32_t dst_h = dst->format().h;
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003719 const auto datatype = dst->underlying_source_variables()[0].type;
3720 const std::string datatype_str = datatype.str;
3721
SiCong Li16b37522023-07-18 17:56:49 +01003722 // Always perform an explicit cast. See similar comments in op_unary_elementwise_function
3723 const std::string first_prefix = "(" + datatype_str + ")";
3724 const std::string second_prefix = "(" + datatype_str + ")";
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003725
3726 const bool is_float = (datatype.dt == DataType::Fp32 || datatype.dt == DataType::Fp16);
3727
3728 // Broadcasting on Y is automatic
3729 for(int32_t y = 0; y < dst_h; ++y)
3730 {
3731 _data->code += dst->vector(y).str;
3732 _data->code += " = ";
3733
3734 switch(func)
3735 {
3736 case BinaryFunction::Min:
3737 _data->code += is_float ? "fmin(" : "min(";
3738 break;
3739 case BinaryFunction::Max:
3740 _data->code += is_float ? "fmax(" : "max(";
3741 break;
3742 default:
3743 CKW_ASSERT_MSG(false, "Unexpected BinaryFunction used.");
3744 }
3745
3746 _data->code += first_prefix + first->vector(y).str;
3747 _data->code += ", ";
3748 _data->code += second_prefix + second->vector(y).str;
3749 _data->code += ");\n";
3750 }
3751 }
3752
3753 void op_ternary_elementwise_function(const Operand &dst_name, TernaryFunction func, const Operand &first_name, const Operand &second_name, const Operand &third_name) override
3754 {
3755 OperandUnpacker operands(_data->tiles, _data->arguments);
3756 const IVectorTile *first = operands.unpack(first_name);
3757 const IVectorTile *second = operands.unpack(second_name);
3758 const IVectorTile *third = operands.unpack(third_name);
3759 const IVectorTile *dst = operands.unpack(dst_name);
3760
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003761 const int32_t dst_h = dst->format().h;
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003762 const std::string dt = dst->underlying_source_variables()[0].type.str;
3763
SiCong Li16b37522023-07-18 17:56:49 +01003764 // Always perform an explicit cast. See similar comments in op_unary_elementwise_function
3765 const std::string first_prefix = "(" + dt + ")";
3766 const std::string second_prefix = "(" + dt + ")";
3767 const std::string third_prefix = "(" + dt + ")";
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003768
3769 // Broadcasting on Y is automatic
3770 for(int32_t y = 0; y < dst_h; ++y)
3771 {
3772 _data->code += dst->vector(y).str;
3773 _data->code += " = ";
3774
3775 switch(func)
3776 {
3777 case TernaryFunction::Select:
3778 _data->code += "select(";
3779 break;
Gunes Bayir91cb7332023-07-25 17:00:33 +01003780 case TernaryFunction::Clamp:
3781 _data->code += "clamp(";
3782 break;
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003783 default:
3784 CKW_ASSERT_MSG(false, "Unexpected TernaryFunction used.");
3785 }
3786
3787 _data->code += first_prefix + first->vector(y).str;
3788 _data->code += ", ";
3789 _data->code += second_prefix + second->vector(y).str;
3790 _data->code += ", ";
3791 _data->code += third_prefix + third->vector(y).str;
3792 _data->code += ");\n";
3793 }
3794 }
3795
3796 void op_if_header(const Operand &o_lhs, BinaryOp op, const Operand &o_rhs) override
3797 {
3798 OperandUnpacker operands(_data->tiles, _data->arguments);
3799 const IVectorTile *lhs = operands.unpack(o_lhs);
3800 const IVectorTile *rhs = operands.unpack(o_rhs);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003801
3802 assert(is_tile_scalar(lhs));
3803 assert(is_tile_scalar(rhs));
3804
3805 _data->code += "if(";
3806 _data->code += lhs->scalar(0, 0).str;
3807 _data->code += " ";
3808 _data->code += to_string(op);
3809 _data->code += " ";
3810 _data->code += rhs->scalar(0, 0).str;
3811 _data->code += ")\n";
3812 }
3813
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003814 void op_else_if_header(const Operand &o_lhs, BinaryOp op, const Operand &o_rhs) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003815 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003816 _data->code += "else ";
3817 op_if_header(o_lhs, op, o_rhs);
3818 }
3819
3820 void op_else_header() override
3821 {
3822 _data->code += "else\n";
3823 }
3824
Nikolaj Jensenfab6c212023-06-27 14:13:24 +01003825 void op_for_loop_header(const Operand& var_name, BinaryOp cond_op, const Operand& cond_value_name, const Operand &update_var_name, AssignmentOp update_op, const Operand& update_value_name) override
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003826 {
3827 OperandUnpacker operands(_data->tiles, _data->arguments);
3828 const IVectorTile *var = operands.unpack(var_name);
3829 const IVectorTile *cond_value = operands.unpack(cond_value_name);
Nikolaj Jensenfab6c212023-06-27 14:13:24 +01003830 const IVectorTile *update_var = operands.unpack(update_var_name);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003831 const IVectorTile *update_value = operands.unpack(update_value_name);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003832
3833 const int32_t dst_w = var->format().w;
3834 const int32_t dst_h = var->format().h;
3835
3836 // It must be a scalar variable
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01003837 CKW_UNUSED(dst_w, dst_h);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003838 assert(dst_w == 1);
3839 assert(dst_h == 1);
3840
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003841 _data->code += "for(; ";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003842 _data->code += var->scalar(0, 0).str;
3843 _data->code += " ";
3844 _data->code += to_string(cond_op);
3845 _data->code += " " + cond_value->scalar(0, 0).str + "; ";
Nikolaj Jensenfab6c212023-06-27 14:13:24 +01003846 _data->code += update_var->scalar(0, 0).str;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003847 _data->code += " ";
3848 _data->code += to_string(update_op);
3849 _data->code += " " + update_value->scalar(0, 0).str + ")";
3850 _data->code += "\n";
3851 }
3852
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003853 void op_load_immediate(const TensorOperand &o_tensor, const Operand &o_dst, const Operand &o_x,
3854 const Operand &o_y, const Operand &o_z, const Operand &o_batch_idx,
3855 const Operand &dilation_y) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003856 {
3857 OperandUnpacker operands(_data->tiles, _data->arguments);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003858
3859 // Not const as it requires changes to 'load_writer'.
3860 IVectorTile *dst = operands.unpack(o_dst);
3861 IVectorTile *x = operands.unpack(o_x);
3862 IVectorTile *y = operands.unpack(o_y);
3863 IVectorTile *z = operands.unpack(o_z);
3864 IVectorTile *dil_y = operands.unpack(dilation_y);
3865 IVectorTile *b = operands.unpack(o_batch_idx);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003866
3867 TensorOperandUnpacker tensor_operands(_data->arguments);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003868 IGpuTensorArgument *tensor = tensor_operands.unpack(o_tensor);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003869 auto gpu_sampler = o_tensor.sampler();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003870
3871 GpuTensor3dMapper mapper(tensor, gpu_sampler);
3872
3873 auto load_writer = ClLoadStoreHelperWriterFactory::create(this, mapper, GpuLoadStoreType::Load);
3874
3875 // Initialize the constant part
3876 load_writer->initialize(dst, x, z, b);
3877
3878 for(int i = 0; i < dst->format().h; ++i)
3879 {
3880 std::string coord_y = y->scalar(0, 0).str + " + " + std::to_string(i);
3881 if(dil_y->scalar(0, 0).str != "1")
3882 {
3883 coord_y += " * " + dil_y->scalar(0, 0).str;
3884 }
3885 load_writer->write(std::make_pair(i, coord_y));
3886 }
3887
3888 load_writer->finalize();
3889 }
3890
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003891 void op_load_indirect(const TensorOperand &o_tensor, const Operand &o_dst, const Operand &o_x,
3892 const Operand &o_indirect_h, const Operand &o_z,
3893 const Operand &o_batch_idx) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003894 {
3895 OperandUnpacker operands(_data->tiles, _data->arguments);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003896
3897 // Not const as it requires changes to 'load_writer'.
3898 IVectorTile *dst = operands.unpack(o_dst);
3899 IVectorTile *x = operands.unpack(o_x);
3900 IVectorTile *y_ind = operands.unpack(o_indirect_h);
3901 IVectorTile *z = operands.unpack(o_z);
3902 IVectorTile *b = operands.unpack(o_batch_idx);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003903
3904 TensorOperandUnpacker tensor_operands(_data->arguments);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003905 IGpuTensorArgument *tensor = tensor_operands.unpack(o_tensor);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003906 auto gpu_sampler = o_tensor.sampler();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003907
3908 GpuTensor3dMapper mapper(tensor, gpu_sampler);
3909
3910 auto load_writer = ClLoadStoreHelperWriterFactory::create(this, mapper, GpuLoadStoreType::Load);
3911
3912 // Initialize the constant part
3913 load_writer->initialize(dst, x, z, b);
3914
3915 for(int i = 0; i < dst->format().h; ++i)
3916 {
3917 load_writer->write(std::make_pair(i, y_ind->scalar(0, i).str));
3918 }
3919
3920 load_writer->finalize();
3921 }
3922
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003923 void op_store_immediate(const TensorOperand &tensor_name, const Operand &src_name, const Operand &x_name,
3924 const Operand &y_name, const Operand &z_name,
3925 const Operand &batch_index_name) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003926 {
3927 OperandUnpacker operands(_data->tiles, _data->arguments);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003928
3929 // Not const as it requires changes to 'load_writer'.
3930 IVectorTile *src = operands.unpack(src_name);
3931 IVectorTile *x = operands.unpack(x_name);
3932 IVectorTile *y = operands.unpack(y_name);
3933 IVectorTile *z = operands.unpack(z_name);
3934 IVectorTile *b = operands.unpack(batch_index_name);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003935
3936 TensorOperandUnpacker tensor_operands(_data->arguments);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003937 IGpuTensorArgument *tensor = tensor_operands.unpack(tensor_name);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003938 auto gpu_sampler = tensor_name.sampler();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003939
3940 GpuTensor3dMapper mapper(tensor, gpu_sampler);
3941
3942 auto store_writer = ClLoadStoreHelperWriterFactory::create(this, mapper, GpuLoadStoreType::Store);
3943
3944 // Initialize the constant part
3945 store_writer->initialize(src, x, z, b);
3946
3947 int32_t tile_h = src->format().h;
3948
3949 for(int m0 = tile_h - 1; m0 >= 0; m0--)
3950 {
3951 store_writer->write(std::make_pair(m0, y->scalar(0, 0).str + " + " + std::to_string(m0)));
3952 }
3953
3954 store_writer->finalize();
3955 }
3956
3957 void op_return() override
3958 {
3959 _data->code += "return;\n";
3960 }
3961
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003962 void util_get_indirect_buffer(const Operand &o_dst, const TensorOperand &o_tensor, const Operand &o_x,
3963 const Operand &o_y, const Operand &o_x_off, const Operand &o_y_off) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003964 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003965 OperandUnpacker operands(_data->tiles, _data->arguments);
3966 const IVectorTile *dst = operands.unpack(o_dst);
3967 const IVectorTile *x = operands.unpack(o_x);
3968 const IVectorTile *y = operands.unpack(o_y);
3969 const IVectorTile *x_off = operands.unpack(o_x_off);
3970 const IVectorTile *y_off = operands.unpack(o_y_off);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003971
3972 TensorOperandUnpacker tensor_operands(_data->arguments);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003973 IGpuTensorArgument *tensor = tensor_operands.unpack(o_tensor);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003974
3975 assert(dst->format().w == 1);
3976 assert(x->format().w == 1);
3977 assert(y->format().w == 1);
3978 assert(x_off->format().w == 1);
3979 assert(y_off->format().w == 1);
3980 assert(dst->format().dt == DataType::Int32);
3981 assert(x->format().dt == DataType::Int32);
3982 assert(y->format().dt == DataType::Int32);
3983 assert(x_off->format().dt == DataType::Int32);
3984 assert(y_off->format().dt == DataType::Int32);
3985
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01003986 const std::string width = tensor->component(TensorComponentType::Dim1);
3987 const std::string height = tensor->component(TensorComponentType::Dim2);
3988 const std::string wxh = tensor->component(TensorComponentType::Dim1xDim2);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003989 /*
3990 int x_s;
3991 int y_s;
3992 x_s = (xi_0 + x_k);
3993 y_s = (yi_0 + y_k);
3994 mi_0 = x_s + y_s * width + b * widthxheight;
3995 mi_0 = select(-1, mi_0, x_s >= 0);
3996 mi_0 = select(-1, mi_0, y_s >= 0);
3997 mi_0 = select(-1, mi_0, x_s < 128);
3998 mi_0 = select(-1, mi_0, y_s < 128);
3999 */
4000 compound_statement_begin();
4001 declare_tile("_x_s", TileInfo(DataType::Int32));
4002 declare_tile("_y_s", TileInfo(DataType::Int32));
4003 auto x_s = operands.unpack(Operand("_x_s"));
4004 auto y_s = operands.unpack(Operand("_y_s"));
4005 for(int i = 0; i < dst->format().h; ++i)
4006 {
4007 // x_s = (xi_0 + x_k);
4008 // y_s = (yi_0 + y_k);
4009 _data->code += x_s->scalar(0, i).str;
4010 _data->code += " = (";
4011 _data->code += x->scalar(0, i).str;
4012 _data->code += " + ";
4013 _data->code += x_off->scalar(0, i).str;
4014 _data->code += ");\n";
4015 _data->code += y_s->scalar(0, i).str;
4016 _data->code += " = (";
4017 _data->code += y->scalar(0, i).str;
4018 _data->code += " + ";
4019 _data->code += y_off->scalar(0, i).str;
4020 _data->code += ");\n";
4021 // mi_0 = x_s + y_s * width;
4022 _data->code += dst->scalar(0, i).str;
4023 _data->code += " = ";
4024 _data->code += x_s->scalar(0, i).str;
4025 _data->code += " + ";
4026 _data->code += y_s->scalar(0, i).str;
4027 _data->code += " * " + width + ";\n";
4028 // mi_0 = select(wxh, mi_0, x_s >= 0);
4029 _data->code += dst->scalar(0, i).str;
4030 _data->code += " = select(-1, ";
4031 _data->code += dst->scalar(0, i).str;
4032 _data->code += ", ";
4033 _data->code += x_s->scalar(0, i).str;
4034 _data->code += " >= 0);\n";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004035 // mi_0 = select(wxh, mi_0, x_s < width);
4036 _data->code += dst->scalar(0, i).str;
4037 _data->code += " = select(-1, ";
4038 _data->code += dst->scalar(0, i).str;
4039 _data->code += ", ";
4040 _data->code += x_s->scalar(0, i).str;
4041 _data->code += " < ";
4042 _data->code += width + ");\n";
Jakub Sujake1c96e72023-07-31 13:36:58 +01004043 // mi_0 = select(wxh, mi_0, y_s >= 0);
4044 _data->code += dst->scalar(0, i).str;
4045 _data->code += " = select(-1, ";
4046 _data->code += dst->scalar(0, i).str;
4047 _data->code += ", ";
4048 _data->code += y_s->scalar(0, i).str;
4049 _data->code += " >= 0);\n";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004050 // mi_0 = select(wxh, mi_0, y_s < height);
4051 _data->code += dst->scalar(0, i).str;
4052 _data->code += " = select(-1, ";
4053 _data->code += dst->scalar(0, i).str;
4054 _data->code += ", ";
4055 _data->code += y_s->scalar(0, i).str;
4056 _data->code += " < ";
4057 _data->code += height + ");\n";
4058 }
4059 compound_statement_end();
4060 }
4061
4062private:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01004063 GpuKernelWriterDataHolder *_data{ nullptr };
4064 GpuKernelWriterAttribute *_attr{ nullptr };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004065};
4066
4067/** IGpuKernelWriter factory class */
4068class GpuKernelWriterFactory final
4069{
4070public:
4071 /** Static method to call the IGpuKernelWriter class accordingly with the Gpu programming language
4072 *
4073 * @param[in] gpu GPU target
4074 *
4075 * @return IGpuKernelWriter
4076 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01004077 static std::unique_ptr<IGpuKernelWriter>
4078 create(GpuKernelWriterAttribute *attr, GpuKernelWriterDataHolder *x)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004079 {
4080 switch(x->programming_language())
4081 {
4082 case GpuTargetLanguage::OpenCL:
4083 return std::make_unique<ClKernelWriter>(attr, x);
4084 default:
4085 std::cout << "Unsupported Gpu programming language" << std::endl;
4086 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01004087 return nullptr;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004088 }
4089 }
4090};
4091
Nikolaj Jensenacea4072023-07-03 09:44:42 +01004092inline int32_t
4093adjust_step(TensorSamplerFormat tensor_format, int32_t step, const TensorInfo *tensor_info_id, int32_t idx)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004094{
4095 auto tensor = tensor_info_id->shape;
4096
Nikolaj Jensenacea4072023-07-03 09:44:42 +01004097 int32_t dim[3] = { 0 };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004098
4099 switch(tensor_format)
4100 {
4101 case TensorSamplerFormat::C_W_H:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01004102 dim[0] = tensor[0];
4103 dim[1] = tensor[1];
4104 dim[2] = tensor[2];
4105 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004106 case TensorSamplerFormat::C_WH_1:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01004107 dim[0] = tensor[0];
4108 dim[1] = tensor[1] * tensor[2];
4109 dim[2] = 1;
4110 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004111 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01004112 std::cout << "Unsupported tensor format" << std::endl;
4113 assert(false);
4114 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004115 }
4116
4117 return std::min(step, dim[idx]);
4118}
4119
4120} // namespace prototype
4121} // namespace ckw
4122
Viet-Hoa Doce3c48c2023-07-03 13:44:43 +01004123#endif // CKW_PROTOTYPE_SRC_PROTOTYPE_H