blob: b9f1efa5427fa9647dca0c51eb77f11197ca8dc1 [file] [log] [blame]
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001/*
2 * Copyright (c) 2023 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
Viet-Hoa Doce3c48c2023-07-03 13:44:43 +010025#ifndef CKW_PROTOTYPE_SRC_PROTOTYPE_H
26#define CKW_PROTOTYPE_SRC_PROTOTYPE_H
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010027
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010028#include <algorithm>
29#include <array>
Nikolaj Jensenacea4072023-07-03 09:44:42 +010030#include <cassert> // assert (to be removed)
31#include <chrono>
32#include <cmath>
33#include <cstdint> // int32_t
Nikolaj Jensen5ff48022023-06-27 14:13:24 +010034#include <functional>
Nikolaj Jensenacea4072023-07-03 09:44:42 +010035#include <iostream> // cout (to be removed)
36#include <map>
37#include <memory>
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010038#include <stdexcept>
Nikolaj Jensenacea4072023-07-03 09:44:42 +010039#include <string>
40#include <unordered_map>
41#include <vector>
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010042
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010043#include "ckw/Error.h"
Nikolaj Jensenacea4072023-07-03 09:44:42 +010044#include "ckw/TensorInfo.h"
Nikolaj Jensen5ff48022023-06-27 14:13:24 +010045#include "ckw/types/ConvertPolicy.h"
46#include "ckw/types/DataType.h"
47#include "ckw/types/Functions.h"
48#include "ckw/types/GpuTargetLanguage.h"
49#include "ckw/types/Operators.h"
50#include "ckw/types/TensorSamplerTypes.h"
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010051
52namespace ckw
53{
Nikolaj Jensenacea4072023-07-03 09:44:42 +010054namespace prototype
55{
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010056
57// Dummy data structure for Size2D
58using Size2D = std::vector<int32_t>;
59
60// Dummy Status
61using Status = void;
62
63enum class ComponentType : int32_t
64{
65 Complex = 0,
66 Simple = 1,
67 Unfusable = 2
68};
69
70enum class GpuCompilationSpeed
71{
Nikolaj Jensenacea4072023-07-03 09:44:42 +010072 Fast = 0x00, // fast compilation may increase the latency of the network
73 Slow = 0x01 // slow compilation may decrease the latency of the network
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010074};
75
76enum class GpuExtensions
77{
78 Fp16,
79 Dot8,
80 Mmul,
81 FastMath
82};
83
84struct TensorInfo
85{
Nikolaj Jensenacea4072023-07-03 09:44:42 +010086 TensorShape shape{ { 0 } };
87 DataType data_type{ DataType::Unknown };
88 TensorDataLayout data_layout{ TensorDataLayout::Nhwc };
89 int32_t id{ -1 };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010090};
91
92struct ComponentAttribute
93{
Nikolaj Jensenacea4072023-07-03 09:44:42 +010094 GpuCompilationSpeed compilation_speed{ GpuCompilationSpeed::Fast };
95 bool overwrite_tile{ true };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010096};
97
98inline std::string data_type_to_cl_type(DataType dt)
99{
100 switch(dt)
101 {
102 case DataType::Fp32:
103 return "float";
104 case DataType::Fp16:
105 return "half";
106 case DataType::Int8:
107 return "char";
108 case DataType::Uint8:
109 return "uchar";
110 case DataType::Uint16:
111 return "ushort";
112 case DataType::Int16:
113 return "short";
114 case DataType::Uint32:
115 return "uint";
116 case DataType::Int32:
117 return "int";
118 case DataType::Bool:
119 return "bool";
120 default:
121 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +0100122 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100123 }
124}
125
126inline int32_t width_to_cl_vector_size(int32_t width)
127{
128 switch(width)
129 {
130 case 1:
131 return 1;
132 case 2:
133 return 2;
134 case 3:
135 return 3;
136 case 4:
137 return 4;
138 case 5:
139 case 6:
140 case 7:
141 case 8:
142 return 8;
143 case 9:
144 case 10:
145 case 11:
146 case 12:
147 case 13:
148 case 14:
149 case 15:
150 case 16:
151 return 16;
152 default:
153 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +0100154 return 0;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100155 }
156}
157
158inline std::string get_cl_data_type(DataType dt, int32_t width)
159{
160 std::string data_type;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100161 int32_t w = width_to_cl_vector_size(width);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100162 data_type += data_type_to_cl_type(dt);
163 if(w != 1)
164 {
165 data_type += std::to_string(w);
166 }
167 return data_type;
168}
169
170inline std::string to_opencl_store(int32_t vector_length)
171{
172 if(vector_length != 1)
173 {
174 return "vstore" + std::to_string(vector_length) + "(";
175 }
176 else
177 {
178 return "*(";
179 }
180}
181
182struct TileInfo
183{
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100184 TileInfo()
185 {
186 }
187
188 TileInfo(DataType dt)
189 : dt(dt), w(1), h(1)
190 {
191 }
192
193 TileInfo(DataType dt, int32_t width)
194 : dt(dt), w(width), h(1)
195 {
196 }
197
198 TileInfo(DataType dt, int32_t width, int32_t height)
199 : dt(dt), w(width), h(height)
200 {
201 }
202
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100203 DataType dt{ DataType::Unknown }; // Data type of the tile
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100204 int32_t w{ 0 }; // Width (i.e. c0 - portion of the channels)
205 int32_t h{ 0 }; // Height (i.e. s0 - portion of the spatial dimensions)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100206};
207
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100208inline std::ostream &operator<<(std::ostream &o, const TileInfo &a)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100209{
210 o << a.w << " x " << a.h;
211 return o;
212}
213
214struct DataTypeAsString
215{
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100216 std::string str{ "" };
217 DataType dt{ DataType::Unknown };
218 int32_t size{ 1 };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100219};
220
221struct ValueAsString
222{
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100223 std::string str{ "" };
224 DataTypeAsString type{};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100225};
226
227// https://stackoverflow.com/questions/51515378/storing-and-accessing-tile-properties-in-c
228// A Tile is a collection of variables used to express a 2D data.
229class IScalarTile
230{
231public:
232 virtual ~IScalarTile() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100233
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100234 /** Method to get the scalar variable from a tile
235 * @param[in] x X coordinate on the width of the tile. If out-of-bound, the coordinate is clamped to the nearest valid edge
236 * @param[in] y Y coordinate on the height of the tile. If out-of-bound, the coordinate is clamped to the nearest valid edge
237 *
238 * @return the scalar variable as a string
239 */
240 virtual ValueAsString scalar(int32_t x, int32_t y) const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100241
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100242 /** Method to get the list of underlying variable names used by the tile
243 *
244 * @return the list of variable names
245 */
246 virtual std::vector<ValueAsString> underlying_source_variables() const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100247
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100248 /** Method to get the name of the tile.
249 *
250 * @return the name of the tile
251 */
252 std::string name() const
253 {
254 return _basename;
255 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100256
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100257 /** Method to get the tile format
258 *
259 * @return the format
260 */
261 TileInfo format() const
262 {
263 return _format;
264 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100265
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100266 /** Method to know whether the tile is assignable or not (constant)
267 *
268 * @return true if the tile is assignable
269 */
270 virtual bool is_assignable() const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100271
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100272 /** Method to know whether the tile needs to be declared
273 *
274 * @return true if the tile needs to be declared in the code before being used
275 */
276 virtual bool need_declaration() const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100277
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100278protected:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100279 TileInfo _format{}; // Tile format
280 std::string _basename{ "" }; // Tile name
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100281};
282
283// A tile is a collection of variables used to express a 2D data. The variables are vectors in the GPU context.
284// The vector size is given by the width of the tile. The number of vectors height by depth defines the number of vectors
285class IVectorTile : public IScalarTile
286{
287public:
288 virtual ~IVectorTile() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100289
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100290 /** Method to get the vector variable from a tile. A vector is an ordered homogeneous collection of two or more scalars.
291 * The user can query the list of supported width for the vectors through preferred_vector_sizes().
292 *
293 * @param[in] y Y coordinate on the height of the tile. If out-of-bound, the coordinate is clamped to the nearest valid edge
294 *
295 * @return the vector variable as a string
296 */
297 virtual ValueAsString vector(int32_t y) const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100298
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100299 /** Method to get a vector variable from a tile. A vector is an ordered homogeneous collection of two or more scalars.
300 *
301 * @return the vector variable as a string
302 */
303 virtual ValueAsString vector(int32_t x_start, int32_t width, int32_t y) const = 0;
304 /** Method to get the preferred vector sizes.
305 *
306 * @return a vector with the preferred vector sizes
307 */
308 //virtual std::vector<int32_t> preferred_vector_sizes() const = 0;
309};
310
311class ClTile : public IVectorTile
312{
313public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100314 ClTile(const std::string &name, TileInfo format)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100315 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100316 _format = format;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100317 _basename = name;
318 }
319
320 ValueAsString scalar(int32_t x, int32_t y) const override
321 {
322 x = std::max(std::min(x, _format.w - 1), static_cast<int32_t>(0));
323 y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
324
325 ValueAsString t;
326 t.str = build_variable_name(y);
327 t.type.str = get_cl_data_type(_format.dt, 1);
328 t.type.dt = _format.dt;
329 t.type.size = 1;
330
331 // Check required because if the width has only one element, we cannot use .s0
332 if(_format.w != 1)
333 {
334 // Automatic broadcasting
335 t.str += ".s" + std::to_string(x);
336 }
337
338 return t;
339 }
340
341 ValueAsString vector(int32_t y) const override
342 {
343 y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
344
345 ValueAsString t;
346 t.str = build_variable_name(y);
347 t.type.str = get_cl_data_type(_format.dt, _format.w);
348 t.type.dt = _format.dt;
349 t.type.size = _format.w;
350 return t;
351 }
352
353 ValueAsString vector(int32_t x_start, int32_t width, int32_t y) const override
354 {
355 y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
356
357 ValueAsString t;
358 t.str = build_variable_name(y);
359 t.type.str = get_cl_data_type(_format.dt, width);
360 t.type.dt = _format.dt;
361 t.type.size = width;
362
363 if(_format.w != 1)
364 {
365 t.str += ".s";
366 for(int i = 0; i < width; ++i)
367 {
368 t.str += to_scalar_hex(x_start + i);
369 }
370 }
371 return t;
372 }
373
374 std::vector<ValueAsString> underlying_source_variables() const override
375 {
376 std::vector<ValueAsString> vars;
377 for(int32_t y = 0; y < _format.h; ++y)
378 {
379 ValueAsString t;
380 t.str = build_variable_name(y);
381 t.type.str = get_cl_data_type(_format.dt, _format.w);
382 t.type.dt = _format.dt;
383 t.type.size = _format.w;
384 vars.push_back(t);
385 }
386 return vars;
387 }
388
389 bool is_assignable() const override
390 {
391 return true;
392 }
393
394 bool need_declaration() const override
395 {
396 return true;
397 }
398
399private:
400 std::string build_variable_name(int32_t y) const
401 {
402 std::string var_name = _basename;
403
404 if(_format.h == 1)
405 {
406 return var_name;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100407 }
408 else
409 {
410 var_name += "_";
411 var_name += std::to_string(y);
412 }
413
414 return var_name;
415 }
416
417 std::string to_scalar_hex(int32_t x) const
418 {
419 switch(x)
420 {
421 case 0:
422 case 1:
423 case 2:
424 case 3:
425 case 4:
426 case 5:
427 case 6:
428 case 7:
429 case 8:
430 case 9:
431 return std::to_string(x);
432 case 10:
433 return "A";
434 case 11:
435 return "B";
436 case 12:
437 return "C";
438 case 13:
439 return "D";
440 case 14:
441 return "E";
442 case 15:
443 return "F";
444 default:
Viet-Hoa Doe1880f02023-06-28 10:25:35 +0100445 std::cout << "Unsupported hexadecimal value" << std::endl;
446 assert(false);
447 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100448 }
449 }
450};
451
452// Unique features: It contains values in the form of string. The name used for this object is misleading since the variables can change the value over time.
453class ClConstantTile : public IVectorTile
454{
455public:
456 ClConstantTile(const std::vector<std::vector<std::string>> &in, DataType dt)
457 {
458 _format.w = in[0].size();
459 _format.h = in.size();
460 _format.dt = dt;
461
462 _data = std::vector<std::vector<std::string>>(_format.h, std::vector<std::string>(_format.w));
463
464 for(int32_t y = 0; y < _format.h; ++y)
465 {
466 for(int32_t x = 0; x < _format.w; ++x)
467 {
468 _data[y][x] = in[y][x];
469 }
470 }
471 }
472
473 ValueAsString scalar(int32_t x, int32_t y) const override
474 {
475 x = std::max(std::min(x, _format.w - 1), static_cast<int32_t>(0));
476 y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
477
478 ValueAsString t;
479 t.str = _data[y][x];
480 t.type.str = get_cl_data_type(_format.dt, 1);
481 t.type.dt = _format.dt;
482 t.type.size = 1;
483
484 return t;
485 }
486
487 ValueAsString vector(int32_t y) const override
488 {
489 y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
490
491 return vector(0, _format.w, y);
492 }
493
494 ValueAsString vector(int32_t x_start, int32_t width, int32_t y) const override
495 {
496 y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
497
498 ValueAsString t;
499 t.str = "";
500 t.type.str = get_cl_data_type(_format.dt, width);
501 t.type.dt = _format.dt;
502 t.type.size = width;
503
504 if(width > 1)
505 {
506 t.str += "((" + get_cl_data_type(_format.dt, width) + ")(";
507 }
508
509 int32_t x = x_start;
510 for(; x < width - 1; ++x)
511 {
512 t.str += scalar(x, y).str;
513 t.str += ", ";
514 }
515 t.str += scalar(x, y).str;
516
517 if(width > 1)
518 {
519 t.str += "))";
520 }
521
522 return t;
523 }
524
525 std::vector<ValueAsString> underlying_source_variables() const override
526 {
527 std::vector<ValueAsString> vars;
528
529 for(int32_t y = 0; y < _format.h; ++y)
530 {
531 for(int32_t x = 0; x < _format.w; ++x)
532 {
533 ValueAsString t;
534 t.str = _data[y][x];
535 t.type.str = get_cl_data_type(_format.dt, 1);
536 t.type.dt = _format.dt;
537 t.type.size = 1;
538 vars.push_back(t);
539 }
540 }
541
542 return vars;
543 }
544
545 bool is_assignable() const override
546 {
547 return false;
548 }
549
550 bool need_declaration() const override
551 {
552 return false;
553 }
554
555private:
556 std::vector<std::vector<std::string>> _data{};
557};
558
559enum class TensorComponentIndex : int32_t
560{
561 IndexMask = 0x0000000f,
562};
563
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100564enum class TensorComponentGroup : int32_t
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100565{
566 OffsetFirstElement = 0x00000100,
567 Stride = 0x00001000,
568 Dimension = 0x00010000,
569 FoldedDimension = 0x00100000,
570 Constant = 0x01000000
571};
572
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100573inline std::string to_string(TensorComponentType x)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100574{
575 switch(x)
576 {
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100577 case TensorComponentType::Unknown:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100578 return "Unknown";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100579 case TensorComponentType::OffsetFirstElement:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100580 return "OffsetFirstElement";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100581 case TensorComponentType::Stride1:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100582 return "Stride1";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100583 case TensorComponentType::Stride2:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100584 return "Stride2";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100585 case TensorComponentType::Stride3:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100586 return "Stride3";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100587 case TensorComponentType::Stride4:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100588 return "Stride4";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100589 case TensorComponentType::Dim0:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100590 return "Dim0";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100591 case TensorComponentType::Dim1:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100592 return "Dim1";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100593 case TensorComponentType::Dim2:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100594 return "Dim2";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100595 case TensorComponentType::Dim3:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100596 return "Dim3";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100597 case TensorComponentType::Dim4:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100598 return "Dim4";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100599 case TensorComponentType::Dim1xDim2:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100600 return "Dim1xDim2";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100601 case TensorComponentType::Dim1xDim2xDim3:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100602 return "Dim1xDim2xDim3";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100603 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100604 assert(false);
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100605 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100606 }
607}
608
609class ITensorArgument
610{
611public:
612 virtual ~ITensorArgument() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100613
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100614 /** Method to get the tensor component as a string
615 *
616 * @param[in] x tensor component to query
617 *
618 * @return the tensor component as a string
619 */
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100620 virtual std::string component(TensorComponentType x) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100621
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100622 /** Method to get the tensor component type declaration as a string
623 *
624 * @return the tensor component type declaration as a string
625 */
626 virtual std::string component_type_declaration() const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100627
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100628 /** Method to get the tensor component data type
629 *
630 * @return the tensor component data type
631 */
632 virtual DataType component_data_type() const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100633
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100634 /** Method to get the tensor component declarations
635 *
636 * @return a vector containing the tensor component declarations
637 */
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100638 virtual std::vector<TensorComponentType> component_declarations() const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100639
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100640 /** Method to get the name of the tensor argument.
641 *
642 * @return the name of the tensor argument
643 */
644 std::string name() const
645 {
646 return _basename;
647 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100648
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100649 /** Method to get the tensor format
650 *
651 * @return the format
652 */
653 TensorInfo format() const
654 {
655 return _format;
656 }
657
658protected:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100659 TensorInfo _format{};
660 std::string _basename{};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100661};
662
663enum class GpuTensorStorage : int32_t
664{
665 Unknown = 0x0000,
666 BufferUint8Ptr = 0x0012,
667 Image2dReadOnly = 0x0020,
668 Image2dWriteOnly = 0x0021,
669 Image3dReadOnly = 0x0030,
670 Image3dWriteOnly = 0x0031
671};
672
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100673inline GpuTensorStorage to_gpu_tensor_storage(TensorStorageType s)
674{
675 switch(s)
676 {
677 case TensorStorageType::Unknown:
678 return GpuTensorStorage::Unknown;
679
680 case TensorStorageType::BufferUint8Ptr:
681 return GpuTensorStorage::BufferUint8Ptr;
682
683 case TensorStorageType::Texture2dReadOnly:
684 return GpuTensorStorage::Image2dReadOnly;
685
686 case TensorStorageType::Texture2dWriteOnly:
687 return GpuTensorStorage::Image2dWriteOnly;
688
689 default:
690 assert(false);
691 return GpuTensorStorage::Unknown;
692 }
693}
694
695inline TensorStorageType to_tensor_storage(GpuTensorStorage s)
696{
697 switch(s)
698 {
699 case GpuTensorStorage::Unknown:
700 return TensorStorageType::Unknown;
701
702 case GpuTensorStorage::BufferUint8Ptr:
703 return TensorStorageType::BufferUint8Ptr;
704
705 case GpuTensorStorage::Image2dReadOnly:
706 return TensorStorageType::Texture2dReadOnly;
707
708 case GpuTensorStorage::Image2dWriteOnly:
709 return TensorStorageType::Texture2dWriteOnly;
710
711 default:
712 assert(false);
713 return TensorStorageType::Unknown;
714 }
715}
716
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100717class IGpuTensorArgument : public ITensorArgument
718{
719public:
720 virtual ~IGpuTensorArgument() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100721
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100722 /** Method to get the tensor storage, which is the underlying storage used to keep the data memory
723 *
724 * @param[in] x tensor storage to query
725 *
726 * @return the tensor storage as a string
727 */
728 virtual std::string storage(GpuTensorStorage x) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100729
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100730 /** Method to get the tensor storage type declaration as a string
731 *
732 * @param[in] x tensor component to query
733 *
734 * @return the tensor storage type declaration as a string
735 */
736 virtual std::string storage_type_declaration(GpuTensorStorage x) const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100737
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100738 /** Method to get the tensor storage declarations
739 *
740 * @return a vector containing the tensor storage declarations
741 */
742 virtual std::vector<GpuTensorStorage> storage_declarations() const = 0;
743};
744
745class ClTensorArgument : public IGpuTensorArgument
746{
747public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100748 ClTensorArgument(const std::string &name, const TensorInfo &x, bool return_by_value_when_possible)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100749 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100750 _basename = name;
751 _format = x;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100752 _return_by_value_when_possible = return_by_value_when_possible;
753 }
754
755 // Methods to override
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100756 std::string component(TensorComponentType x) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100757 {
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100758 if((static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentGroup::Constant)))
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100759 {
760 int32_t idx = static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentIndex::IndexMask);
761 return std::to_string(idx - 1);
762 }
763
764 if(_return_by_value_when_possible)
765 {
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100766 if((static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentGroup::Dimension)))
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100767 {
768 int32_t idx = static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentIndex::IndexMask);
769 return std::to_string(_format.shape[idx]);
770 }
771
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100772 if((static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentGroup::FoldedDimension)))
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100773 {
774 switch(x)
775 {
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100776 case TensorComponentType::Dim1xDim2:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100777 return std::to_string(_format.shape[1] * _format.shape[2]);
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100778 case TensorComponentType::Dim1xDim2xDim3:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100779 return std::to_string(_format.shape[1] * _format.shape[2] * _format.shape[2]);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100780 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100781 std::cout << "Unsupported folded dimension" << std::endl;
782 assert(false);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100783 }
784 }
785 }
786
787 if(std::find(_components_required.begin(), _components_required.end(), x) == _components_required.end())
788 {
789 _components_required.push_back(x);
790 }
791
792 return build_component_name(x);
793 }
794
795 std::string component_type_declaration() const override
796 {
797 return "int";
798 };
799
800 DataType component_data_type() const override
801 {
802 return DataType::Int32;
803 }
804
805 std::string storage(GpuTensorStorage x) override
806 {
807 if(std::find(_storage_required.begin(), _storage_required.end(), x) == _storage_required.end())
808 {
809 _storage_required.push_back(x);
810 }
811
812 return build_storage_name(x);
813 }
814
815 std::string storage_type_declaration(GpuTensorStorage x) const override
816 {
817 switch(x)
818 {
819 case GpuTensorStorage::BufferUint8Ptr:
820 return "__global uchar*";
821 case GpuTensorStorage::Image2dReadOnly:
822 return "__read_only image2d_t";
823 case GpuTensorStorage::Image2dWriteOnly:
824 return "__write_only image2d_t";
825 case GpuTensorStorage::Image3dReadOnly:
826 return "__read_only image3d_t ";
827 case GpuTensorStorage::Image3dWriteOnly:
828 return "__write_only image3d_t ";
829 default:
830 std::cout << "Unsupported storage" << std::endl;
831 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +0100832 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100833 }
834 };
835
836 std::vector<GpuTensorStorage> storage_declarations() const override
837 {
838 return _storage_required;
839 }
840
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100841 std::vector<TensorComponentType> component_declarations() const override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100842 {
843 return _components_required;
844 }
845
846private:
847 std::string build_storage_name(GpuTensorStorage x) const
848 {
849 std::string var_name = _basename;
850
851 switch(x)
852 {
853 case GpuTensorStorage::BufferUint8Ptr:
854 return var_name + "_ptr";
855 case GpuTensorStorage::Image2dReadOnly:
856 case GpuTensorStorage::Image2dWriteOnly:
857 return var_name + "_img2d";
858 case GpuTensorStorage::Image3dReadOnly:
859 case GpuTensorStorage::Image3dWriteOnly:
860 return var_name + "_img3d";
861 default:
862 std::cout << "Unsupported storage" << std::endl;
863 assert(false);
864 }
865
866 return var_name;
867 }
868
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100869 std::string build_component_name(TensorComponentType x) const
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100870 {
871 std::string var_name = _basename;
872
873 switch(x)
874 {
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100875 case TensorComponentType::OffsetFirstElement:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100876 return var_name + "_offset_first_element";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100877 case TensorComponentType::Stride1:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100878 return var_name + "_stride1";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100879 case TensorComponentType::Stride2:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100880 return var_name + "_stride2";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100881 case TensorComponentType::Stride3:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100882 return var_name + "_stride3";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100883 case TensorComponentType::Dim0:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100884 return var_name + "_dim0";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100885 case TensorComponentType::Dim1:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100886 return var_name + "_dim1";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100887 case TensorComponentType::Dim2:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100888 return var_name + "_dim2";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100889 case TensorComponentType::Dim3:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100890 return var_name + "_dim3";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100891 case TensorComponentType::Dim1xDim2:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100892 return var_name + "_dim1xdim2";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100893 case TensorComponentType::Dim1xDim2xDim3:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100894 return var_name + "_dim1xdim2xdim3";
895 default:
896 std::cout << "Unsupported component" << std::endl;
897 assert(false);
898 }
899
900 return var_name;
901 }
902
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100903 bool _return_by_value_when_possible{ false };
904 std::vector<GpuTensorStorage> _storage_required{};
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100905 std::vector<TensorComponentType> _components_required{};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100906};
907
908/**
909 * @brief Data structure that contains the declared tiles by the components.
910 * The registry is a linear data structure that follows the similar principle of the stack. The user can use the @p increment_registry_level() method to
911 * increase the level of the stack (0 when it starts). When the user uses the @p decrement_registry_level() method, the registry decreases the level of the stack
912 * and remove (pop) all the tiles from the level above.
913 * When a tile is declared on the level 0, it is a global tile. A global tile is visible in all parts of the code.
914 * Since different components may use the same name to define a tile, the registry adopts the IdSpace concept, an @p id to prevent name collisions
915 * when declaring tiles among different components.
916 *
917 */
918class GpuTileRegistry
919{
920public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100921 enum class RegistryTileType
922 {
923 Tile,
924 Link
925 };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100926
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100927 using RegistryIdSpace = int32_t;
928 using RegistryLevel = int32_t;
929 using RegistryTileName = std::string;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100930
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100931 struct RegistryTileTableEntry
932 {
933 RegistryLevel registry_level{ 0 };
934 std::unique_ptr<IVectorTile> tile_object{ nullptr };
935 };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100936
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100937 struct RegistryTileTypeTableEntry
938 {
939 RegistryTileType tile_type{ RegistryTileType::Tile };
940 RegistryTileName tile_name{};
941 RegistryIdSpace registry_idspace{ 0 };
942 RegistryLevel registry_level{ 0 };
943 };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100944
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100945 using RegistryTileTable = std::map<RegistryIdSpace, std::map<RegistryTileName, RegistryTileTableEntry>>;
946 using RegistryTileTypeTable = std::map<RegistryIdSpace, std::map<RegistryTileName, RegistryTileTypeTableEntry>>;
947
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100948 /**
949 * @brief Construct a new Gpu Tile Registry object
950 *
951 */
952 GpuTileRegistry()
953 {
954 _language = GpuTargetLanguage::Unknown;
955 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100956
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100957 /**
958 * @brief Construct a new Gpu Tile Registry object providing the Gpu programming language
959 *
960 * @param[in] language Gpu programming language to use
961 */
962 GpuTileRegistry(GpuTargetLanguage language)
963 {
964 _language = language;
965 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100966
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100967 /**
968 * @brief Default destructor. Destroy the Gpu Tile Registry object
969 *
970 */
971 ~GpuTileRegistry() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100972
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100973 /**
974 * @brief Set the working IdSpace for the tile registry. IdSpace is used to prevent name collisions when declaring tiles.
975 * Therefore, the IdSpace should be set before declaring any tiles.
976 *
977 * @param[in] id The IdSpace id
978 */
979 void set_IdSpace(int32_t id)
980 {
981 _IdSpace = id;
982 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100983
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100984 /**
985 * @brief Get the current working IdSpace for the tile registry. IdSpace is used to prevent name collisions when declaring tiles
986 *
987 * @return The IdSpace id
988 */
989 int32_t IdSpace() const
990 {
991 return _IdSpace;
992 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100993
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100994 /**
995 * @brief Gets all the IdSpace declarations defined in the tile registry.
996 *
997 * @return all the IdSpace declarations defined in the tile registry as std::vector<int32_t>. It returns an empty vector if there are no IdSpace declarations.
998 */
999 std::vector<int32_t> IdSpace_declarations() const
1000 {
1001 std::vector<int32_t> x;
1002
1003 auto it = _frags.begin();
1004
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001005 while(it != _frags.end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001006 {
1007 x.push_back(it->first);
1008
1009 it++;
1010 }
1011
1012 return x;
1013 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001014
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001015 /**
1016 * @brief Declare a tile from a previously created tile
1017 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001018 void insert(const std::string &name, const IVectorTile *frag)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001019 {
1020 assert(_language == GpuTargetLanguage::OpenCL);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001021 const int32_t key_IdSpace = _IdSpace;
1022 const std::string key_var_name = name;
1023 const std::string var_name = frag->name();
1024 TileInfo format = frag->format();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001025
1026 // First check whether a tile with the same name exists
1027 IVectorTile *result = (*this)[key_var_name];
1028 assert(result == nullptr);
1029 if(result == nullptr)
1030 {
1031 std::unique_ptr<ClTile> tile = std::make_unique<ClTile>(var_name, format);
1032
1033 _frags[key_IdSpace][key_var_name].tile_object = std::move(tile);
1034 _frags[key_IdSpace][key_var_name].registry_level = _registry_level;
1035
1036 _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Link;
1037 _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name;
1038 _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace;
1039 _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level;
1040 }
1041 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001042
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001043 /**
1044 * @brief Declare a tile with TileInfo. The tile will be stored in the IdSpace set with @p set_IdSpace()
1045 *
1046 * @note The reference name used for declaring the tile should not be previously used in the IdSpace
1047 *
1048 * @param[in] name Reference name for the tile. The reference name can be used to retrieve the tile stored in the registry.
1049 * @param[in] format Tile format use to use
1050 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001051 void insert(const std::string &name, const TileInfo &format)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001052 {
1053 assert(_language == GpuTargetLanguage::OpenCL);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001054 const int32_t key_IdSpace = _IdSpace;
1055 const std::string key_var_name = name;
1056 const std::string var_name = generate_tile_name(name);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001057
1058 // First check whether a tile with the same name exists
1059 IVectorTile *result = (*this)[key_var_name];
1060 assert(result == nullptr);
1061 if(result == nullptr)
1062 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001063 std::unique_ptr<ClTile> tile = std::make_unique<ClTile>(var_name, format);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001064 _frags[key_IdSpace][key_var_name].tile_object = std::move(tile);
1065 _frags[key_IdSpace][key_var_name].registry_level = _registry_level;
1066
1067 _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Tile;
1068 _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name;
1069 _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace;
1070 _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level;
1071 }
1072 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001073
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001074 /**
1075 * @brief Declare a constant tile. The content of the tile is passed as a vector of std::string
1076 *
1077 * @note The reference name used for declaring the tile should not be previously used in the IdSpace
1078 *
1079 * @param[in] name Reference name for the tile. The reference name can be used to retrieve the tile stored in the registry.
1080 * @param[in] in A 3D std::vector of std::string. From the 3D std::vector we can know the dimensions for the tile
1081 * @param[in] dt The data type for the elements stored in the 3D std::vector as std::string. It is user's responsibilty to ensure
1082 * that the data type is aligned with the content of the std::string.
1083 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001084 void insert(const std::string &name, const std::vector<std::vector<std::string>> &in, DataType dt)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001085 {
1086 assert(_language == GpuTargetLanguage::OpenCL);
1087 const int32_t key_IdSpace = _IdSpace;
1088 const std::string key_var_name = name;
1089
1090 // First check whether a tile with the same name exists
1091 IVectorTile *result = (*this)[key_var_name];
1092 assert(result == nullptr);
1093 if(result == nullptr)
1094 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001095 std::unique_ptr<ClConstantTile> tile = std::make_unique<ClConstantTile>(in, dt);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001096 _frags[key_IdSpace][key_var_name].tile_object = std::move(tile);
1097 _frags[key_IdSpace][key_var_name].registry_level = _registry_level;
1098
1099 _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Tile;
1100 _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name;
1101 _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace;
1102 _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level;
1103 }
1104 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001105
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001106 /**
1107 * @brief Declare an anonymous constant tile. The content of the tile is passed as a vector of std::string
1108 *
1109 * @note This method can be used to declare temporary tiles that need to be accessed only once.
1110 *
1111 * @param[in] in A 3D std::vector of std::string. From the 3D std::vector we can know the dimensions for the tile
1112 * @param[in] dt The data type for the elements stored in the 3D std::vector as std::string. It is user responsibilty to ensure
1113 * that the data type is aligned with what passed with the std::string.
1114 *
1115 * @return IVectorTile* the anonymous constant tile
1116 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001117 IVectorTile *insert(const std::vector<std::vector<std::string>> &in, DataType dt)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001118 {
1119 assert(_language == GpuTargetLanguage::OpenCL);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001120 const int32_t key_IdSpace = _IdSpace;
1121 const std::string key_var_name = "_" + std::to_string(_anonymous_frag_count++);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001122
1123 // First check whether a tile with the same name exists
1124 IVectorTile *result = (*this)[key_var_name];
1125 assert(result == nullptr);
1126 if(result == nullptr)
1127 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001128 std::unique_ptr<ClConstantTile> tile = std::make_unique<ClConstantTile>(in, dt);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001129 _frags[key_IdSpace][key_var_name].tile_object = std::move(tile);
1130 _frags[key_IdSpace][key_var_name].registry_level = _registry_level;
1131
1132 _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Tile;
1133 _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name;
1134 _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace;
1135 _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level;
1136 }
1137
1138 return (*this)[key_var_name];
1139 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001140
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001141 /**
1142 * @brief Get the tile from the registry. This method searches the tile in the IdSpace provided by the user
1143 *
1144 * @param[in] name The name of the tile to retrieve
1145 * @param[in] IdSpace The IdSpace id where to search the tile
1146 *
1147 * @return IVectorTile* The tile
1148 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001149 IVectorTile *get(const std::string &name, int32_t IdSpace)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001150 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001151 const int32_t key_IdSpace = IdSpace;
1152 const std::string key_var_name = name;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001153
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001154 IVectorTile *result = nullptr;
1155 auto search_IdSpace = _frags.find(key_IdSpace);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001156 if(search_IdSpace != _frags.end())
1157 {
1158 auto search_tile = _frags[key_IdSpace].find(key_var_name);
1159 if(search_tile != _frags[key_IdSpace].end())
1160 {
1161 result = search_tile->second.tile_object.get();
1162 assert(result != nullptr);
1163 }
1164 }
1165
1166 return result;
1167 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001168
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001169 /**
1170 * @brief Get the tile from the registry. This method searches the tile in the IdSpace set with @p set_IdSpace()
1171 *
1172 * @param[in] name The name of the tile to retrieve
1173 *
1174 * @return IVectorTile* The tile
1175 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001176 IVectorTile *operator[](const std::string &name)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001177 {
1178 return get(name, _IdSpace);
1179 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001180
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001181 /**
1182 * @brief Check whether the tile in the in the IdSpace provided by the user exists
1183 *
1184 * @param[in] name Name of the tile to search for
1185 * @param[in] IdSpace The IdSpace id where to search the tile
1186 *
1187 * @return true if the tile exists
1188 * @return false if the tile does not exist
1189 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001190 bool has_tile(const std::string &name, int32_t IdSpace) const
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001191 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001192 const int32_t key_IdSpace = IdSpace;
1193 const std::string key_var_name = name;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001194
1195 // IVectorTile* result = nullptr;
1196 auto search_IdSpace = _frags.find(key_IdSpace);
1197
1198 return search_IdSpace != _frags.end();
1199 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001200
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001201 /**
1202 * @brief Check whether the tile within the current IdSpace exists
1203 *
1204 * @param[in] name Name of the tile to search for
1205 *
1206 * @return true if the tile exists
1207 * @return false if the tile does not exist
1208 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001209 bool has_tile(const std::string &name) const
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001210 {
1211 return has_tile(name, _IdSpace);
1212 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001213
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001214 /**
1215 * @brief Get all the tiles declared within the IdSpace provided by the user
1216 *
1217 * @param[in] IdSpace IdSpace where to retrieve all the declared tiles
1218 *
1219 * @return std::vector<IVectorTile*> A vector with all the declared tiles in the IdSpace provided by the user
1220 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001221 std::vector<IVectorTile *> tile_declarations(int32_t IdSpace)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001222 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001223 std::vector<IVectorTile *> tiles;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001224
1225 std::map<RegistryTileName, RegistryTileTypeTableEntry>::iterator it = _frag_types[IdSpace].begin();
1226
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001227 while(it != _frag_types[IdSpace].end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001228 {
1229 // The following line should be enabled. However, we cannot at this stage
1230 // because it used to retrieve the output tile produced by each component.
1231 // However, this method should NOT be used to retrieve the output tile
1232 //if(it->second.tile_type == RegistryTileType::Tile)
1233 {
1234 tiles.push_back(get(it->second.tile_name, it->second.registry_idspace));
1235 }
1236 it++;
1237 }
1238
1239 return tiles;
1240 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001241
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001242 /**
1243 * @brief Increase the level of stack.
1244 *
1245 */
1246 void increment_registry_level()
1247 {
1248 _registry_level++;
1249 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001250
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001251 /**
1252 * @brief Remove all the tiles declared at the current stack level and decrease the level of the stack.
1253 *
1254 */
1255 void decrement_registry_level()
1256 {
1257 assert(_registry_level >= 0);
1258
1259 // Remove all variables in the local scope
1260 std::map<RegistryTileName, RegistryTileTableEntry>::iterator it = _frags[_IdSpace].begin();
1261
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001262 while(it != _frags[_IdSpace].end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001263 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001264 if(it->second.registry_level == _registry_level)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001265 {
1266 it = _frags[_IdSpace].erase(it);
1267 }
1268 else
1269 {
1270 it++;
1271 }
1272 }
1273
1274 std::map<RegistryTileName, RegistryTileTypeTableEntry>::iterator it_type = _frag_types[_IdSpace].begin();
1275
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001276 while(it_type != _frag_types[_IdSpace].end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001277 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001278 if(it_type->second.registry_level == _registry_level)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001279 {
1280 it_type = _frag_types[_IdSpace].erase(it_type);
1281 }
1282 else
1283 {
1284 it_type++;
1285 }
1286 }
1287
1288 _registry_level--;
1289 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001290
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001291 /**
1292 * @brief Get the level of the stack
1293 *
1294 */
1295 int32_t level() const
1296 {
1297 return _registry_level;
1298 }
1299
1300private:
1301 // This method ensures that the key is unique among different components
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001302 std::string generate_tile_name(const std::string &name)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001303 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001304 assert(_IdSpace >= 0);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001305 if(_registry_level == 0)
1306 {
1307 return "_G" + std::to_string(_IdSpace) + "_" + name;
1308 }
1309 else
1310 {
1311 return name;
1312 }
1313 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001314
1315 RegistryTileTable _frags{};
1316 RegistryTileTypeTable _frag_types{};
1317 RegistryLevel _registry_level{ 0 };
1318 RegistryIdSpace _IdSpace{ -1 };
1319 int32_t _anonymous_frag_count{ 0 }; // Counter used to create the anonymous tiles
1320 GpuTargetLanguage _language{ GpuTargetLanguage::Unknown }; // Gpu programming language
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001321};
1322
1323using TensorEntry = std::unique_ptr<IGpuTensorArgument>;
1324
1325/**
1326 * @brief Data structure that contains the tensors consumed by the components.
1327 * Since different components may use the same name as reference for a tensor, the registry adopts the IdSpace concept, an @p id to prevent name collisions
1328 * when declaring tensors among different components.
1329 *
1330 */
1331class GpuTensorArgumentRegistry
1332{
1333public:
1334 /**
1335 * @brief Construct a new Gpu Tensor Registry object
1336 *
1337 */
1338 GpuTensorArgumentRegistry()
1339 {
1340 _language = GpuTargetLanguage::Unknown;
1341 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001342
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001343 /**
1344 * @brief Construct a new Gpu Tensor Registry object
1345 *
1346 * @param[in] language Gpu programming language to use
1347 */
1348 GpuTensorArgumentRegistry(GpuTargetLanguage language)
1349 {
1350 _language = language;
1351 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001352
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001353 /**
1354 * @brief Default destructor. Destroy the Gpu Tensor Registry object
1355 *
1356 */
1357 ~GpuTensorArgumentRegistry() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001358
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001359 /**
1360 * @brief Set the working IdSpace for the tensor registry. IdSpace is used to prevent name collisions when declaring tensors.
1361 * Therefore, the IdSpace should be set before declaring any tensors.
1362 *
1363 * @param[in] id The IdSpace id
1364 */
1365 void set_IdSpace(int32_t id)
1366 {
1367 _IdSpace = id;
1368 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001369
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001370 /**
1371 * @brief Get the current working IdSpace for the tensor registry. IdSpace is used to prevent name collisions when declaring tensors
1372 *
1373 * @return The IdSpace id
1374 */
1375 int32_t IdSpace() const
1376 {
1377 return _IdSpace;
1378 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001379
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001380 /**
1381 * @brief Gets all the IdSpace declarations defined in the tensor registry.
1382 *
1383 * @return all the IdSpace declarations defined in the tensor registry as std::vector<int32_t>. It returns an empty vector if there are no IdSpace declarations.
1384 */
1385 std::vector<int32_t> IdSpace_declarations() const
1386 {
1387 std::vector<int32_t> x;
1388
1389 auto it = _refs.begin();
1390
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001391 while(it != _refs.end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001392 {
1393 x.push_back(it->first);
1394
1395 it++;
1396 }
1397
1398 return x;
1399 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001400
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001401 /**
1402 * @brief Declare a tensor with TensorInfo. The tensor will be stored in the IdSpace set with @p set_IdSpace()
1403 *
1404 * @note The reference name used for declaring the tensor should not be previously used in the IdSpace
1405 *
1406 * @param[in] name Reference name for the tensor. The reference name can be used to retrieve the tensor stored in the registry.
1407 * @param[in] x Pair of tensor info and tensor id
1408 * @param[in] return_by_value_when_possible True if we want the value stored in the tensor components
1409 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001410 void insert(const std::string &name, const TensorInfo &x, bool return_by_value_when_possible)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001411 {
1412 assert(_language == GpuTargetLanguage::OpenCL);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001413 const int32_t key_IdSpace = _IdSpace;
1414 const int32_t tensor_id = x.id;
1415 const std::string key_var_name = name;
1416 const std::string var_name = generate_tensor_name(name, tensor_id);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001417
1418 // First, check whether the tensor has already a reference. If so, trigger an assert
1419 assert(!has_tensor_argument(name));
1420
1421 // Check whether a tensor with that tensorID exists
1422 auto result = _tensor_arguments.find(tensor_id);
1423 if(result == _tensor_arguments.end())
1424 {
1425 // It means that we haven't added a tensor with that tensor_id yet. Create a IGpuTensorArgument before creating the reference
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001426 std::unique_ptr<ClTensorArgument> arg = std::make_unique<ClTensorArgument>(var_name, x,
1427 return_by_value_when_possible);
1428 _tensor_arguments[tensor_id] = std::move(arg);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001429 }
1430
1431 _refs[key_IdSpace][key_var_name] = tensor_id;
1432 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001433
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001434 /**
1435 * @brief Get the tensor from the registry. This method searches the tensor in the IdSpace set with @p set_IdSpace()
1436 *
1437 * @param[in] name The name of the tensor to retrieve
1438 *
1439 * @return IGpuTensor* The tensor
1440 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001441 IGpuTensorArgument *operator[](const std::string &name)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001442 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001443 const int32_t key_IdSpace = _IdSpace;
1444 const std::string key_var_name = name;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001445
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001446 IGpuTensorArgument *result = nullptr;
1447 auto search_IdSpace = _refs.find(key_IdSpace);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001448 if(search_IdSpace != _refs.end())
1449 {
1450 auto search_tensor_id = _refs[key_IdSpace].find(key_var_name);
1451
1452 if(search_tensor_id != _refs[key_IdSpace].end())
1453 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001454 const int32_t tensor_id = search_tensor_id->second;
1455 auto search_tensor_argument = _tensor_arguments.find(tensor_id);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001456 if(search_tensor_argument != _tensor_arguments.end())
1457 {
1458 result = search_tensor_argument->second.get();
1459 }
1460 assert(result != nullptr);
1461 }
1462 }
1463
1464 return result;
1465 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001466
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001467 /**
1468 * @brief Get all the tensors declared in the IdSpace provided by the user
1469 *
1470 * @return std::vector<IGpuTensorArgument*> A vector with all the declared tensors
1471 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001472 std::vector<IGpuTensorArgument *> tensor_argument_declarations()
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001473 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001474 std::vector<IGpuTensorArgument *> args;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001475
1476 auto it = _tensor_arguments.begin();
1477
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001478 while(it != _tensor_arguments.end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001479 {
1480 args.push_back(it->second.get());
1481 it++;
1482 }
1483
1484 return args;
1485 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001486
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001487 /**
1488 * @brief Check whether the tensor argument in the IdSpace set with @p set_IdSpace() exists
1489 *
1490 * @param[in] name Name of the tensor argument to search for
1491 *
1492 * @return true if the tensor argument exists
1493 * @return false if the tensor argument does not exist
1494 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001495 bool has_tensor_argument(const std::string &name)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001496 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001497 const int32_t key_IdSpace = _IdSpace;
1498 const std::string key_var_name = name;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001499
1500 auto search_IdSpace = _refs.find(key_IdSpace);
1501
1502 if(search_IdSpace != _refs.end())
1503 {
1504 auto search_tensor_id = _refs[key_IdSpace].find(key_var_name);
1505
1506 return search_tensor_id != _refs[key_IdSpace].end();
1507 }
1508 else
1509 {
1510 return false;
1511 }
1512 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001513
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001514 /**
1515 * @brief Check whether the tensor argument is in the the IdSpace provided by the user
1516 *
1517 * @param[in] name Name of the tensor argument to search for
1518 * @param[in] IdSpace The IdSpace id where to search the tensor argument
1519 *
1520 * @return true if the tile exists
1521 * @return false if the tile does not exist
1522 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001523 bool has_tensor_argument(const std::string &name, int32_t IdSpace)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001524 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001525 const int32_t key_IdSpace = IdSpace;
1526 const std::string key_var_name = name;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001527
1528 auto search_IdSpace = _refs.find(key_IdSpace);
1529
1530 if(search_IdSpace != _refs.end())
1531 {
1532 auto search_tensor_id = _refs[key_IdSpace].find(key_var_name);
1533
1534 return search_tensor_id != _refs[key_IdSpace].end();
1535 }
1536 else
1537 {
1538 return false;
1539 }
1540 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001541
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001542private:
1543 // This method ensures that the key is unique among different components
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001544 std::string generate_tensor_name(const std::string &name, int32_t tensor_id)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001545 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001546 assert(tensor_id >= 0);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001547
1548 return name + std::to_string(tensor_id);
1549 }
1550
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001551 std::map<int32_t, TensorEntry> _tensor_arguments{};
1552 std::map<int32_t, std::map<std::string, int32_t>> _refs{};
1553 int32_t _IdSpace{ -1 };
1554 GpuTargetLanguage _language{ GpuTargetLanguage::Unknown }; // Gpu programming language
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001555};
1556
1557enum class OpType : int32_t
1558{
1559 Elementwise = 0x0000,
1560 Relational = 0x1000,
1561 Algebra = 0x2000
1562};
1563
1564inline std::string to_string(AssignmentOp op)
1565{
1566 switch(op)
1567 {
1568 case AssignmentOp::Decrement:
1569 return "-=";
1570 case AssignmentOp::Increment:
1571 return "+=";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001572 default:
1573 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01001574 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001575 }
1576}
1577
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01001578inline std::string to_string(UnaryOp op)
1579{
1580 switch(op)
1581 {
1582 case UnaryOp::LogicalNot:
1583 return "!";
1584 default:
1585 assert(false);
1586 return "";
1587 }
1588}
1589
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001590inline std::string to_string(BinaryOp op)
1591{
1592 switch(op)
1593 {
1594 case BinaryOp::Add:
1595 return "+";
1596 case BinaryOp::Sub:
1597 return "-";
1598 case BinaryOp::Mul:
1599 return "*";
1600 case BinaryOp::Div:
1601 return "/";
1602 case BinaryOp::Mod:
1603 return "%";
1604 case BinaryOp::Equal:
1605 return "==";
1606 case BinaryOp::Less:
1607 return "<";
1608 case BinaryOp::LessEqual:
1609 return "<=";
1610 case BinaryOp::Greater:
1611 return ">";
1612 case BinaryOp::GreaterEqual:
1613 return ">=";
1614 case BinaryOp::LogicalAnd:
1615 return "&&";
1616 case BinaryOp::LogicalOr:
1617 return "||";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001618 default:
1619 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01001620 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001621 }
1622}
1623
1624inline std::string binary_op_string(BinaryOp op)
1625{
1626 switch(op)
1627 {
1628 case BinaryOp::Add:
1629 return "add";
1630 case BinaryOp::Sub:
1631 return "sub";
1632 case BinaryOp::Mul:
1633 return "mul";
1634 case BinaryOp::Div:
1635 return "div";
1636 case BinaryOp::Mod:
1637 return "mod";
1638 case BinaryOp::Equal:
1639 return "eq";
1640 case BinaryOp::Less:
1641 return "gt";
1642 case BinaryOp::LessEqual:
1643 return "gteq";
1644 case BinaryOp::Greater:
1645 return "lt";
1646 case BinaryOp::GreaterEqual:
1647 return "lte";
1648 default:
1649 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01001650 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001651 }
1652}
1653
1654enum class OperandType : int32_t
1655{
1656 Unknown = 0x00000000,
1657 ScalarFp32 = 0x00001011, // Immediate scalar tile
1658 ScalarFp16 = 0x00001012, // Immediate scalar tile
1659 ScalarInt32 = 0x00001021, // Immediate scalar tile
1660 ScalarInt16 = 0x00001022, // Immediate scalar tile
1661 ScalarInt8 = 0x00001024, // Immediate scalar tile
1662 ScalarUInt32 = 0x00001031, // Immediate scalar tile
1663 ScalarUInt16 = 0x00001032, // Immediate scalar tile
1664 ScalarUInt8 = 0x00001034, // Immediate scalar tile
1665 ScalarBool = 0x00001041, // Immediate scalar tile
1666 ScalarTile = 0x00001050, // Scalar from a tile
1667 Tile = 0x00010000, // Tile
1668 TensorStride1 = 0x00100001, // Tensor component
1669 TensorStride2 = 0x00100002, // Tensor component
1670 TensorStride3 = 0x00100003, // Tensor component
1671 TensorStride4 = 0x00100004, // Tensor component
1672 TensorDim0 = 0x00100010, // Tensor component
1673 TensorDim1 = 0x00100020, // Tensor component
1674 TensorDim2 = 0x00100030, // Tensor component
1675 TensorDim3 = 0x00100040, // Tensor component
1676 TensorDim4 = 0x00100050, // Tensor component
1677 TensorC = 0x00100010, // Tensor component
1678 TensorW = 0x00100020, // Tensor component
1679 TensorH = 0x00100030, // Tensor component
1680 TensorD = 0x00100040, // Tensor component
1681 TensorN = 0x00100050, // Tensor component
1682 TensorDim1xDim2 = 0x00100100, // Tensor component
1683 TensorDim1xDim2xDim3 = 0x00100200, // Tensor component
1684 TensorWxH = 0x00100300, // Tensor component
1685 TensorWxHxD = 0x00100400, // Tensor component
1686 TensorDataOffset = 0x00100500, // Tensor component
1687};
1688
1689struct ScalarTileCoord
1690{
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001691 ScalarTileCoord()
1692 {
1693 }
1694
1695 ScalarTileCoord(int32_t x0, int32_t y0)
1696 : x(x0), y(y0)
1697 {
1698 }
1699
1700 int32_t x{ -1 };
1701 int32_t y{ -1 };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001702};
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001703
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001704/**
1705 * @brief Operand class. This object is used to pass the operands to the operations performed by the writer.
1706 * Operand can be of three types:
1707 * -# Scalar immediate: constant expression
1708 * -# Tile: A tile
1709 * -# Tensor component: A component (scalar) of a tensor
1710 *
1711 */
1712class Operand
1713{
1714public:
1715 Operand(const std::string &val)
1716 {
1717 _str = val;
1718 _type = OperandType::Tile;
1719 }
1720
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001721 Operand(const std::string &val, const ScalarTileCoord &coord)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001722 {
1723 _str = val;
1724 _type = OperandType::ScalarTile;
1725 _coord = coord;
1726 }
1727
1728 Operand(const std::string &val, OperandType type)
1729 {
1730 _str = val;
1731 _type = type;
1732 }
1733
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001734 Operand(const Operand &t)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001735 {
1736 _str = t.value();
1737 _type = t.type();
1738 }
1739
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001740 Operand &operator=(const Operand &t)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001741 {
1742 _str = t.value();
1743 _type = t.type();
1744 _coord = t.scalar_tile_coordinate();
1745 return *this;
1746 }
1747
1748 std::string value() const
1749 {
1750 return _str;
1751 }
1752
1753 OperandType type() const
1754 {
1755 return _type;
1756 }
1757
1758 ScalarTileCoord scalar_tile_coordinate() const
1759 {
1760 return _coord;
1761 }
1762
1763private:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001764 std::string _str{};
1765 OperandType _type{ OperandType::Unknown };
1766 ScalarTileCoord _coord{};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001767};
1768
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01001769using GpuSamplerTensorStorage = GpuTensorStorage;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001770
1771struct GpuSampler
1772{
1773 GpuSampler() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001774
1775 TensorSamplerFormat format{ TensorSamplerFormat::Unknown };
1776 GpuSamplerTensorStorage storage{ GpuSamplerTensorStorage::Unknown };
1777 TensorSamplerAddressModeX address_mode_x{ TensorSamplerAddressModeX::Unknown };
1778 TensorSamplerAddressModeY address_mode_y{ TensorSamplerAddressModeY::Unknown };
1779 TensorSamplerAddressModeZ address_mode_z{ TensorSamplerAddressModeZ::Unknown };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001780};
1781
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001782inline GpuSampler
1783create_simple_sampler(const TensorInfo *tensor_info_id, GpuSampler sampler, int32_t step_x, int32_t step_y,
1784 int32_t step_z)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001785{
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01001786 CKW_UNUSED(step_x, step_y, step_z);
1787
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001788 auto tensor = tensor_info_id->shape;
1789
1790 GpuSampler dst_sampler;
1791 dst_sampler.format = sampler.format;
1792 dst_sampler.storage = GpuSamplerTensorStorage::BufferUint8Ptr;
1793 dst_sampler.address_mode_x = sampler.address_mode_x;
1794 dst_sampler.address_mode_y = sampler.address_mode_y;
1795 dst_sampler.address_mode_z = sampler.address_mode_z;
1796
1797 int32_t dim_x = 0;
1798 int32_t dim_y = 0;
1799 int32_t dim_z = 0;
1800
1801 switch(sampler.format)
1802 {
1803 case TensorSamplerFormat::C_W_H:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001804 dim_x = tensor[0];
1805 dim_y = tensor[1];
1806 dim_z = tensor[2];
1807 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001808 case TensorSamplerFormat::C_WH_1:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001809 dim_x = tensor[0];
1810 dim_y = tensor[1] * tensor[2];
1811 dim_z = 1;
1812 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001813 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001814 std::cout << "Unsupported tensor format" << std::endl;
1815 assert(false);
1816 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001817 }
1818
1819 if(dim_x == 1)
1820 {
1821 assert(step_x == 1);
1822 dst_sampler.address_mode_x = TensorSamplerAddressModeX::None;
1823 }
1824
1825 if(dim_y == 1)
1826 {
1827 assert(step_y == 1);
1828 dst_sampler.address_mode_y = TensorSamplerAddressModeY::None;
1829 }
1830
1831 if(dim_z == 1)
1832 {
1833 assert(step_z == 1);
1834 dst_sampler.address_mode_z = TensorSamplerAddressModeZ::None;
1835 }
1836
1837 return dst_sampler;
1838}
1839
1840class GpuOutputSampler
1841{
1842public:
1843 GpuOutputSampler() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001844
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001845 /**
1846 * @brief Method used to initialize the GpuOutputSampler. The GpuOutputSampler can be initialized only once
1847 * by the root component. Once initialized, all simpler components will need to used this sampler
1848 * or a broadcasted version of it
1849 *
1850 * @param[in] sampler GpuSampler
1851 * @param[in] step_x Increment step in the X direction. Not necessarily it is the same of n0 of tile!
1852 * @param[in] step_y Increment step in the Y direction. Not necessarily it is the same of m0 of tile!
1853 * @param[in] step_z Increment step in the Z direction. Not necessarily it is the same of d0 of tile!
1854 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001855 void initialize(const TensorInfo *tensor_info_id, GpuSamplerTensorStorage tensor_storage,
1856 TensorSamplerFormat tensor_format, int32_t step_x, int32_t step_y, int32_t step_z)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001857 {
1858 assert(_is_initialized == false);
1859
1860 _step_x = step_x;
1861 _step_y = step_y;
1862 _step_z = step_z;
1863 _tensor_info_id = tensor_info_id;
1864 _sampler = create_sampler(tensor_storage, tensor_format);
1865 _is_initialized = true;
1866 };
1867
1868 GpuSampler sampler() const
1869 {
1870 return _sampler;
1871 };
1872
1873 int32_t step_x() const
1874 {
1875 return _step_x;
1876 };
1877
1878 int32_t step_y() const
1879 {
1880 return _step_y;
1881 };
1882
1883 int32_t step_z() const
1884 {
1885 return _step_z;
1886 };
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001887
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001888private:
1889 GpuSampler create_sampler(GpuSamplerTensorStorage tensor_storage, TensorSamplerFormat tensor_format)
1890 {
1891 // Output can only be in output mode
1892 assert(tensor_storage != GpuSamplerTensorStorage::Image2dReadOnly);
1893 assert(tensor_storage != GpuSamplerTensorStorage::Image3dReadOnly);
1894
1895 auto tensor = _tensor_info_id->shape;
1896
1897 GpuSampler sampler;
1898 sampler.format = tensor_format;
1899 sampler.storage = tensor_storage;
1900 sampler.address_mode_x = TensorSamplerAddressModeX::None;
1901 sampler.address_mode_y = TensorSamplerAddressModeY::None;
1902 sampler.address_mode_z = TensorSamplerAddressModeZ::None;
1903
1904 // In the case of texture, we do not need any special checks at the border
1905 if(tensor_storage == GpuSamplerTensorStorage::BufferUint8Ptr)
1906 {
1907 int32_t dim_x = 0;
1908 int32_t dim_y = 0;
1909 int32_t dim_z = 0;
1910
1911 switch(tensor_format)
1912 {
1913 case TensorSamplerFormat::C_W_H:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001914 dim_x = tensor[0];
1915 dim_y = tensor[1];
1916 dim_z = tensor[2];
1917 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001918 case TensorSamplerFormat::C_WH_1:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001919 dim_x = tensor[0];
1920 dim_y = tensor[1] * tensor[2];
1921 dim_z = 1;
1922 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001923 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001924 std::cout << "Unsupported tensor format" << std::endl;
1925 assert(false);
1926 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001927 }
1928
1929 if((dim_x % _step_x) != 0 && dim_x != 1)
1930 {
1931 sampler.address_mode_x = TensorSamplerAddressModeX::OverlappingMin;
1932 }
1933
1934 if((dim_y % _step_y) != 0 && dim_y != 1)
1935 {
1936 sampler.address_mode_y = TensorSamplerAddressModeY::ClampToMaxEdgeOnly;
1937 }
1938
1939 if((dim_z % _step_z) != 0 && dim_z != 1)
1940 {
1941 sampler.address_mode_z = TensorSamplerAddressModeZ::ClampToMaxEdgeOnly;
1942 }
1943 }
1944
1945 return sampler;
1946 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001947
1948 GpuSampler _sampler{}; // GpuSampler
1949 int32_t _step_x{ 1 };
1950 int32_t _step_y{ 1 };
1951 int32_t _step_z{ 1 };
1952 const TensorInfo *_tensor_info_id{ nullptr };
1953 bool _is_initialized{ false };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001954};
1955
1956/**
1957 * @brief Tensor operand class. This object is used to pass the operands as tensor to the operations performed by the writer.
1958 */
1959class TensorOperand
1960{
1961public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001962 TensorOperand(const std::string &val, GpuSampler sampler)
1963 : _str(val), _sampler(sampler)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001964 {
1965 }
1966
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001967 TensorOperand &operator=(const TensorOperand &t)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001968 {
1969 _str = t.value();
1970 _sampler = t.sampler();
1971 return *this;
1972 }
1973
1974 std::string value() const
1975 {
1976 return _str;
1977 }
1978
1979 GpuSampler sampler() const
1980 {
1981 return _sampler;
1982 }
1983
1984private:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001985 std::string _str{};
1986 GpuSampler _sampler{};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001987};
1988
1989/**
1990 * @brief Data structure that contains all the necessary information to write the Gpu kernel with the Gpu kernel Writer
1991 * This data structure must be initialized before being passed to the Gpu Kernel Writer
1992 *
1993 */
1994class GpuKernelWriterDataHolder
1995{
1996public:
1997 /**
1998 * @brief Construct a new Gpu Kernel Data object. In this phase, we should also store
1999 * the GPU target and target specific capabilities (extensions). For now, we just initialize the
2000 * programming language
2001 *
2002 * @param[in] language Gpu programming language to use
2003 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002004 GpuKernelWriterDataHolder(GpuTargetLanguage language)
2005 : tiles(language), arguments(language), code(""), _language(language)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002006 {
2007 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002008
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002009 /**
2010 * @brief Get the Gpu programming language used
2011 *
2012 * @return GpuTargetLanguage the Gpu programming language
2013 */
2014 GpuTargetLanguage programming_language() const
2015 {
2016 return _language;
2017 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002018
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002019 /**
2020 * @brief @ref GpuTileRegistry
2021 *
2022 */
2023 GpuTileRegistry tiles{};
2024 /**
2025 * @brief @ref GpuTensorArgumentRegistry
2026 *
2027 */
2028 GpuTensorArgumentRegistry arguments{};
2029 /**
2030 * @brief @ref GpuOutputSampler.
2031 *
2032 */
2033 GpuOutputSampler output_sampler{};
2034 /**
2035 * @brief Source code
2036 *
2037 */
2038 std::string code{};
2039
2040 // GpuExtensionRegistry extensions{};
2041private:
2042 GpuTargetLanguage _language;
2043};
2044
2045struct LWS
2046{
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002047 int32_t x{ 1 };
2048 int32_t y{ 1 };
2049 int32_t z{ 1 };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002050};
2051
2052/**
2053 * @brief Utility class used to get the tile from the operand. If the operand is not a tile, @ref OperandUnpacker
2054 * declare an anonymous tile in the tile registry.
2055 */
2056class OperandUnpacker
2057{
2058public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002059 OperandUnpacker(GpuTileRegistry &tiles, GpuTensorArgumentRegistry &arguments)
2060 : _tiles(tiles), _arguments(arguments)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002061 {
2062 // Increase the level of the stack to allocate possible temporary tiles
2063 _tiles.increment_registry_level();
2064 };
2065
2066 ~OperandUnpacker()
2067 {
2068 // Decrease the level of the stack to deallocate any temporary tiles
2069 _tiles.decrement_registry_level();
2070 }
2071
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002072 IVectorTile *unpack(const Operand &src)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002073 {
2074 // Get the tile
2075 if(src.type() == OperandType::Tile)
2076 {
2077 assert(_tiles.has_tile(src.value()));
2078 return _tiles[src.value()];
2079 }
2080 // Create an anonymous tile with a constant
2081 else if(static_cast<int32_t>(src.type()) & 0x00001000)
2082 {
2083 if(src.type() == OperandType::ScalarTile)
2084 {
2085 ScalarTileCoord coord = src.scalar_tile_coordinate();
2086 assert(_tiles.has_tile(src.value()));
2087 assert(coord.x >= 0);
2088 assert(coord.y >= 0);
2089 auto val = _tiles[src.value()]->scalar(coord.x, coord.y);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002090 return _tiles.insert({ { { val.str } } }, val.type.dt);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002091 }
2092 else
2093 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002094 return _tiles.insert({ { { src.value() } } }, to_tile_data_type(src.type()));
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002095 }
2096 }
2097 // Create an anonymous tile with the tensor component
2098 else
2099 {
2100 assert(_arguments.has_tensor_argument(src.value()));
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002101 auto x = _arguments[src.value()];
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002102 const std::string val = x->component(to_tensor_component(src.type()));
2103 const DataType dt = x->component_data_type();
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002104 return _tiles.insert({ { { val } } }, dt);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002105 }
2106 }
2107
2108private:
2109 DataType to_tile_data_type(OperandType x)
2110 {
2111 return static_cast<DataType>(static_cast<int32_t>(x) & 0x00ff);
2112 }
2113
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002114 TensorComponentType to_tensor_component(OperandType x)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002115 {
2116 switch(x)
2117 {
2118 case OperandType::TensorDim0:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002119 return TensorComponentType::Dim0;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002120 case OperandType::TensorDim1:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002121 return TensorComponentType::Dim1;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002122 case OperandType::TensorDim2:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002123 return TensorComponentType::Dim2;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002124 case OperandType::TensorDim3:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002125 return TensorComponentType::Dim3;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002126 case OperandType::TensorDim4:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002127 return TensorComponentType::Dim4;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002128 case OperandType::TensorStride1:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002129 return TensorComponentType::Stride1;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002130 case OperandType::TensorStride2:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002131 return TensorComponentType::Stride2;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002132 case OperandType::TensorStride3:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002133 return TensorComponentType::Stride3;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002134 case OperandType::TensorStride4:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002135 return TensorComponentType::Stride4;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002136 case OperandType::TensorDim1xDim2:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002137 return TensorComponentType::Dim1xDim2;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002138 case OperandType::TensorDim1xDim2xDim3:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002139 return TensorComponentType::Dim1xDim2xDim3;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002140 case OperandType::TensorDataOffset:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002141 return TensorComponentType::OffsetFirstElement;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002142 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002143 assert(false);
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002144 return TensorComponentType::Unknown;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002145 }
2146 }
2147
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002148 GpuTileRegistry &_tiles;
2149 GpuTensorArgumentRegistry &_arguments;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002150};
2151
2152/**
2153 * @brief Utility class used to get the tensor argument from the operand. If the operand is not a tile, @ref OperandUnpacker
2154 * declare an anonymous tile in the tile registry.
2155 * Tensor dimension reduction aims for reducing the tensor data dimension while keeping data's tensor structure.
2156 */
2157class TensorOperandUnpacker
2158{
2159public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002160 TensorOperandUnpacker(GpuTensorArgumentRegistry &arguments)
2161 : _arguments(arguments){};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002162
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002163 IGpuTensorArgument *unpack(const TensorOperand &src)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002164 {
2165 assert(_arguments.has_tensor_argument(src.value()));
2166 return _arguments[src.value()];
2167 }
2168
2169private:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002170 GpuTensorArgumentRegistry &_arguments;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002171};
2172
2173/**
2174 * @brief The GpuKernel will be used in three occasions (stages):
2175 * #- Compilation stage
2176 * #- Tuning stage
2177 * #- Dispatch stage
2178 */
2179struct GpuKernel
2180{
2181 // Compilation stage
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002182 std::string code{}; // Source code, required for the compilation stage
2183 std::vector<GpuExtensions> list_extensions{}; // Extensions, required for the compilation stage
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002184 // Tuning stage
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002185 std::string config_id{}; // Unique id, required for the tuning stage
2186 std::vector<LWS> list_lws{}; // LWS to test, required for the tuning stage
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002187 // Dispatch stage
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002188 GpuOutputSampler output_sampler{}; // GpuOutputSampler, required for the dispatch stage
2189 std::vector<std::pair<int32_t, GpuTensorStorage>> list_tensor_storages; // List of tensor storages, required for the dispatch stage
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002190 std::vector<std::pair<int32_t, TensorComponentType>> list_tensor_components; // List of tensor components (width, stride,..), required for the dispatch stage)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002191};
2192
2193// This function should produce an object with the source
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002194inline std::string generate_code(GpuKernelWriterDataHolder &in, const std::string &name)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002195{
2196 std::string code;
2197 code += "__kernel void ";
2198 code += name;
2199 code += "(\n";
2200
2201 auto IdSpaces = in.arguments.IdSpace_declarations();
2202
2203 std::vector<std::string> arg_str;
2204
2205 auto tensor_args = in.arguments.tensor_argument_declarations();
2206
2207 for(auto &i : tensor_args)
2208 {
2209 // For each tensor used, get the storage and tensor components
2210 auto storages = i->storage_declarations();
2211 auto components = i->component_declarations();
2212
2213 for(auto &y : storages)
2214 {
2215 std::string str;
2216 str += i->storage_type_declaration(y);
2217 str += " ";
2218 str += i->storage(y);
2219 arg_str.push_back(str);
2220 }
2221
2222 for(auto &y : components)
2223 {
2224 std::string str;
2225 str += i->component_type_declaration();
2226 str += " ";
2227 str += i->component(y);
2228 arg_str.push_back(str);
2229 }
2230 }
2231
2232 for(size_t i = 0; i < arg_str.size(); ++i)
2233 {
2234 code += arg_str[i];
2235 if(i + 1 < arg_str.size())
2236 {
2237 code += ",\n";
2238 }
2239 }
2240
2241 code += ")\n";
2242 code += "{\n";
2243 code += in.code;
2244 code += "}\n";
2245
2246 return code;
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002247}
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002248
2249/**
2250 * @brief This class is responsible to map a N-Tensor to a 3d tensor. The mapper needs the GpuSampler to know
2251 * how to reduce the dimensionality of a tensor
2252 *
2253 */
2254class GpuTensor3dMapper
2255{
2256public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002257 GpuTensor3dMapper(IGpuTensorArgument *tensor, GpuSampler sampler)
2258 : _sampler(sampler), _tensor(tensor){};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002259
2260 std::string tensor_component_x() const
2261 {
2262 const auto format = _sampler.format;
2263 switch(format)
2264 {
2265 case TensorSamplerFormat::C_WH_1:
2266 case TensorSamplerFormat::C_W_H:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002267 return _tensor->component(TensorComponentType::Dim0);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002268 default:
2269 std::cout << "Unsupported tensor format" << std::endl;
2270 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002271 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002272 }
2273 }
2274
2275 std::string tensor_component_y() const
2276 {
2277 const auto format = _sampler.format;
2278 switch(format)
2279 {
2280 case TensorSamplerFormat::C_WH_1:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002281 return _tensor->component(TensorComponentType::Dim1xDim2);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002282 case TensorSamplerFormat::C_W_H:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002283 return _tensor->component(TensorComponentType::Dim1);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002284 default:
2285 std::cout << "Unsupported tensor format" << std::endl;
2286 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002287 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002288 }
2289 }
2290
2291 std::string tensor_component_z() const
2292 {
2293 const auto format = _sampler.format;
2294 switch(format)
2295 {
2296 case TensorSamplerFormat::C_WH_1:
2297 return "1";
2298 case TensorSamplerFormat::C_W_H:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002299 return _tensor->component(TensorComponentType::Dim2);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002300 default:
2301 std::cout << "Unsupported tensor format" << std::endl;
2302 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002303 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002304 }
2305 }
2306
2307 std::string tensor_component_stride_y() const
2308 {
2309 const auto format = _sampler.format;
2310 switch(format)
2311 {
2312 case TensorSamplerFormat::C_WH_1:
2313 case TensorSamplerFormat::C_W_H:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002314 return _tensor->component(TensorComponentType::Stride1);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002315 default:
2316 std::cout << "Unsupported tensor format" << std::endl;
2317 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002318 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002319 }
2320 }
2321
2322 std::string tensor_component_stride_z() const
2323 {
2324 const auto format = _sampler.format;
2325 switch(format)
2326 {
2327 case TensorSamplerFormat::C_WH_1:
2328 return "0";
2329 case TensorSamplerFormat::C_W_H:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002330 return _tensor->component(TensorComponentType::Stride2);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002331 default:
2332 std::cout << "Unsupported tensor format" << std::endl;
2333 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002334 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002335 }
2336 }
2337
2338 std::string tensor_component_stride_batch() const
2339 {
2340 const auto format = _sampler.format;
2341 switch(format)
2342 {
2343 case TensorSamplerFormat::C_WH_1:
2344 case TensorSamplerFormat::C_W_H:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002345 return _tensor->component(TensorComponentType::Stride3);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002346 default:
2347 std::cout << "Unsupported tensor format" << std::endl;
2348 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002349 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002350 }
2351 }
2352
2353 bool is_one_component_x() const
2354 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002355 auto t = _tensor->format();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002356 const auto format = _sampler.format;
2357 switch(format)
2358 {
2359 case TensorSamplerFormat::C_WH_1:
2360 case TensorSamplerFormat::C_W_H:
2361 return t.shape[0] == 1;
2362 default:
2363 std::cout << "Unsupported tensor format" << std::endl;
2364 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002365 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002366 }
2367 }
2368
2369 bool is_one_component_y() const
2370 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002371 auto t = _tensor->format();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002372 const auto format = _sampler.format;
2373 switch(format)
2374 {
2375 case TensorSamplerFormat::C_WH_1:
2376 return (t.shape[1] * t.shape[2]) == 1;
2377 case TensorSamplerFormat::C_W_H:
2378 return t.shape[1] == 1;
2379 default:
2380 std::cout << "Unsupported tensor format" << std::endl;
2381 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002382 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002383 }
2384 }
2385
2386 bool is_one_component_z() const
2387 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002388 auto t = _tensor->format();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002389 const auto format = _sampler.format;
2390 switch(format)
2391 {
2392 case TensorSamplerFormat::C_WH_1:
2393 return true;
2394 case TensorSamplerFormat::C_W_H:
2395 return t.shape[2] == 1;
2396 default:
2397 std::cout << "Unsupported tensor format" << std::endl;
2398 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002399 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002400 }
2401 }
2402
2403 bool is_one_component_batch() const
2404 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002405 auto t = _tensor->format();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002406 const auto format = _sampler.format;
2407 switch(format)
2408 {
2409 case TensorSamplerFormat::C_WH_1:
2410 case TensorSamplerFormat::C_W_H:
2411 return t.shape[3] == 1;
2412 default:
2413 std::cout << "Unsupported tensor format" << std::endl;
2414 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002415 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002416 }
2417 }
2418
2419 GpuSampler gpu_sampler() const
2420 {
2421 return _sampler;
2422 }
2423
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002424 IGpuTensorArgument *tensor_argument() const
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002425 {
2426 return _tensor;
2427 }
2428
2429private:
2430 GpuSampler _sampler;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002431 IGpuTensorArgument *_tensor;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002432};
2433
2434struct GpuKernelWriterAttribute
2435{
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002436 bool return_tensor_component_by_value{ false };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002437};
2438
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002439enum class RoundingMode
2440{
2441 None,
2442 Rte,
2443 Rtz,
2444 Rtp,
2445 Rtn
2446};
2447
2448// https://llvm.org/docs/tutorial/MyFirstLanguageFrontend/LangImpl05.html
2449class IGpuKernelWriter
2450{
2451public:
2452 virtual ~IGpuKernelWriter() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002453
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002454 virtual void set_IdSpace(int32_t id) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002455
2456 virtual void import_tile(const std::string &dst, const IVectorTile *src) = 0;
2457
2458 virtual void declare_argument(const std::string &name, const TensorInfo &tensor) = 0;
2459
2460 virtual void declare_tile(const std::string &name, const TileInfo &info) = 0;
2461
2462 virtual void declare_const_tile(const std::string &name, const std::vector<std::vector<std::string>> &in, DataType dt) = 0;
2463
2464 virtual void write_text(const std::string &x) = 0;
2465
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002466 virtual void compound_statement_begin() = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002467
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002468 virtual void compound_statement_end() = 0;
2469
2470 // Operations
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002471 virtual void op_get_global_id(const Operand &dst_var, int32_t dim) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002472
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002473 virtual void op_get_global_coord(const Operand &dst, const Operand &step, const TensorOperand &tensor, int32_t dim) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002474
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002475 virtual void op_get_global_batch(const Operand &dst, const TensorOperand &tensor) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002476
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002477 virtual void op_get_global_size(const Operand &dst_var, int32_t dim) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002478
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002479 virtual void op_unary_expression(const Operand &dst, UnaryOp op, const Operand &src) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002480
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002481 virtual void op_binary_expression(const Operand &dst, const Operand &lhs, BinaryOp op, const Operand &rhs) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002482
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002483 virtual void op_assign(const Operand &dst_name, const Operand &src_name) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002484
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002485 virtual void op_unary_elementwise_function(const Operand &dst_name, UnaryFunction func, const Operand &src_name) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002486
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002487 virtual void op_binary_elementwise_function(const Operand &dst_name, BinaryFunction func, const Operand &first_name, const Operand &second_name) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002488
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002489 virtual void op_ternary_elementwise_function(const Operand &dst_name, TernaryFunction func, const Operand &first_name, const Operand &second_name, const Operand &third_name) = 0;
2490
2491 virtual void op_if_header(const Operand &lhs, BinaryOp op, const Operand &rhs) = 0;
2492
2493 virtual void op_else_if_header(const Operand &lhs, BinaryOp op, const Operand &rhs) = 0;
2494
2495 virtual void op_else_header() = 0;
2496
2497 virtual void op_for_loop_header(const Operand &var_name, BinaryOp cond_op, const Operand &cond_value, AssignmentOp update_op, const Operand &update_value) = 0;
2498
2499 virtual void op_load_indirect(const TensorOperand &tensor, const Operand &dst, const Operand &x, const Operand &y_indirect, const Operand &z, const Operand &b = Operand("0", OperandType::ScalarInt32)) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002500
2501 virtual void op_load_immediate(const TensorOperand &tensor, const Operand &dst, const Operand &x, const Operand &y, const Operand &z, const Operand &b = Operand("0", OperandType::ScalarInt32), const Operand &dilation_y = Operand("1", OperandType::ScalarInt32)) = 0;
2502
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002503 virtual void op_store_immediate(const TensorOperand &tensor, const Operand &src, const Operand &x, const Operand &y, const Operand &z, const Operand &b = Operand("0", OperandType::ScalarInt32)) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002504
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002505 virtual void op_cast_expression(const Operand &dst, const Operand &src, ConvertPolicy policy) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002506
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002507 virtual void op_return() = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002508
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002509 // Utils
2510 // It is the process of converting
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002511 virtual void util_get_indirect_buffer(const Operand &dst, const TensorOperand &tensor, const Operand &x,
2512 const Operand &y, const Operand &x_off, const Operand &y_off) = 0;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002513};
2514
2515enum class GpuLoadStoreType
2516{
2517 Load = 1,
2518 Store = 2
2519};
2520
2521class IGpuLoadStoreHelperWriter
2522{
2523public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002524 IGpuLoadStoreHelperWriter(IGpuKernelWriter *x, GpuTensor3dMapper mapper, GpuLoadStoreType type)
2525 : _writer(x), _mapper(mapper), _type(type)
2526 {
2527 }
2528
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002529 IGpuLoadStoreHelperWriter(const IGpuLoadStoreHelperWriter &) = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002530
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002531 IGpuLoadStoreHelperWriter &operator=(const IGpuLoadStoreHelperWriter &) = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002532
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002533 virtual ~IGpuLoadStoreHelperWriter() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002534
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002535 virtual void initialize(IVectorTile *dst, IVectorTile *x, IVectorTile *z, IVectorTile *b) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002536
2537 virtual void write(const std::pair<int32_t, std::string> &y) = 0;
2538
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002539 virtual void finalize() = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002540
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002541protected:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002542 IGpuKernelWriter *_writer;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002543 GpuTensor3dMapper _mapper;
2544 GpuLoadStoreType _type;
2545};
2546
2547class ClLoadStoreBufferHelperWriter : public IGpuLoadStoreHelperWriter
2548{
2549public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002550 ClLoadStoreBufferHelperWriter(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type)
2551 : IGpuLoadStoreHelperWriter(x, mapper, type)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002552 {
2553 }
2554
2555 ClLoadStoreBufferHelperWriter(const ClLoadStoreBufferHelperWriter &) = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002556
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002557 ClLoadStoreBufferHelperWriter &operator=(const ClLoadStoreBufferHelperWriter &) = default;
2558
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002559 static bool
2560 validate(IGpuKernelWriter *x, GpuTensor3dMapper mapper, GpuLoadStoreType type, IVectorTile *dst)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002561 {
2562 CKW_UNUSED(x, type, dst);
2563
2564 if(mapper.gpu_sampler().storage != GpuSamplerTensorStorage::BufferUint8Ptr)
2565 {
2566 return false;
2567 }
2568 return true;
2569 }
2570
2571 void initialize(IVectorTile *dst, IVectorTile *x, IVectorTile *z, IVectorTile *b) override
2572 {
2573 assert(validate(_writer, _mapper, _type, dst));
2574
2575 _dst = dst;
2576 _ls_width_full = dst->format().w;
2577
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002578 _coord_x = x->scalar(0, 0).str;
2579 _coord_z = z->scalar(0, 0).str;
2580 _coord_b = b->scalar(0, 0).str;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002581 _coord_orig_z = _coord_z;
2582
2583 out_of_bound_initialize_x(_coord_x);
2584 out_of_bound_initialize_z(_coord_z);
2585
2586 /*
2587 meaning of else:
2588 - x: partial load/store
2589 - y: no load/store operation
2590 - z: no load/store operation
2591 if(x)
2592 {
2593 if(z)
2594 {
2595 if(y)
2596 {
2597 // full load/store width
2598 }
2599 else
2600 {
2601 // no load/store
2602 }
2603 }
2604 else
2605 {
2606 // no load/store
2607 }
2608 }
2609 else
2610 {
2611 if(z)
2612 {
2613 if(y)
2614 {
2615 // partial load/store width
2616 }
2617 else
2618 {
2619 // no load/store
2620 }
2621 }
2622 else
2623 {
2624 // no load/store
2625 }
2626 }
2627 */
2628 }
2629
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002630 void write(const std::pair<int32_t, std::string> &y) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002631 {
2632 int32_t idx_y = y.first;
2633 std::string coord_y = y.second;
2634
2635 // The only check required is on Y.
2636 out_of_bound_initialize_y(coord_y);
2637
2638 const std::string dst = _dst->vector(idx_y).str;
2639 const std::string address = to_ls_buffer_address(_coord_x, coord_y, _coord_z, _coord_b);
2640 const std::string ls_buf = to_ls_buffer(_type, _ls_width_full, dst, address);
2641
2642 _writer->write_text(ls_buf);
2643 _writer->write_text(";\n");
2644
2645 out_of_bound_finalize_y(dst);
2646
2647 // The left over load/store will be written in the finalize stage
2648 if(_ls_width_part.size() != 0)
2649 {
2650 int32_t w = 0;
2651 for(auto &p : _ls_width_part)
2652 {
2653 const std::string dst0 = _dst->vector(w, p, idx_y).str;
2654 const std::string coord_x = _coord_x + " + " + std::to_string(w);
2655 const std::string address = to_ls_buffer_address(coord_x, coord_y, _coord_z, _coord_b);
2656 const std::string ls_buf0 = to_ls_buffer(_type, p, dst0, address);
2657 _leftovers_x.push_back(std::make_pair(std::make_pair(dst0, coord_y), ls_buf0));
2658
2659 w += p;
2660 }
2661 }
2662 }
2663
2664 void finalize() override
2665 {
2666 out_of_bound_finalize_z();
2667 out_of_bound_finalize_x();
2668 }
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002669
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002670private:
2671 IVectorTile *_dst{ nullptr };
2672 int32_t _ls_width_full{ 0 };
2673 std::vector<int32_t> _ls_width_part{};
2674 std::vector<std::pair<std::pair<std::string, std::string>, std::string>> _leftovers_x{};
2675 std::string _coord_x{};
2676 std::string _coord_z{};
2677 std::string _coord_orig_z{};
2678 std::string _coord_b{};
2679
2680 void out_of_bound_initialize_x(std::string &coord)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002681 {
2682 if(_mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin)
2683 {
2684 auto tensor_format = _mapper.tensor_argument()->format();
2685 auto shape = tensor_format.shape;
2686
2687 _ls_width_part = decompose_leftover_ls_vector_width(shape[0] % _ls_width_full);
2688 if(_ls_width_part.size() != 0)
2689 {
2690 _writer->write_text("if(" + coord + " > 0)\n");
2691 _writer->compound_statement_begin();
2692 }
2693 }
2694 };
2695
2696 void out_of_bound_finalize_x()
2697 {
2698 if(_mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin)
2699 {
2700 if(_ls_width_part.size() != 0)
2701 {
2702 _writer->compound_statement_end();
2703 _writer->write_text("else\n");
2704 _writer->compound_statement_begin();
2705
2706 out_of_bound_initialize_z(_coord_orig_z);
2707 for(auto &i : _leftovers_x)
2708 {
2709 out_of_bound_initialize_y(i.first.second);
2710 _writer->write_text(i.second);
2711 _writer->write_text(";\n");
2712 out_of_bound_finalize_y(i.first.first);
2713 }
2714 out_of_bound_finalize_z();
2715 _writer->compound_statement_end();
2716 }
2717 }
2718 };
2719
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002720 void out_of_bound_initialize_y(std::string &coord)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002721 {
2722 std::string max = "";
2723
2724 const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
2725
2726 switch(address_mode_y)
2727 {
2728 case TensorSamplerAddressModeY::Skip:
2729 case TensorSamplerAddressModeY::ClampToBorder:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002730 // NOTE: This line should not be moved outside of the switch statement.
2731 // The reason for that is because when we query the component, the component is marked as used
2732 // and added to the list of arguments of the kernel. Since, not in all cases this component is required,
2733 // we should request the component only when used
2734 max = _mapper.tensor_component_y();
2735 _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n");
2736 _writer->compound_statement_begin();
2737 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002738 case TensorSamplerAddressModeY::SkipMinEdgeOnly:
2739 case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002740 _writer->write_text("if(" + coord + " >= 0)\n");
2741 _writer->compound_statement_begin();
2742 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002743 case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
2744 case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002745 max = _mapper.tensor_component_y();
2746 _writer->write_text("if(" + coord + " < " + max + ")\n");
2747 _writer->compound_statement_begin();
2748 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002749 case TensorSamplerAddressModeY::ClampToNearest:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002750 max = _mapper.tensor_component_y();
2751 coord = "clamp(" + coord + ", 0, " + max + " - 1)";
2752 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002753 case TensorSamplerAddressModeY::ClampToMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002754 max = _mapper.tensor_component_y();
2755 coord = "min(" + coord + ", " + max + " - 1)";
2756 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002757 case TensorSamplerAddressModeY::ClampToMinEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002758 coord = "max(" + coord + ", 0)";
2759 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002760 case TensorSamplerAddressModeY::None:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002761 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002762 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002763 std::cout << "Unsupported address mode for write_out_of_bound_check_yz" << std::endl;
2764 assert(false);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002765 }
2766 };
2767
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002768 void out_of_bound_finalize_y(const std::string &dst)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002769 {
2770 const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
2771
2772 switch(address_mode_y)
2773 {
2774 case TensorSamplerAddressModeY::ClampToBorder:
2775 case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
2776 case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
2777 case TensorSamplerAddressModeY::Skip:
2778 case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
2779 case TensorSamplerAddressModeY::SkipMinEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002780 _writer->compound_statement_end();
2781 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002782
2783 default:
2784 assert(false);
2785 }
2786
2787 switch(address_mode_y)
2788 {
2789 case TensorSamplerAddressModeY::ClampToBorder:
2790 case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
2791 case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002792 _writer->write_text("else\n");
2793 _writer->compound_statement_begin();
2794 _writer->write_text(dst);
2795 _writer->write_text(" = 0.0f;\n");
2796 _writer->compound_statement_end();
2797 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002798
2799 default:
2800 assert(false);
2801 }
2802 };
2803
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002804 void out_of_bound_initialize_z(std::string &coord)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002805 {
2806 std::string max = "";
2807
2808 const auto address_mode_z = _mapper.gpu_sampler().address_mode_z;
2809
2810 switch(address_mode_z)
2811 {
2812 case TensorSamplerAddressModeZ::Skip:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002813 max = _mapper.tensor_component_z();
2814 _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n");
2815 _writer->compound_statement_begin();
2816 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002817 case TensorSamplerAddressModeZ::SkipMinEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002818 _writer->write_text("if(" + coord + " >= 0)\n");
2819 _writer->compound_statement_begin();
2820 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002821 case TensorSamplerAddressModeZ::SkipMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002822 max = _mapper.tensor_component_z();
2823 _writer->write_text("if(" + coord + " < " + max + ")\n");
2824 _writer->compound_statement_begin();
2825 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002826 case TensorSamplerAddressModeZ::ClampToNearest:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002827 max = _mapper.tensor_component_z();
2828 coord = "clamp(" + coord + ", 0, " + max + " - 1)";
2829 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002830 case TensorSamplerAddressModeZ::ClampToMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002831 max = _mapper.tensor_component_z();
2832 coord = "min(" + coord + ", " + max + " - 1)";
2833 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002834 case TensorSamplerAddressModeZ::ClampToMinEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002835 coord = "max(" + coord + ", 0)";
2836 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002837 case TensorSamplerAddressModeZ::None:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002838 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002839 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002840 std::cout << "Unsupported address mode for write_out_of_bound_check_yz" << std::endl;
2841 assert(false);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002842 }
2843 };
2844
2845 void out_of_bound_finalize_z()
2846 {
2847 const auto address_mode_z = _mapper.gpu_sampler().address_mode_z;
2848
2849 switch(address_mode_z)
2850 {
2851 case TensorSamplerAddressModeZ::Skip:
2852 case TensorSamplerAddressModeZ::SkipMinEdgeOnly:
2853 case TensorSamplerAddressModeZ::SkipMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002854 _writer->compound_statement_end();
2855 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002856
2857 default:
2858 assert(false);
2859 }
2860 };
2861
2862 std::vector<int32_t> decompose_leftover_ls_vector_width(int32_t ls_leftover_vector_width) const
2863 {
2864 std::vector<int32_t> x;
2865
2866 switch(ls_leftover_vector_width)
2867 {
2868 case 0:
2869 break;
2870 case 1:
2871 case 2:
2872 case 3:
2873 case 4:
2874 case 8:
2875 case 16:
2876 x.push_back(ls_leftover_vector_width);
2877 break;
2878 case 5:
2879 x.push_back(4);
2880 x.push_back(1);
2881 break;
2882 case 6:
2883 x.push_back(4);
2884 x.push_back(2);
2885 break;
2886 case 7:
2887 x.push_back(4);
2888 x.push_back(3);
2889 break;
2890 case 9:
2891 x.push_back(8);
2892 x.push_back(1);
2893 break;
2894 case 10:
2895 x.push_back(8);
2896 x.push_back(2);
2897 break;
2898 case 11:
2899 x.push_back(8);
2900 x.push_back(3);
2901 break;
2902 case 12:
2903 x.push_back(8);
2904 x.push_back(4);
2905 break;
2906 case 13:
2907 x.push_back(8);
2908 x.push_back(4);
2909 x.push_back(1);
2910 break;
2911 case 14:
2912 x.push_back(8);
2913 x.push_back(4);
2914 x.push_back(2);
2915 break;
2916 case 15:
2917 x.push_back(8);
2918 x.push_back(4);
2919 x.push_back(3);
2920 break;
2921
2922 default:
2923 assert(false);
2924 }
2925 return x;
2926 }
2927
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002928 std::string to_ls_buffer(GpuLoadStoreType type, int32_t vector_width, const std::string &data,
2929 const std::string &address)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002930 {
2931 switch(type)
2932 {
2933 case GpuLoadStoreType::Load:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002934 if(vector_width != 1)
2935 {
2936 return data + " = vload" + std::to_string(vector_width) + "(0, " + address + ")";
2937 }
2938 else
2939 {
2940 return data + " = *(" + address + ")";
2941 }
2942 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002943 case GpuLoadStoreType::Store:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002944 if(vector_width != 1)
2945 {
2946 return "vstore" + std::to_string(vector_width) + "(" + data + ", 0, " + address + ")";
2947 }
2948 else
2949 {
2950 return "*(" + address + ") = " + data;
2951 }
2952 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002953 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002954 std::cout << "Unsupported GpuLoadStoreType" << std::endl;
2955 assert(false);
2956 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002957 }
2958 }
2959
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002960 std::string to_ls_buffer_address(const std::string &x, const std::string &y, const std::string &z,
2961 const std::string &b) const
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002962 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002963 auto tensor_storage = static_cast<GpuTensorStorage>(_mapper.gpu_sampler().storage);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002964 assert(tensor_storage == GpuTensorStorage::BufferUint8Ptr);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002965 const std::string ptr_buf = _mapper.tensor_argument()->storage(tensor_storage);
2966 const std::string dst_type = get_cl_data_type(_dst->format().dt, 1);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002967
2968 std::string address;
2969 address += "(__global ";
2970 address += dst_type;
2971 address += "*)(";
2972 address += ptr_buf;
2973 if(x != "0" && (_mapper.is_one_component_x() != true))
2974 {
2975 address += " + (";
2976 address += x + ") * sizeof(" + dst_type + ")";
2977 }
2978 if(y != "0" && (_mapper.is_one_component_y() != true))
2979 {
2980 const std::string stride_y = _mapper.tensor_component_stride_y();
2981 address += " + (";
2982 address += y + ")";
2983 address += " * ";
2984 address += stride_y;
2985 }
2986 if(z != "0" && (_mapper.is_one_component_z() != true))
2987 {
2988 const std::string stride_z = _mapper.tensor_component_stride_z();
2989 address += " + (";
2990 address += z + ")";
2991 address += " * ";
2992 address += stride_z;
2993 }
2994 if(b != "0" && (_mapper.is_one_component_batch() != true))
2995 {
2996 const std::string stride_b = _mapper.tensor_component_stride_batch();
2997 address += " + (";
2998 address += b + ")";
2999 address += " * ";
3000 address += stride_b;
3001 }
3002 address += ")";
3003 return address;
3004 }
3005};
3006
3007class ClLoadStoreImage2dHelperWriter : public IGpuLoadStoreHelperWriter
3008{
3009public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003010 static bool
3011 validate(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type, IVectorTile *dst)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003012 {
3013 CKW_UNUSED(x);
3014
3015 if(dst->format().w != 4)
3016 {
3017 return false;
3018 }
3019 if(mapper.gpu_sampler().address_mode_x != TensorSamplerAddressModeX::None)
3020 {
3021 return false;
3022 }
3023 if(mapper.gpu_sampler().address_mode_z != TensorSamplerAddressModeZ::None)
3024 {
3025 return false;
3026 }
3027 if(mapper.gpu_sampler().storage != GpuSamplerTensorStorage::Image2dReadOnly && type == GpuLoadStoreType::Load)
3028 {
3029 return false;
3030 }
3031 if(mapper.gpu_sampler().storage != GpuSamplerTensorStorage::Image2dWriteOnly && type == GpuLoadStoreType::Store)
3032 {
3033 return false;
3034 }
3035 if((dst->format().dt != DataType::Fp32) && (dst->format().dt != DataType::Fp16))
3036 {
3037 return false;
3038 }
3039 return true;
3040 /*
3041 - x: Only GpuSamplerAddressModeX::None is supported and vector length = 4
3042 - z: Only GpuSamplerAddressModeZ::None is supported
3043 */
3044 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003045
3046 ClLoadStoreImage2dHelperWriter(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type)
3047 : IGpuLoadStoreHelperWriter(x, mapper, type)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003048 {
3049 }
3050
3051 ClLoadStoreImage2dHelperWriter(const ClLoadStoreImage2dHelperWriter &) = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003052
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003053 ClLoadStoreImage2dHelperWriter &operator=(const ClLoadStoreImage2dHelperWriter &) = default;
3054
3055 void initialize(IVectorTile *dst, IVectorTile *x, IVectorTile *z, IVectorTile *b) override
3056 {
3057 assert(validate(_writer, _mapper, _type, dst));
3058
3059 _dst = dst;
3060 _ls_width_full = dst->format().w;
3061 _coord_x = x->scalar(0, 0).str;
3062 _coord_z = z->scalar(0, 0).str;
3063 _coord_b = b->scalar(0, 0).str;
3064
3065 /*
3066 if(y)
3067 {
3068 // full load/store width
3069 }
3070 else
3071 {
3072 // no load/store
3073 }
3074 */
3075 }
3076
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003077 void write(const std::pair<int32_t, std::string> &y) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003078 {
3079 int32_t idx_y = y.first;
3080 std::string coord_y = y.second;
3081
3082 // The only check required is on Y.
3083 out_of_bound_initialize_y(coord_y);
3084
3085 const std::string dst = _dst->vector(idx_y).str;
3086 const std::string sampler = to_ls_image2d_sampler();
3087 const std::string coord = to_ls_image2d_coord(_coord_x, coord_y, _coord_z, _coord_b);
3088 const std::string ls_buf = to_ls_image2d(_type, _ls_width_full, dst, sampler, coord);
3089
3090 _writer->write_text(ls_buf);
3091 _writer->write_text(";\n");
3092
3093 out_of_bound_finalize_y(dst);
3094 }
3095
3096 void finalize() override
3097 {
3098 }
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003099
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003100private:
3101 IVectorTile *_dst{ nullptr };
3102 int32_t _ls_width_full{ 0 };
3103 std::string _coord_x{};
3104 std::string _coord_z{};
3105 std::string _coord_b{};
3106
3107 void out_of_bound_initialize_y(std::string &coord)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003108 {
3109 std::string max = "";
3110
3111 const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
3112
3113 switch(address_mode_y)
3114 {
3115 case TensorSamplerAddressModeY::Skip:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003116 max = _mapper.tensor_component_y();
3117 _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n");
3118 _writer->compound_statement_begin();
3119 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003120 case TensorSamplerAddressModeY::SkipMinEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003121 _writer->write_text("if(" + coord + " >= 0)\n");
3122 _writer->compound_statement_begin();
3123 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003124 case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003125 max = _mapper.tensor_component_y();
3126 _writer->write_text("if(" + coord + " < " + max + ")\n");
3127 _writer->compound_statement_begin();
3128 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003129 case TensorSamplerAddressModeY::ClampToBorder:
3130 case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
3131 case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
3132 case TensorSamplerAddressModeY::ClampToNearest:
3133 case TensorSamplerAddressModeY::ClampToMaxEdgeOnly:
3134 case TensorSamplerAddressModeY::ClampToMinEdgeOnly:
3135 case TensorSamplerAddressModeY::None:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003136 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003137 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003138 std::cout << "Unsupported address mode for write_out_of_bound_check_y" << std::endl;
3139 assert(false);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003140 }
3141 };
3142
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003143 void out_of_bound_finalize_y(const std::string &dst)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003144 {
3145 CKW_UNUSED(dst);
3146
3147 const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
3148
3149 switch(address_mode_y)
3150 {
3151 case TensorSamplerAddressModeY::Skip:
3152 case TensorSamplerAddressModeY::SkipMinEdgeOnly:
3153 case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003154 _writer->compound_statement_end();
3155 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003156
3157 default:
3158 assert(false);
3159 }
3160 };
3161
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003162 std::string to_ls_image2d(GpuLoadStoreType type, int32_t vector_width, const std::string &data,
3163 const std::string &sampler, const std::string &coord)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003164 {
3165 CKW_UNUSED(vector_width);
3166
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003167 auto tensor_storage = static_cast<GpuTensorStorage>(_mapper.gpu_sampler().storage);
3168 const std::string image2d_obj = _mapper.tensor_argument()->storage(tensor_storage);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003169 const std::string post_fix = _dst->format().dt == DataType::Fp32 ? "f" : "h";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003170
3171 switch(type)
3172 {
3173 case GpuLoadStoreType::Load:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003174 return data + " = read_image" + post_fix + "(" + image2d_obj + ", " + sampler + ", " + coord + ")";
3175 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003176 case GpuLoadStoreType::Store:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003177 return "write_image" + post_fix + "(" + image2d_obj + ", " + coord + ", " + data + ")";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003178 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003179 assert(false);
3180 std::cout << "Unsupported GpuLoadStoreType" << std::endl;
3181 assert(false);
3182 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003183 }
3184 }
3185
3186 std::string to_ls_image2d_sampler() const
3187 {
3188 const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
3189
3190 switch(address_mode_y)
3191 {
3192 case TensorSamplerAddressModeY::None:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003193 return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003194 case TensorSamplerAddressModeY::Skip:
3195 case TensorSamplerAddressModeY::SkipMinEdgeOnly:
3196 case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
3197 case TensorSamplerAddressModeY::ClampToBorder:
3198 case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
3199 case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003200 return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003201 case TensorSamplerAddressModeY::ClampToNearest:
3202 case TensorSamplerAddressModeY::ClampToMaxEdgeOnly:
3203 case TensorSamplerAddressModeY::ClampToMinEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003204 return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003205 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003206 std::cout << "Unsupported address_mode_coord" << std::endl;
3207 assert(false);
3208 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003209 }
3210 }
3211
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003212 std::string to_ls_image2d_coord(const std::string &x, const std::string &y, const std::string &z,
3213 const std::string &b) const
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003214 {
3215 std::string coord_x = "(" + x + ") >> 2";
3216 std::string coord_y = "(";
3217
3218 if(y != "0" && (_mapper.is_one_component_y() != true))
3219 {
3220 coord_y += y;
3221 }
3222 if(z != "0" && (_mapper.is_one_component_z() != true))
3223 {
3224 const std::string dim = _mapper.tensor_component_y();
3225 coord_y += " + (";
3226 coord_y += z + ")";
3227 coord_y += " * ";
3228 coord_y += dim;
3229 }
3230 if(b != "0" && (_mapper.is_one_component_batch() != true))
3231 {
3232 const std::string dim0 = _mapper.tensor_component_y();
3233 const std::string dim1 = _mapper.tensor_component_z();
3234 coord_y += " + (";
3235 coord_y += b + ")";
3236 coord_y += " * ";
3237 coord_y += dim0;
3238 coord_y += " * ";
3239 coord_y += dim1;
3240 }
3241 coord_y += ")";
3242 return "(int2)(" + coord_x + ", " + coord_y + ")";
3243 }
3244};
3245
3246/** IGpuLoadStoreHelperWriter factory class */
3247class ClLoadStoreHelperWriterFactory final
3248{
3249public:
3250 /** Static method to call the IGpuLoadStoreHelperWriter class accordingly with the tensor storage set in the mapper
3251 *
3252 *
3253 * @return IGpuLoadStoreHelperWriter
3254 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003255 static std::unique_ptr<IGpuLoadStoreHelperWriter>
3256 create(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003257 {
3258 const auto tensor_storage = mapper.gpu_sampler().storage;
3259 switch(tensor_storage)
3260 {
3261 case GpuSamplerTensorStorage::BufferUint8Ptr:
3262 return std::make_unique<ClLoadStoreBufferHelperWriter>(x, mapper, type);
3263 case GpuSamplerTensorStorage::Image2dReadOnly:
3264 case GpuSamplerTensorStorage::Image2dWriteOnly:
3265 return std::make_unique<ClLoadStoreImage2dHelperWriter>(x, mapper, type);
3266 default:
3267 std::cout << "Unsupported Gpu tensor storage" << std::endl;
3268 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01003269 return nullptr;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003270 }
3271 }
3272};
3273
3274// This utility method needs to go in utils.h
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003275inline bool is_tile_scalar(const IVectorTile *x)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003276{
3277 return x->format().w == 1 && x->format().h == 1;
3278}
3279
3280class ClKernelWriter : public IGpuKernelWriter
3281{
3282public:
3283 ClKernelWriter(GpuKernelWriterAttribute *attr, GpuKernelWriterDataHolder *x)
3284 {
3285 _data = x;
3286 _attr = attr;
3287 }
3288
3289 ClKernelWriter(const ClKernelWriter &) = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003290
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003291 ClKernelWriter &operator=(const ClKernelWriter &) = default;
3292
3293 // A IdSpaced ID is a term used to describe a fragment that is registered in ICode to ensure
3294 // there are no conflicts or ambiguity in the code
3295 void set_IdSpace(int32_t id) override
3296 {
3297 _data->tiles.set_IdSpace(id);
3298 _data->arguments.set_IdSpace(id);
3299 }
3300
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003301 void import_tile(const std::string &dst_name, const IVectorTile *src) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003302 {
3303 _data->tiles.insert(dst_name, src);
3304 }
3305
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003306 void declare_argument(const std::string &name, const TensorInfo &tensor) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003307 {
3308 assert(_data->arguments[name] == nullptr);
3309 _data->arguments.insert(name, tensor, _attr->return_tensor_component_by_value);
3310 }
3311
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003312 void declare_tile(const std::string &name, const TileInfo &format) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003313 {
3314 assert(_data->tiles[name] == nullptr);
3315 _data->tiles.insert(name, format);
3316
3317 IVectorTile *x = _data->tiles[name];
3318
3319 for(auto &t : x->underlying_source_variables())
3320 {
3321 _data->code += t.type.str + " " + t.str + ";\n";
3322 }
3323 }
3324
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003325 void declare_const_tile(const std::string &name, const std::vector<std::vector<std::string>> &in,
3326 DataType dt) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003327 {
3328 assert(_data->tiles[name] == nullptr);
3329 _data->tiles.insert(name, in, dt);
3330 // Note: A constant does not need to be declared in the code
3331 }
3332
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003333 void write_text(const std::string &x) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003334 {
3335 _data->code += x;
3336 }
3337
3338 void compound_statement_begin() override
3339 {
3340 _data->tiles.increment_registry_level();
3341 _data->code += "{\n";
3342 }
3343
3344 void compound_statement_end() override
3345 {
3346 _data->tiles.decrement_registry_level();
3347 _data->code += "}\n";
3348 }
3349
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003350 void op_get_global_id(const Operand &dst_var, int32_t dim) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003351 {
3352 assert(dst_var.type() == OperandType::Tile);
3353 assert(_data->tiles.has_tile(dst_var.value()));
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003354 assert(_data->tiles[dst_var.value()]->format().w == 1 && _data->tiles[dst_var.value()]->format().h == 1); // It must be a scalar variable
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003355
3356 auto var = _data->tiles[dst_var.value()];
3357
3358 _data->code += var->scalar(0, 0).str;
3359 _data->code += " = get_global_id(";
3360 _data->code += std::to_string(dim);
3361 _data->code += ");\n";
3362 };
3363
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003364 void op_get_global_coord(const Operand &o_dst, const Operand &o_step, const TensorOperand &o_tensor,
3365 int32_t dim) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003366 {
3367 OperandUnpacker operands(_data->tiles, _data->arguments);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003368 auto dst = operands.unpack(o_dst);
3369 auto step = operands.unpack(o_step);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003370
3371 // Validation: Check that x, y and z are scalar
3372
3373 TensorOperandUnpacker tensor_operands(_data->arguments);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003374 auto tensor = tensor_operands.unpack(o_tensor);
3375 auto gpu_sampler = o_tensor.sampler();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003376
3377 GpuTensor3dMapper mapper(tensor, gpu_sampler);
3378
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003379 switch(dim)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003380 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003381 case 0:
3382 if(mapper.is_one_component_x())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003383 {
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003384 _data->code += dst->scalar(0, 0).str;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003385 _data->code += " = 0;\n";
3386 }
3387 else
3388 {
3389 if(mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin)
3390 {
3391 // Validation: Check: fixed tensor shape
3392 // TO BE CHANGED
3393 _data->code += dst->scalar(0, 0).str;
3394 _data->code += " = get_global_id(0) * ";
3395 _data->code += step->scalar(0, 0).str;
3396 _data->code += ";\n";
3397 }
3398 else
3399 {
3400 _data->code += dst->scalar(0, 0).str;
3401 _data->code += " = get_global_id(0) * ";
3402 _data->code += step->scalar(0, 0).str;
3403 _data->code += ";\n";
3404 }
3405 }
3406 break;
3407 case 1:
3408 if(mapper.is_one_component_y())
3409 {
3410 _data->code += dst->scalar(0, 0).str;
3411 _data->code += " = 0;\n";
3412 }
3413 else
3414 {
3415 if(mapper.gpu_sampler().address_mode_y == TensorSamplerAddressModeY::OverlappingMin)
3416 {
3417 }
3418 else
3419 {
3420 _data->code += dst->scalar(0, 0).str;
3421 _data->code += " = get_global_id(1) * ";
3422 _data->code += step->scalar(0, 0).str;
3423 _data->code += ";\n";
3424 }
3425 }
3426 break;
3427 case 2:
3428 if(mapper.is_one_component_z())
3429 {
3430 _data->code += dst->scalar(0, 0).str;
3431 _data->code += " = 0;\n";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003432 }
3433 else
3434 {
3435 _data->code += dst->scalar(0, 0).str;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003436 _data->code += " = get_global_id(2) * ";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003437 _data->code += step->scalar(0, 0).str;
3438 _data->code += ";\n";
3439 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003440 break;
3441 default:
3442 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003443 }
3444 };
3445
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003446 void op_get_global_batch(const Operand &o_dst, const TensorOperand &o_tensor) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003447 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003448 OperandUnpacker operands(_data->tiles, _data->arguments);
3449 const IVectorTile *dst = operands.unpack(o_dst);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003450
3451 TensorOperandUnpacker tensor_operands(_data->arguments);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003452 IGpuTensorArgument *tensor = tensor_operands.unpack(o_tensor);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003453 auto gpu_sampler = o_tensor.sampler();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003454
3455 GpuTensor3dMapper mapper(tensor, gpu_sampler);
3456
3457 if(mapper.is_one_component_batch())
3458 {
3459 _data->code += dst->scalar(0, 0).str;
3460 _data->code += " = 0;\n";
3461 }
3462 else
3463 {
3464 std::cout << "Unsupported batched computation" << std::endl;
3465 assert(false);
3466 }
3467 };
3468
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003469 void op_get_global_size(const Operand &dst_var, int32_t dim) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003470 {
3471 assert(dst_var.type() == OperandType::Tile);
3472 assert(_data->tiles.has_tile(dst_var.value()));
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003473 assert(_data->tiles[dst_var.value()]->format().w == 1 && _data->tiles[dst_var.value()]->format().h == 1); // It must be a scalar variable
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003474
3475 auto var = _data->tiles[dst_var.value()];
3476
3477 _data->code += var->scalar(0, 0).str;
3478 _data->code += " = get_global_size(";
3479 _data->code += std::to_string(dim);
3480 _data->code += ");\n";
3481 }
3482
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003483 void op_unary_expression(const Operand &dst_name, UnaryOp op, const Operand &src_name) override
3484 {
3485 OperandUnpacker operands(_data->tiles, _data->arguments);
3486 const IVectorTile *src = operands.unpack(src_name);
3487 const IVectorTile *dst = operands.unpack(dst_name);
3488
3489 const int32_t dst_w = dst->format().w;
3490 const int32_t dst_h = dst->format().h;
3491 const int32_t src_w = src->format().w;
3492 const std::string dt = dst->underlying_source_variables()[0].type.str;
3493
3494 const bool broadcast_src_x = dst_w != 1 && src_w == 1;
3495
3496 const std::string src_prefix = broadcast_src_x ? "(" + dt + ")" : "";
3497
3498 // Broadcasting on Y is automatic
3499 for(int32_t y = 0; y < dst_h; ++y)
3500 {
3501 _data->code += dst->vector(y).str;
3502 _data->code += " = ";
3503 _data->code += to_string(op);
3504 _data->code += src_prefix + src->vector(y).str;
3505 _data->code += ";\n";
3506 }
3507 }
3508
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003509 void op_binary_expression(const Operand &dst_name, const Operand &lhs_name, BinaryOp op,
3510 const Operand &rhs_name) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003511 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003512 OperandUnpacker operands(_data->tiles, _data->arguments);
3513 const IVectorTile *lhs = operands.unpack(lhs_name);
3514 const IVectorTile *rhs = operands.unpack(rhs_name);
3515 const IVectorTile *dst = operands.unpack(dst_name);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003516
3517 const int32_t dst_w = dst->format().w;
3518 const int32_t dst_h = dst->format().h;
3519 assert(lhs != nullptr);
3520 const int32_t lhs_w = lhs->format().w;
3521 const int32_t rhs_w = rhs->format().w;
3522
3523 if(op == BinaryOp::MatMul_Nt_T)
3524 {
3525 assert((dst->format().dt == DataType::Fp32) || (dst->format().dt == DataType::Fp16));
3526 for(int32_t y = 0; y < dst_h; ++y)
3527 {
3528 for(int32_t x = 0; x < dst_w; ++x)
3529 {
3530 for(int32_t k = 0; k < lhs_w; ++k)
3531 {
3532 _data->code += dst->scalar(x, y).str;
3533 _data->code += " = fma(";
3534 _data->code += lhs->scalar(k, y).str;
3535 _data->code += ", ";
3536 _data->code += rhs->scalar(k, x).str;
3537 _data->code += ", ";
3538 _data->code += dst->scalar(x, y).str;
3539 _data->code += ");\n";
3540 }
3541 }
3542 }
3543
3544 return;
3545 }
3546
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003547 const bool broadcast_lhs_x = dst_w != 1 && lhs_w == 1;
3548 const bool broadcast_rhs_x = dst_w != 1 && rhs_w == 1;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003549
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003550 const std::string lhs_prefix = broadcast_lhs_x ? "(" + dst->underlying_source_variables()[0].type.str + ")" : "";
3551 const std::string rhs_prefix = broadcast_rhs_x ? "(" + dst->underlying_source_variables()[0].type.str + ")" : "";
3552 const std::string op_str = to_string(op);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003553
3554 // Broadcasting on Y is automatic
3555 for(int32_t y = 0; y < dst_h; ++y)
3556 {
3557 _data->code += dst->vector(y).str;
3558 _data->code += " = ";
3559 _data->code += lhs_prefix + lhs->vector(y).str;
3560 _data->code += " ";
3561 _data->code += op_str;
3562 _data->code += " ";
3563 _data->code += rhs_prefix + rhs->vector(y).str;
3564 _data->code += ";\n";
3565 }
3566 };
3567
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003568 void op_cast_expression(const Operand &o_dst, const Operand &o_src, ConvertPolicy policy) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003569 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003570 OperandUnpacker operands(_data->tiles, _data->arguments);
3571 const IVectorTile *src = operands.unpack(o_src);
3572 const IVectorTile *dst = operands.unpack(o_dst);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003573
3574 // const int32_t dst_w = dst->format().w;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003575 const int32_t dst_h = dst->format().h;
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003576 const std::string dt = dst->underlying_source_variables()[0].type.str;
3577 const std::string sat = (policy == ConvertPolicy::Saturate ? "_sat" : "");
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003578
3579 // Broadcasting on Y is automatic
3580 for(int32_t y = 0; y < dst_h; ++y)
3581 {
3582 _data->code += dst->vector(y).str;
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003583 _data->code += " = convert_" + dt + sat + "(";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003584 _data->code += src->vector(y).str;
3585 _data->code += ");\n";
3586 }
3587 };
3588
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003589 void op_assign(const Operand &dst_name, const Operand &src_name) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003590 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003591 OperandUnpacker operands(_data->tiles, _data->arguments);
3592 const IVectorTile *src = operands.unpack(src_name);
3593 const IVectorTile *dst = operands.unpack(dst_name);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003594
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003595 const int32_t dst_w = dst->format().w;
3596 const int32_t dst_h = dst->format().h;
3597 const int32_t src_w = src->format().w;
3598 const std::string dt = dst->underlying_source_variables()[0].type.str;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003599
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003600 const bool broadcast_src_x = dst_w != 1 && src_w == 1;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003601
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003602 const std::string src_prefix = broadcast_src_x ? "(" + dt + ")" : "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003603
3604 // Broadcasting on Y is automatic
3605 for(int32_t y = 0; y < dst_h; ++y)
3606 {
3607 _data->code += dst->vector(y).str;
3608 _data->code += " = ";
3609 _data->code += src_prefix + src->vector(y).str;
3610 _data->code += ";\n";
3611 }
3612 }
3613
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003614 void
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003615 op_unary_elementwise_function(const Operand &dst_name, UnaryFunction func, const Operand &src_name) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003616 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003617 OperandUnpacker operands(_data->tiles, _data->arguments);
3618 const IVectorTile *src = operands.unpack(src_name);
3619 const IVectorTile *dst = operands.unpack(dst_name);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003620
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003621 const int32_t dst_w = dst->format().w;
3622 const int32_t dst_h = dst->format().h;
3623 const int32_t src_w = src->format().w;
3624 const std::string dt = dst->underlying_source_variables()[0].type.str;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003625
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003626 const bool broadcast_src_x = dst_w != 1 && src_w == 1;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003627
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003628 const std::string src_prefix = broadcast_src_x ? "(" + dt + ")" : "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003629
3630 // Broadcasting on Y is automatic
3631 for(int32_t y = 0; y < dst_h; ++y)
3632 {
3633 _data->code += dst->vector(y).str;
3634 _data->code += " = ";
3635
3636 switch(func)
3637 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003638 case UnaryFunction::Exp:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003639 _data->code += "exp(";
3640 break;
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003641 case UnaryFunction::Tanh:
3642 _data->code += "tanh(";
3643 break;
3644 case UnaryFunction::Sqrt:
3645 _data->code += "sqrt(";
3646 break;
3647 case UnaryFunction::Erf:
3648 _data->code += "erf(";
3649 break;
3650 case UnaryFunction::Fabs:
3651 _data->code += "fabs(";
3652 break;
3653 case UnaryFunction::IsGreaterEqual:
3654 _data->code += "isgreaterequal(";
3655 break;
3656 case UnaryFunction::Log:
3657 _data->code += "log(";
3658 break;
3659 case UnaryFunction::SizeOf:
3660 _data->code += "sizeof(";
3661 break;
3662 case UnaryFunction::Round:
3663 _data->code += "round(";
3664 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003665 default:
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003666 CKW_ASSERT_MSG(false, "Unexpected UnaryFunction used.");
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003667 }
3668
3669 _data->code += src_prefix + src->vector(y).str;
3670 _data->code += ");\n";
3671 }
3672 }
3673
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003674 void op_binary_elementwise_function(const Operand &dst_name, BinaryFunction func, const Operand &first_name, const Operand &second_name) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003675 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003676 OperandUnpacker operands(_data->tiles, _data->arguments);
3677 const IVectorTile *first = operands.unpack(first_name);
3678 const IVectorTile *second = operands.unpack(second_name);
3679 const IVectorTile *dst = operands.unpack(dst_name);
3680
3681 const int32_t dst_w = dst->format().w;
3682 const int32_t dst_h = dst->format().h;
3683 const int32_t first_w = first->format().w;
3684 const int32_t second_w = second->format().w;
3685 const auto datatype = dst->underlying_source_variables()[0].type;
3686 const std::string datatype_str = datatype.str;
3687
3688 const bool broadcast_first_x = dst_w != 1 && first_w == 1;
3689 const bool broadcast_second_x = dst_w != 1 && second_w == 1;
3690
3691 const std::string first_prefix = broadcast_first_x ? "(" + datatype_str + ")" : "";
3692 const std::string second_prefix = broadcast_second_x ? "(" + datatype_str + ")" : "";
3693
3694 const bool is_float = (datatype.dt == DataType::Fp32 || datatype.dt == DataType::Fp16);
3695
3696 // Broadcasting on Y is automatic
3697 for(int32_t y = 0; y < dst_h; ++y)
3698 {
3699 _data->code += dst->vector(y).str;
3700 _data->code += " = ";
3701
3702 switch(func)
3703 {
3704 case BinaryFunction::Min:
3705 _data->code += is_float ? "fmin(" : "min(";
3706 break;
3707 case BinaryFunction::Max:
3708 _data->code += is_float ? "fmax(" : "max(";
3709 break;
3710 default:
3711 CKW_ASSERT_MSG(false, "Unexpected BinaryFunction used.");
3712 }
3713
3714 _data->code += first_prefix + first->vector(y).str;
3715 _data->code += ", ";
3716 _data->code += second_prefix + second->vector(y).str;
3717 _data->code += ");\n";
3718 }
3719 }
3720
3721 void op_ternary_elementwise_function(const Operand &dst_name, TernaryFunction func, const Operand &first_name, const Operand &second_name, const Operand &third_name) override
3722 {
3723 OperandUnpacker operands(_data->tiles, _data->arguments);
3724 const IVectorTile *first = operands.unpack(first_name);
3725 const IVectorTile *second = operands.unpack(second_name);
3726 const IVectorTile *third = operands.unpack(third_name);
3727 const IVectorTile *dst = operands.unpack(dst_name);
3728
3729 const int32_t dst_w = dst->format().w;
3730 const int32_t dst_h = dst->format().h;
3731 const int32_t first_w = first->format().w;
3732 const int32_t second_w = second->format().w;
3733 const int32_t third_w = third->format().w;
3734 const std::string dt = dst->underlying_source_variables()[0].type.str;
3735
3736 const bool broadcast_first_x = dst_w != 1 && first_w == 1;
3737 const bool broadcast_second_x = dst_w != 1 && second_w == 1;
3738 const bool broadcast_third_x = dst_w != 1 && third_w == 1;
3739
3740 const std::string first_prefix = broadcast_first_x ? "(" + dt + ")" : "";
3741 const std::string second_prefix = broadcast_second_x ? "(" + dt + ")" : "";
3742 const std::string third_prefix = broadcast_third_x ? "(" + dt + ")" : "";
3743
3744 // Broadcasting on Y is automatic
3745 for(int32_t y = 0; y < dst_h; ++y)
3746 {
3747 _data->code += dst->vector(y).str;
3748 _data->code += " = ";
3749
3750 switch(func)
3751 {
3752 case TernaryFunction::Select:
3753 _data->code += "select(";
3754 break;
3755 default:
3756 CKW_ASSERT_MSG(false, "Unexpected TernaryFunction used.");
3757 }
3758
3759 _data->code += first_prefix + first->vector(y).str;
3760 _data->code += ", ";
3761 _data->code += second_prefix + second->vector(y).str;
3762 _data->code += ", ";
3763 _data->code += third_prefix + third->vector(y).str;
3764 _data->code += ");\n";
3765 }
3766 }
3767
3768 void op_if_header(const Operand &o_lhs, BinaryOp op, const Operand &o_rhs) override
3769 {
3770 OperandUnpacker operands(_data->tiles, _data->arguments);
3771 const IVectorTile *lhs = operands.unpack(o_lhs);
3772 const IVectorTile *rhs = operands.unpack(o_rhs);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003773
3774 assert(is_tile_scalar(lhs));
3775 assert(is_tile_scalar(rhs));
3776
3777 _data->code += "if(";
3778 _data->code += lhs->scalar(0, 0).str;
3779 _data->code += " ";
3780 _data->code += to_string(op);
3781 _data->code += " ";
3782 _data->code += rhs->scalar(0, 0).str;
3783 _data->code += ")\n";
3784 }
3785
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003786 void op_else_if_header(const Operand &o_lhs, BinaryOp op, const Operand &o_rhs) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003787 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003788 _data->code += "else ";
3789 op_if_header(o_lhs, op, o_rhs);
3790 }
3791
3792 void op_else_header() override
3793 {
3794 _data->code += "else\n";
3795 }
3796
3797 void op_for_loop_header(const Operand& var_name, BinaryOp cond_op, const Operand& cond_value_name, AssignmentOp update_op, const Operand& update_value_name) override
3798 {
3799 OperandUnpacker operands(_data->tiles, _data->arguments);
3800 const IVectorTile *var = operands.unpack(var_name);
3801 const IVectorTile *cond_value = operands.unpack(cond_value_name);
3802 const IVectorTile *update_value = operands.unpack(update_value_name);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003803
3804 const int32_t dst_w = var->format().w;
3805 const int32_t dst_h = var->format().h;
3806
3807 // It must be a scalar variable
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01003808 CKW_UNUSED(dst_w, dst_h);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003809 assert(dst_w == 1);
3810 assert(dst_h == 1);
3811
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003812 _data->code += "for(; ";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003813 _data->code += var->scalar(0, 0).str;
3814 _data->code += " ";
3815 _data->code += to_string(cond_op);
3816 _data->code += " " + cond_value->scalar(0, 0).str + "; ";
3817 _data->code += var->scalar(0, 0).str;
3818 _data->code += " ";
3819 _data->code += to_string(update_op);
3820 _data->code += " " + update_value->scalar(0, 0).str + ")";
3821 _data->code += "\n";
3822 }
3823
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003824 void op_load_immediate(const TensorOperand &o_tensor, const Operand &o_dst, const Operand &o_x,
3825 const Operand &o_y, const Operand &o_z, const Operand &o_batch_idx,
3826 const Operand &dilation_y) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003827 {
3828 OperandUnpacker operands(_data->tiles, _data->arguments);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003829
3830 // Not const as it requires changes to 'load_writer'.
3831 IVectorTile *dst = operands.unpack(o_dst);
3832 IVectorTile *x = operands.unpack(o_x);
3833 IVectorTile *y = operands.unpack(o_y);
3834 IVectorTile *z = operands.unpack(o_z);
3835 IVectorTile *dil_y = operands.unpack(dilation_y);
3836 IVectorTile *b = operands.unpack(o_batch_idx);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003837
3838 TensorOperandUnpacker tensor_operands(_data->arguments);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003839 IGpuTensorArgument *tensor = tensor_operands.unpack(o_tensor);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003840 auto gpu_sampler = o_tensor.sampler();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003841
3842 GpuTensor3dMapper mapper(tensor, gpu_sampler);
3843
3844 auto load_writer = ClLoadStoreHelperWriterFactory::create(this, mapper, GpuLoadStoreType::Load);
3845
3846 // Initialize the constant part
3847 load_writer->initialize(dst, x, z, b);
3848
3849 for(int i = 0; i < dst->format().h; ++i)
3850 {
3851 std::string coord_y = y->scalar(0, 0).str + " + " + std::to_string(i);
3852 if(dil_y->scalar(0, 0).str != "1")
3853 {
3854 coord_y += " * " + dil_y->scalar(0, 0).str;
3855 }
3856 load_writer->write(std::make_pair(i, coord_y));
3857 }
3858
3859 load_writer->finalize();
3860 }
3861
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003862 void op_load_indirect(const TensorOperand &o_tensor, const Operand &o_dst, const Operand &o_x,
3863 const Operand &o_indirect_h, const Operand &o_z,
3864 const Operand &o_batch_idx) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003865 {
3866 OperandUnpacker operands(_data->tiles, _data->arguments);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003867
3868 // Not const as it requires changes to 'load_writer'.
3869 IVectorTile *dst = operands.unpack(o_dst);
3870 IVectorTile *x = operands.unpack(o_x);
3871 IVectorTile *y_ind = operands.unpack(o_indirect_h);
3872 IVectorTile *z = operands.unpack(o_z);
3873 IVectorTile *b = operands.unpack(o_batch_idx);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003874
3875 TensorOperandUnpacker tensor_operands(_data->arguments);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003876 IGpuTensorArgument *tensor = tensor_operands.unpack(o_tensor);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003877 auto gpu_sampler = o_tensor.sampler();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003878
3879 GpuTensor3dMapper mapper(tensor, gpu_sampler);
3880
3881 auto load_writer = ClLoadStoreHelperWriterFactory::create(this, mapper, GpuLoadStoreType::Load);
3882
3883 // Initialize the constant part
3884 load_writer->initialize(dst, x, z, b);
3885
3886 for(int i = 0; i < dst->format().h; ++i)
3887 {
3888 load_writer->write(std::make_pair(i, y_ind->scalar(0, i).str));
3889 }
3890
3891 load_writer->finalize();
3892 }
3893
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003894 void op_store_immediate(const TensorOperand &tensor_name, const Operand &src_name, const Operand &x_name,
3895 const Operand &y_name, const Operand &z_name,
3896 const Operand &batch_index_name) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003897 {
3898 OperandUnpacker operands(_data->tiles, _data->arguments);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003899
3900 // Not const as it requires changes to 'load_writer'.
3901 IVectorTile *src = operands.unpack(src_name);
3902 IVectorTile *x = operands.unpack(x_name);
3903 IVectorTile *y = operands.unpack(y_name);
3904 IVectorTile *z = operands.unpack(z_name);
3905 IVectorTile *b = operands.unpack(batch_index_name);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003906
3907 TensorOperandUnpacker tensor_operands(_data->arguments);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003908 IGpuTensorArgument *tensor = tensor_operands.unpack(tensor_name);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003909 auto gpu_sampler = tensor_name.sampler();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003910
3911 GpuTensor3dMapper mapper(tensor, gpu_sampler);
3912
3913 auto store_writer = ClLoadStoreHelperWriterFactory::create(this, mapper, GpuLoadStoreType::Store);
3914
3915 // Initialize the constant part
3916 store_writer->initialize(src, x, z, b);
3917
3918 int32_t tile_h = src->format().h;
3919
3920 for(int m0 = tile_h - 1; m0 >= 0; m0--)
3921 {
3922 store_writer->write(std::make_pair(m0, y->scalar(0, 0).str + " + " + std::to_string(m0)));
3923 }
3924
3925 store_writer->finalize();
3926 }
3927
3928 void op_return() override
3929 {
3930 _data->code += "return;\n";
3931 }
3932
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003933 void util_get_indirect_buffer(const Operand &o_dst, const TensorOperand &o_tensor, const Operand &o_x,
3934 const Operand &o_y, const Operand &o_x_off, const Operand &o_y_off) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003935 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003936 OperandUnpacker operands(_data->tiles, _data->arguments);
3937 const IVectorTile *dst = operands.unpack(o_dst);
3938 const IVectorTile *x = operands.unpack(o_x);
3939 const IVectorTile *y = operands.unpack(o_y);
3940 const IVectorTile *x_off = operands.unpack(o_x_off);
3941 const IVectorTile *y_off = operands.unpack(o_y_off);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003942
3943 TensorOperandUnpacker tensor_operands(_data->arguments);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003944 IGpuTensorArgument *tensor = tensor_operands.unpack(o_tensor);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003945
3946 assert(dst->format().w == 1);
3947 assert(x->format().w == 1);
3948 assert(y->format().w == 1);
3949 assert(x_off->format().w == 1);
3950 assert(y_off->format().w == 1);
3951 assert(dst->format().dt == DataType::Int32);
3952 assert(x->format().dt == DataType::Int32);
3953 assert(y->format().dt == DataType::Int32);
3954 assert(x_off->format().dt == DataType::Int32);
3955 assert(y_off->format().dt == DataType::Int32);
3956
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01003957 const std::string width = tensor->component(TensorComponentType::Dim1);
3958 const std::string height = tensor->component(TensorComponentType::Dim2);
3959 const std::string wxh = tensor->component(TensorComponentType::Dim1xDim2);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003960 /*
3961 int x_s;
3962 int y_s;
3963 x_s = (xi_0 + x_k);
3964 y_s = (yi_0 + y_k);
3965 mi_0 = x_s + y_s * width + b * widthxheight;
3966 mi_0 = select(-1, mi_0, x_s >= 0);
3967 mi_0 = select(-1, mi_0, y_s >= 0);
3968 mi_0 = select(-1, mi_0, x_s < 128);
3969 mi_0 = select(-1, mi_0, y_s < 128);
3970 */
3971 compound_statement_begin();
3972 declare_tile("_x_s", TileInfo(DataType::Int32));
3973 declare_tile("_y_s", TileInfo(DataType::Int32));
3974 auto x_s = operands.unpack(Operand("_x_s"));
3975 auto y_s = operands.unpack(Operand("_y_s"));
3976 for(int i = 0; i < dst->format().h; ++i)
3977 {
3978 // x_s = (xi_0 + x_k);
3979 // y_s = (yi_0 + y_k);
3980 _data->code += x_s->scalar(0, i).str;
3981 _data->code += " = (";
3982 _data->code += x->scalar(0, i).str;
3983 _data->code += " + ";
3984 _data->code += x_off->scalar(0, i).str;
3985 _data->code += ");\n";
3986 _data->code += y_s->scalar(0, i).str;
3987 _data->code += " = (";
3988 _data->code += y->scalar(0, i).str;
3989 _data->code += " + ";
3990 _data->code += y_off->scalar(0, i).str;
3991 _data->code += ");\n";
3992 // mi_0 = x_s + y_s * width;
3993 _data->code += dst->scalar(0, i).str;
3994 _data->code += " = ";
3995 _data->code += x_s->scalar(0, i).str;
3996 _data->code += " + ";
3997 _data->code += y_s->scalar(0, i).str;
3998 _data->code += " * " + width + ";\n";
3999 // mi_0 = select(wxh, mi_0, x_s >= 0);
4000 _data->code += dst->scalar(0, i).str;
4001 _data->code += " = select(-1, ";
4002 _data->code += dst->scalar(0, i).str;
4003 _data->code += ", ";
4004 _data->code += x_s->scalar(0, i).str;
4005 _data->code += " >= 0);\n";
4006 // mi_0 = select(wxh, mi_0, y_s >= 0);
4007 _data->code += dst->scalar(0, i).str;
4008 _data->code += " = select(-1, ";
4009 _data->code += dst->scalar(0, i).str;
4010 _data->code += ", ";
4011 _data->code += y_s->scalar(0, i).str;
4012 _data->code += " >= 0);\n";
4013 // mi_0 = select(wxh, mi_0, x_s < width);
4014 _data->code += dst->scalar(0, i).str;
4015 _data->code += " = select(-1, ";
4016 _data->code += dst->scalar(0, i).str;
4017 _data->code += ", ";
4018 _data->code += x_s->scalar(0, i).str;
4019 _data->code += " < ";
4020 _data->code += width + ");\n";
4021 // mi_0 = select(wxh, mi_0, y_s < height);
4022 _data->code += dst->scalar(0, i).str;
4023 _data->code += " = select(-1, ";
4024 _data->code += dst->scalar(0, i).str;
4025 _data->code += ", ";
4026 _data->code += y_s->scalar(0, i).str;
4027 _data->code += " < ";
4028 _data->code += height + ");\n";
4029 }
4030 compound_statement_end();
4031 }
4032
4033private:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01004034 GpuKernelWriterDataHolder *_data{ nullptr };
4035 GpuKernelWriterAttribute *_attr{ nullptr };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004036};
4037
4038/** IGpuKernelWriter factory class */
4039class GpuKernelWriterFactory final
4040{
4041public:
4042 /** Static method to call the IGpuKernelWriter class accordingly with the Gpu programming language
4043 *
4044 * @param[in] gpu GPU target
4045 *
4046 * @return IGpuKernelWriter
4047 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01004048 static std::unique_ptr<IGpuKernelWriter>
4049 create(GpuKernelWriterAttribute *attr, GpuKernelWriterDataHolder *x)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004050 {
4051 switch(x->programming_language())
4052 {
4053 case GpuTargetLanguage::OpenCL:
4054 return std::make_unique<ClKernelWriter>(attr, x);
4055 default:
4056 std::cout << "Unsupported Gpu programming language" << std::endl;
4057 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01004058 return nullptr;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004059 }
4060 }
4061};
4062
Nikolaj Jensenacea4072023-07-03 09:44:42 +01004063inline int32_t
4064adjust_step(TensorSamplerFormat tensor_format, int32_t step, const TensorInfo *tensor_info_id, int32_t idx)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004065{
4066 auto tensor = tensor_info_id->shape;
4067
Nikolaj Jensenacea4072023-07-03 09:44:42 +01004068 int32_t dim[3] = { 0 };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004069
4070 switch(tensor_format)
4071 {
4072 case TensorSamplerFormat::C_W_H:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01004073 dim[0] = tensor[0];
4074 dim[1] = tensor[1];
4075 dim[2] = tensor[2];
4076 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004077 case TensorSamplerFormat::C_WH_1:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01004078 dim[0] = tensor[0];
4079 dim[1] = tensor[1] * tensor[2];
4080 dim[2] = 1;
4081 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004082 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01004083 std::cout << "Unsupported tensor format" << std::endl;
4084 assert(false);
4085 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004086 }
4087
4088 return std::min(step, dim[idx]);
4089}
4090
4091} // namespace prototype
4092} // namespace ckw
4093
Viet-Hoa Doce3c48c2023-07-03 13:44:43 +01004094#endif // CKW_PROTOTYPE_SRC_PROTOTYPE_H