blob: fdb4ab1babd15af386478bf2ad1759832ae7ec34 [file] [log] [blame]
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001/*
2 * Copyright (c) 2023 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
Viet-Hoa Doce3c48c2023-07-03 13:44:43 +010025#ifndef CKW_PROTOTYPE_SRC_PROTOTYPE_H
26#define CKW_PROTOTYPE_SRC_PROTOTYPE_H
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010027
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010028#include <algorithm>
29#include <array>
Nikolaj Jensenacea4072023-07-03 09:44:42 +010030#include <cassert> // assert (to be removed)
31#include <chrono>
32#include <cmath>
33#include <cstdint> // int32_t
34#include <iostream> // cout (to be removed)
35#include <map>
36#include <memory>
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010037#include <stdexcept>
Nikolaj Jensenacea4072023-07-03 09:44:42 +010038#include <string>
39#include <unordered_map>
40#include <vector>
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010041
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010042#include "ckw/Error.h"
Nikolaj Jensenacea4072023-07-03 09:44:42 +010043#include "ckw/TensorInfo.h"
44#include "ckw/Types.h"
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010045
46namespace ckw
47{
Nikolaj Jensenacea4072023-07-03 09:44:42 +010048namespace prototype
49{
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010050
51// Dummy data structure for Size2D
52using Size2D = std::vector<int32_t>;
53
54// Dummy Status
55using Status = void;
56
57enum class ComponentType : int32_t
58{
59 Complex = 0,
60 Simple = 1,
61 Unfusable = 2
62};
63
64enum class GpuCompilationSpeed
65{
Nikolaj Jensenacea4072023-07-03 09:44:42 +010066 Fast = 0x00, // fast compilation may increase the latency of the network
67 Slow = 0x01 // slow compilation may decrease the latency of the network
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010068};
69
70enum class GpuExtensions
71{
72 Fp16,
73 Dot8,
74 Mmul,
75 FastMath
76};
77
78struct TensorInfo
79{
Nikolaj Jensenacea4072023-07-03 09:44:42 +010080 TensorShape shape{ { 0 } };
81 DataType data_type{ DataType::Unknown };
82 TensorDataLayout data_layout{ TensorDataLayout::Nhwc };
83 int32_t id{ -1 };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010084};
85
86struct ComponentAttribute
87{
Nikolaj Jensenacea4072023-07-03 09:44:42 +010088 GpuCompilationSpeed compilation_speed{ GpuCompilationSpeed::Fast };
89 bool overwrite_tile{ true };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010090};
91
92inline std::string data_type_to_cl_type(DataType dt)
93{
94 switch(dt)
95 {
96 case DataType::Fp32:
97 return "float";
98 case DataType::Fp16:
99 return "half";
100 case DataType::Int8:
101 return "char";
102 case DataType::Uint8:
103 return "uchar";
104 case DataType::Uint16:
105 return "ushort";
106 case DataType::Int16:
107 return "short";
108 case DataType::Uint32:
109 return "uint";
110 case DataType::Int32:
111 return "int";
112 case DataType::Bool:
113 return "bool";
114 default:
115 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +0100116 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100117 }
118}
119
120inline int32_t width_to_cl_vector_size(int32_t width)
121{
122 switch(width)
123 {
124 case 1:
125 return 1;
126 case 2:
127 return 2;
128 case 3:
129 return 3;
130 case 4:
131 return 4;
132 case 5:
133 case 6:
134 case 7:
135 case 8:
136 return 8;
137 case 9:
138 case 10:
139 case 11:
140 case 12:
141 case 13:
142 case 14:
143 case 15:
144 case 16:
145 return 16;
146 default:
147 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +0100148 return 0;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100149 }
150}
151
152inline std::string get_cl_data_type(DataType dt, int32_t width)
153{
154 std::string data_type;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100155 int32_t w = width_to_cl_vector_size(width);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100156 data_type += data_type_to_cl_type(dt);
157 if(w != 1)
158 {
159 data_type += std::to_string(w);
160 }
161 return data_type;
162}
163
164inline std::string to_opencl_store(int32_t vector_length)
165{
166 if(vector_length != 1)
167 {
168 return "vstore" + std::to_string(vector_length) + "(";
169 }
170 else
171 {
172 return "*(";
173 }
174}
175
176struct TileInfo
177{
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100178 TileInfo()
179 {
180 }
181
182 TileInfo(DataType dt)
183 : dt(dt), w(1), h(1)
184 {
185 }
186
187 TileInfo(DataType dt, int32_t width)
188 : dt(dt), w(width), h(1)
189 {
190 }
191
192 TileInfo(DataType dt, int32_t width, int32_t height)
193 : dt(dt), w(width), h(height)
194 {
195 }
196
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100197 DataType dt{ DataType::Unknown }; // Data type of the tile
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100198 int32_t w{ 0 }; // Width (i.e. c0 - portion of the channels)
199 int32_t h{ 0 }; // Height (i.e. s0 - portion of the spatial dimensions)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100200};
201
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100202inline std::ostream &operator<<(std::ostream &o, const TileInfo &a)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100203{
204 o << a.w << " x " << a.h;
205 return o;
206}
207
208struct DataTypeAsString
209{
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100210 std::string str{ "" };
211 DataType dt{ DataType::Unknown };
212 int32_t size{ 1 };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100213};
214
215struct ValueAsString
216{
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100217 std::string str{ "" };
218 DataTypeAsString type{};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100219};
220
221// https://stackoverflow.com/questions/51515378/storing-and-accessing-tile-properties-in-c
222// A Tile is a collection of variables used to express a 2D data.
223class IScalarTile
224{
225public:
226 virtual ~IScalarTile() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100227
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100228 /** Method to get the scalar variable from a tile
229 * @param[in] x X coordinate on the width of the tile. If out-of-bound, the coordinate is clamped to the nearest valid edge
230 * @param[in] y Y coordinate on the height of the tile. If out-of-bound, the coordinate is clamped to the nearest valid edge
231 *
232 * @return the scalar variable as a string
233 */
234 virtual ValueAsString scalar(int32_t x, int32_t y) const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100235
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100236 /** Method to get the list of underlying variable names used by the tile
237 *
238 * @return the list of variable names
239 */
240 virtual std::vector<ValueAsString> underlying_source_variables() const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100241
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100242 /** Method to get the name of the tile.
243 *
244 * @return the name of the tile
245 */
246 std::string name() const
247 {
248 return _basename;
249 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100250
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100251 /** Method to get the tile format
252 *
253 * @return the format
254 */
255 TileInfo format() const
256 {
257 return _format;
258 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100259
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100260 /** Method to know whether the tile is assignable or not (constant)
261 *
262 * @return true if the tile is assignable
263 */
264 virtual bool is_assignable() const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100265
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100266 /** Method to know whether the tile needs to be declared
267 *
268 * @return true if the tile needs to be declared in the code before being used
269 */
270 virtual bool need_declaration() const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100271
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100272protected:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100273 TileInfo _format{}; // Tile format
274 std::string _basename{ "" }; // Tile name
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100275};
276
277// A tile is a collection of variables used to express a 2D data. The variables are vectors in the GPU context.
278// The vector size is given by the width of the tile. The number of vectors height by depth defines the number of vectors
279class IVectorTile : public IScalarTile
280{
281public:
282 virtual ~IVectorTile() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100283
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100284 /** Method to get the vector variable from a tile. A vector is an ordered homogeneous collection of two or more scalars.
285 * The user can query the list of supported width for the vectors through preferred_vector_sizes().
286 *
287 * @param[in] y Y coordinate on the height of the tile. If out-of-bound, the coordinate is clamped to the nearest valid edge
288 *
289 * @return the vector variable as a string
290 */
291 virtual ValueAsString vector(int32_t y) const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100292
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100293 /** Method to get a vector variable from a tile. A vector is an ordered homogeneous collection of two or more scalars.
294 *
295 * @return the vector variable as a string
296 */
297 virtual ValueAsString vector(int32_t x_start, int32_t width, int32_t y) const = 0;
298 /** Method to get the preferred vector sizes.
299 *
300 * @return a vector with the preferred vector sizes
301 */
302 //virtual std::vector<int32_t> preferred_vector_sizes() const = 0;
303};
304
305class ClTile : public IVectorTile
306{
307public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100308 ClTile(const std::string &name, TileInfo format)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100309 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100310 _format = format;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100311 _basename = name;
312 }
313
314 ValueAsString scalar(int32_t x, int32_t y) const override
315 {
316 x = std::max(std::min(x, _format.w - 1), static_cast<int32_t>(0));
317 y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
318
319 ValueAsString t;
320 t.str = build_variable_name(y);
321 t.type.str = get_cl_data_type(_format.dt, 1);
322 t.type.dt = _format.dt;
323 t.type.size = 1;
324
325 // Check required because if the width has only one element, we cannot use .s0
326 if(_format.w != 1)
327 {
328 // Automatic broadcasting
329 t.str += ".s" + std::to_string(x);
330 }
331
332 return t;
333 }
334
335 ValueAsString vector(int32_t y) const override
336 {
337 y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
338
339 ValueAsString t;
340 t.str = build_variable_name(y);
341 t.type.str = get_cl_data_type(_format.dt, _format.w);
342 t.type.dt = _format.dt;
343 t.type.size = _format.w;
344 return t;
345 }
346
347 ValueAsString vector(int32_t x_start, int32_t width, int32_t y) const override
348 {
349 y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
350
351 ValueAsString t;
352 t.str = build_variable_name(y);
353 t.type.str = get_cl_data_type(_format.dt, width);
354 t.type.dt = _format.dt;
355 t.type.size = width;
356
357 if(_format.w != 1)
358 {
359 t.str += ".s";
360 for(int i = 0; i < width; ++i)
361 {
362 t.str += to_scalar_hex(x_start + i);
363 }
364 }
365 return t;
366 }
367
368 std::vector<ValueAsString> underlying_source_variables() const override
369 {
370 std::vector<ValueAsString> vars;
371 for(int32_t y = 0; y < _format.h; ++y)
372 {
373 ValueAsString t;
374 t.str = build_variable_name(y);
375 t.type.str = get_cl_data_type(_format.dt, _format.w);
376 t.type.dt = _format.dt;
377 t.type.size = _format.w;
378 vars.push_back(t);
379 }
380 return vars;
381 }
382
383 bool is_assignable() const override
384 {
385 return true;
386 }
387
388 bool need_declaration() const override
389 {
390 return true;
391 }
392
393private:
394 std::string build_variable_name(int32_t y) const
395 {
396 std::string var_name = _basename;
397
398 if(_format.h == 1)
399 {
400 return var_name;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100401 }
402 else
403 {
404 var_name += "_";
405 var_name += std::to_string(y);
406 }
407
408 return var_name;
409 }
410
411 std::string to_scalar_hex(int32_t x) const
412 {
413 switch(x)
414 {
415 case 0:
416 case 1:
417 case 2:
418 case 3:
419 case 4:
420 case 5:
421 case 6:
422 case 7:
423 case 8:
424 case 9:
425 return std::to_string(x);
426 case 10:
427 return "A";
428 case 11:
429 return "B";
430 case 12:
431 return "C";
432 case 13:
433 return "D";
434 case 14:
435 return "E";
436 case 15:
437 return "F";
438 default:
Viet-Hoa Doe1880f02023-06-28 10:25:35 +0100439 std::cout << "Unsupported hexadecimal value" << std::endl;
440 assert(false);
441 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100442 }
443 }
444};
445
446// Unique features: It contains values in the form of string. The name used for this object is misleading since the variables can change the value over time.
447class ClConstantTile : public IVectorTile
448{
449public:
450 ClConstantTile(const std::vector<std::vector<std::string>> &in, DataType dt)
451 {
452 _format.w = in[0].size();
453 _format.h = in.size();
454 _format.dt = dt;
455
456 _data = std::vector<std::vector<std::string>>(_format.h, std::vector<std::string>(_format.w));
457
458 for(int32_t y = 0; y < _format.h; ++y)
459 {
460 for(int32_t x = 0; x < _format.w; ++x)
461 {
462 _data[y][x] = in[y][x];
463 }
464 }
465 }
466
467 ValueAsString scalar(int32_t x, int32_t y) const override
468 {
469 x = std::max(std::min(x, _format.w - 1), static_cast<int32_t>(0));
470 y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
471
472 ValueAsString t;
473 t.str = _data[y][x];
474 t.type.str = get_cl_data_type(_format.dt, 1);
475 t.type.dt = _format.dt;
476 t.type.size = 1;
477
478 return t;
479 }
480
481 ValueAsString vector(int32_t y) const override
482 {
483 y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
484
485 return vector(0, _format.w, y);
486 }
487
488 ValueAsString vector(int32_t x_start, int32_t width, int32_t y) const override
489 {
490 y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
491
492 ValueAsString t;
493 t.str = "";
494 t.type.str = get_cl_data_type(_format.dt, width);
495 t.type.dt = _format.dt;
496 t.type.size = width;
497
498 if(width > 1)
499 {
500 t.str += "((" + get_cl_data_type(_format.dt, width) + ")(";
501 }
502
503 int32_t x = x_start;
504 for(; x < width - 1; ++x)
505 {
506 t.str += scalar(x, y).str;
507 t.str += ", ";
508 }
509 t.str += scalar(x, y).str;
510
511 if(width > 1)
512 {
513 t.str += "))";
514 }
515
516 return t;
517 }
518
519 std::vector<ValueAsString> underlying_source_variables() const override
520 {
521 std::vector<ValueAsString> vars;
522
523 for(int32_t y = 0; y < _format.h; ++y)
524 {
525 for(int32_t x = 0; x < _format.w; ++x)
526 {
527 ValueAsString t;
528 t.str = _data[y][x];
529 t.type.str = get_cl_data_type(_format.dt, 1);
530 t.type.dt = _format.dt;
531 t.type.size = 1;
532 vars.push_back(t);
533 }
534 }
535
536 return vars;
537 }
538
539 bool is_assignable() const override
540 {
541 return false;
542 }
543
544 bool need_declaration() const override
545 {
546 return false;
547 }
548
549private:
550 std::vector<std::vector<std::string>> _data{};
551};
552
553enum class TensorComponentIndex : int32_t
554{
555 IndexMask = 0x0000000f,
556};
557
558enum class TensorComponentType : int32_t
559{
560 OffsetFirstElement = 0x00000100,
561 Stride = 0x00001000,
562 Dimension = 0x00010000,
563 FoldedDimension = 0x00100000,
564 Constant = 0x01000000
565};
566
567enum class TensorComponent : int32_t
568{
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100569 Unknown = 0x00000000,
570 OffsetFirstElement = 0x00000100,
571 Stride1 = 0x00001001,
572 Stride2 = 0x00001002,
573 Stride3 = 0x00001003,
574 Stride4 = 0x00001004,
575 Dim0 = 0x00010000,
576 Dim1 = 0x00010001,
577 Dim2 = 0x00010002,
578 Dim3 = 0x00010003,
579 Dim4 = 0x00010004,
580 C = 0x00010000, // Dim0
581 W = 0x00010001, // Dim1
582 H = 0x00010002, // Dim2
583 D = 0x00010003,
584 N = 0x00010004,
585 Dim1xDim2 = 0x00100021,
586 Dim1xDim2xDim3 = 0x00100321,
587 WxH = 0x00100021,
588 WxHxD = 0x00100321
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100589};
590
591inline std::string to_string(TensorComponent x)
592{
593 switch(x)
594 {
595 case TensorComponent::Unknown:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100596 return "Unknown";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100597 case TensorComponent::OffsetFirstElement:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100598 return "OffsetFirstElement";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100599 case TensorComponent::Stride1:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100600 return "Stride1";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100601 case TensorComponent::Stride2:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100602 return "Stride2";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100603 case TensorComponent::Stride3:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100604 return "Stride3";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100605 case TensorComponent::Stride4:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100606 return "Stride4";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100607 case TensorComponent::Dim0:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100608 return "Dim0";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100609 case TensorComponent::Dim1:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100610 return "Dim1";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100611 case TensorComponent::Dim2:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100612 return "Dim2";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100613 case TensorComponent::Dim3:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100614 return "Dim3";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100615 case TensorComponent::Dim4:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100616 return "Dim4";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100617 case TensorComponent::Dim1xDim2:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100618 return "Dim1xDim2";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100619 case TensorComponent::Dim1xDim2xDim3:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100620 return "Dim1xDim2xDim3";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100621 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100622 assert(false);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100623 }
624}
625
626class ITensorArgument
627{
628public:
629 virtual ~ITensorArgument() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100630
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100631 /** Method to get the tensor component as a string
632 *
633 * @param[in] x tensor component to query
634 *
635 * @return the tensor component as a string
636 */
637 virtual std::string component(TensorComponent x) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100638
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100639 /** Method to get the tensor component type declaration as a string
640 *
641 * @return the tensor component type declaration as a string
642 */
643 virtual std::string component_type_declaration() const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100644
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100645 /** Method to get the tensor component data type
646 *
647 * @return the tensor component data type
648 */
649 virtual DataType component_data_type() const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100650
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100651 /** Method to get the tensor component declarations
652 *
653 * @return a vector containing the tensor component declarations
654 */
655 virtual std::vector<TensorComponent> component_declarations() const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100656
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100657 /** Method to get the name of the tensor argument.
658 *
659 * @return the name of the tensor argument
660 */
661 std::string name() const
662 {
663 return _basename;
664 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100665
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100666 /** Method to get the tensor format
667 *
668 * @return the format
669 */
670 TensorInfo format() const
671 {
672 return _format;
673 }
674
675protected:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100676 TensorInfo _format{};
677 std::string _basename{};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100678};
679
680enum class GpuTensorStorage : int32_t
681{
682 Unknown = 0x0000,
683 BufferUint8Ptr = 0x0012,
684 Image2dReadOnly = 0x0020,
685 Image2dWriteOnly = 0x0021,
686 Image3dReadOnly = 0x0030,
687 Image3dWriteOnly = 0x0031
688};
689
690class IGpuTensorArgument : public ITensorArgument
691{
692public:
693 virtual ~IGpuTensorArgument() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100694
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100695 /** Method to get the tensor storage, which is the underlying storage used to keep the data memory
696 *
697 * @param[in] x tensor storage to query
698 *
699 * @return the tensor storage as a string
700 */
701 virtual std::string storage(GpuTensorStorage x) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100702
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100703 /** Method to get the tensor storage type declaration as a string
704 *
705 * @param[in] x tensor component to query
706 *
707 * @return the tensor storage type declaration as a string
708 */
709 virtual std::string storage_type_declaration(GpuTensorStorage x) const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100710
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100711 /** Method to get the tensor storage declarations
712 *
713 * @return a vector containing the tensor storage declarations
714 */
715 virtual std::vector<GpuTensorStorage> storage_declarations() const = 0;
716};
717
718class ClTensorArgument : public IGpuTensorArgument
719{
720public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100721 ClTensorArgument(const std::string &name, const TensorInfo &x, bool return_by_value_when_possible)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100722 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100723 _basename = name;
724 _format = x;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100725 _return_by_value_when_possible = return_by_value_when_possible;
726 }
727
728 // Methods to override
729 std::string component(TensorComponent x) override
730 {
731 if((static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentType::Constant)))
732 {
733 int32_t idx = static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentIndex::IndexMask);
734 return std::to_string(idx - 1);
735 }
736
737 if(_return_by_value_when_possible)
738 {
739 if((static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentType::Dimension)))
740 {
741 int32_t idx = static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentIndex::IndexMask);
742 return std::to_string(_format.shape[idx]);
743 }
744
745 if((static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentType::FoldedDimension)))
746 {
747 switch(x)
748 {
749 case TensorComponent::Dim1xDim2:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100750 return std::to_string(_format.shape[1] * _format.shape[2]);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100751 case TensorComponent::Dim1xDim2xDim3:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100752 return std::to_string(_format.shape[1] * _format.shape[2] * _format.shape[2]);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100753 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100754 std::cout << "Unsupported folded dimension" << std::endl;
755 assert(false);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100756 }
757 }
758 }
759
760 if(std::find(_components_required.begin(), _components_required.end(), x) == _components_required.end())
761 {
762 _components_required.push_back(x);
763 }
764
765 return build_component_name(x);
766 }
767
768 std::string component_type_declaration() const override
769 {
770 return "int";
771 };
772
773 DataType component_data_type() const override
774 {
775 return DataType::Int32;
776 }
777
778 std::string storage(GpuTensorStorage x) override
779 {
780 if(std::find(_storage_required.begin(), _storage_required.end(), x) == _storage_required.end())
781 {
782 _storage_required.push_back(x);
783 }
784
785 return build_storage_name(x);
786 }
787
788 std::string storage_type_declaration(GpuTensorStorage x) const override
789 {
790 switch(x)
791 {
792 case GpuTensorStorage::BufferUint8Ptr:
793 return "__global uchar*";
794 case GpuTensorStorage::Image2dReadOnly:
795 return "__read_only image2d_t";
796 case GpuTensorStorage::Image2dWriteOnly:
797 return "__write_only image2d_t";
798 case GpuTensorStorage::Image3dReadOnly:
799 return "__read_only image3d_t ";
800 case GpuTensorStorage::Image3dWriteOnly:
801 return "__write_only image3d_t ";
802 default:
803 std::cout << "Unsupported storage" << std::endl;
804 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +0100805 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100806 }
807 };
808
809 std::vector<GpuTensorStorage> storage_declarations() const override
810 {
811 return _storage_required;
812 }
813
814 std::vector<TensorComponent> component_declarations() const override
815 {
816 return _components_required;
817 }
818
819private:
820 std::string build_storage_name(GpuTensorStorage x) const
821 {
822 std::string var_name = _basename;
823
824 switch(x)
825 {
826 case GpuTensorStorage::BufferUint8Ptr:
827 return var_name + "_ptr";
828 case GpuTensorStorage::Image2dReadOnly:
829 case GpuTensorStorage::Image2dWriteOnly:
830 return var_name + "_img2d";
831 case GpuTensorStorage::Image3dReadOnly:
832 case GpuTensorStorage::Image3dWriteOnly:
833 return var_name + "_img3d";
834 default:
835 std::cout << "Unsupported storage" << std::endl;
836 assert(false);
837 }
838
839 return var_name;
840 }
841
842 std::string build_component_name(TensorComponent x) const
843 {
844 std::string var_name = _basename;
845
846 switch(x)
847 {
848 case TensorComponent::OffsetFirstElement:
849 return var_name + "_offset_first_element";
850 case TensorComponent::Stride1:
851 return var_name + "_stride1";
852 case TensorComponent::Stride2:
853 return var_name + "_stride2";
854 case TensorComponent::Stride3:
855 return var_name + "_stride3";
856 case TensorComponent::Dim0:
857 return var_name + "_dim0";
858 case TensorComponent::Dim1:
859 return var_name + "_dim1";
860 case TensorComponent::Dim2:
861 return var_name + "_dim2";
862 case TensorComponent::Dim3:
863 return var_name + "_dim3";
864 case TensorComponent::Dim1xDim2:
865 return var_name + "_dim1xdim2";
866 case TensorComponent::Dim1xDim2xDim3:
867 return var_name + "_dim1xdim2xdim3";
868 default:
869 std::cout << "Unsupported component" << std::endl;
870 assert(false);
871 }
872
873 return var_name;
874 }
875
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100876 bool _return_by_value_when_possible{ false };
877 std::vector<GpuTensorStorage> _storage_required{};
878 std::vector<TensorComponent> _components_required{};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100879};
880
881/**
882 * @brief Data structure that contains the declared tiles by the components.
883 * The registry is a linear data structure that follows the similar principle of the stack. The user can use the @p increment_registry_level() method to
884 * increase the level of the stack (0 when it starts). When the user uses the @p decrement_registry_level() method, the registry decreases the level of the stack
885 * and remove (pop) all the tiles from the level above.
886 * When a tile is declared on the level 0, it is a global tile. A global tile is visible in all parts of the code.
887 * Since different components may use the same name to define a tile, the registry adopts the IdSpace concept, an @p id to prevent name collisions
888 * when declaring tiles among different components.
889 *
890 */
891class GpuTileRegistry
892{
893public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100894 enum class RegistryTileType
895 {
896 Tile,
897 Link
898 };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100899
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100900 using RegistryIdSpace = int32_t;
901 using RegistryLevel = int32_t;
902 using RegistryTileName = std::string;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100903
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100904 struct RegistryTileTableEntry
905 {
906 RegistryLevel registry_level{ 0 };
907 std::unique_ptr<IVectorTile> tile_object{ nullptr };
908 };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100909
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100910 struct RegistryTileTypeTableEntry
911 {
912 RegistryTileType tile_type{ RegistryTileType::Tile };
913 RegistryTileName tile_name{};
914 RegistryIdSpace registry_idspace{ 0 };
915 RegistryLevel registry_level{ 0 };
916 };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100917
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100918 using RegistryTileTable = std::map<RegistryIdSpace, std::map<RegistryTileName, RegistryTileTableEntry>>;
919 using RegistryTileTypeTable = std::map<RegistryIdSpace, std::map<RegistryTileName, RegistryTileTypeTableEntry>>;
920
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100921 /**
922 * @brief Construct a new Gpu Tile Registry object
923 *
924 */
925 GpuTileRegistry()
926 {
927 _language = GpuTargetLanguage::Unknown;
928 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100929
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100930 /**
931 * @brief Construct a new Gpu Tile Registry object providing the Gpu programming language
932 *
933 * @param[in] language Gpu programming language to use
934 */
935 GpuTileRegistry(GpuTargetLanguage language)
936 {
937 _language = language;
938 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100939
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100940 /**
941 * @brief Default destructor. Destroy the Gpu Tile Registry object
942 *
943 */
944 ~GpuTileRegistry() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100945
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100946 /**
947 * @brief Set the working IdSpace for the tile registry. IdSpace is used to prevent name collisions when declaring tiles.
948 * Therefore, the IdSpace should be set before declaring any tiles.
949 *
950 * @param[in] id The IdSpace id
951 */
952 void set_IdSpace(int32_t id)
953 {
954 _IdSpace = id;
955 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100956
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100957 /**
958 * @brief Get the current working IdSpace for the tile registry. IdSpace is used to prevent name collisions when declaring tiles
959 *
960 * @return The IdSpace id
961 */
962 int32_t IdSpace() const
963 {
964 return _IdSpace;
965 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100966
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100967 /**
968 * @brief Gets all the IdSpace declarations defined in the tile registry.
969 *
970 * @return all the IdSpace declarations defined in the tile registry as std::vector<int32_t>. It returns an empty vector if there are no IdSpace declarations.
971 */
972 std::vector<int32_t> IdSpace_declarations() const
973 {
974 std::vector<int32_t> x;
975
976 auto it = _frags.begin();
977
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100978 while(it != _frags.end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100979 {
980 x.push_back(it->first);
981
982 it++;
983 }
984
985 return x;
986 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100987
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100988 /**
989 * @brief Declare a tile from a previously created tile
990 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100991 void insert(const std::string &name, const IVectorTile *frag)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100992 {
993 assert(_language == GpuTargetLanguage::OpenCL);
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100994 const int32_t key_IdSpace = _IdSpace;
995 const std::string key_var_name = name;
996 const std::string var_name = frag->name();
997 TileInfo format = frag->format();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100998
999 // First check whether a tile with the same name exists
1000 IVectorTile *result = (*this)[key_var_name];
1001 assert(result == nullptr);
1002 if(result == nullptr)
1003 {
1004 std::unique_ptr<ClTile> tile = std::make_unique<ClTile>(var_name, format);
1005
1006 _frags[key_IdSpace][key_var_name].tile_object = std::move(tile);
1007 _frags[key_IdSpace][key_var_name].registry_level = _registry_level;
1008
1009 _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Link;
1010 _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name;
1011 _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace;
1012 _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level;
1013 }
1014 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001015
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001016 /**
1017 * @brief Declare a tile with TileInfo. The tile will be stored in the IdSpace set with @p set_IdSpace()
1018 *
1019 * @note The reference name used for declaring the tile should not be previously used in the IdSpace
1020 *
1021 * @param[in] name Reference name for the tile. The reference name can be used to retrieve the tile stored in the registry.
1022 * @param[in] format Tile format use to use
1023 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001024 void insert(const std::string &name, const TileInfo &format)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001025 {
1026 assert(_language == GpuTargetLanguage::OpenCL);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001027 const int32_t key_IdSpace = _IdSpace;
1028 const std::string key_var_name = name;
1029 const std::string var_name = generate_tile_name(name);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001030
1031 // First check whether a tile with the same name exists
1032 IVectorTile *result = (*this)[key_var_name];
1033 assert(result == nullptr);
1034 if(result == nullptr)
1035 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001036 std::unique_ptr<ClTile> tile = std::make_unique<ClTile>(var_name, format);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001037 _frags[key_IdSpace][key_var_name].tile_object = std::move(tile);
1038 _frags[key_IdSpace][key_var_name].registry_level = _registry_level;
1039
1040 _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Tile;
1041 _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name;
1042 _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace;
1043 _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level;
1044 }
1045 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001046
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001047 /**
1048 * @brief Declare a constant tile. The content of the tile is passed as a vector of std::string
1049 *
1050 * @note The reference name used for declaring the tile should not be previously used in the IdSpace
1051 *
1052 * @param[in] name Reference name for the tile. The reference name can be used to retrieve the tile stored in the registry.
1053 * @param[in] in A 3D std::vector of std::string. From the 3D std::vector we can know the dimensions for the tile
1054 * @param[in] dt The data type for the elements stored in the 3D std::vector as std::string. It is user's responsibilty to ensure
1055 * that the data type is aligned with the content of the std::string.
1056 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001057 void insert(const std::string &name, const std::vector<std::vector<std::string>> &in, DataType dt)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001058 {
1059 assert(_language == GpuTargetLanguage::OpenCL);
1060 const int32_t key_IdSpace = _IdSpace;
1061 const std::string key_var_name = name;
1062
1063 // First check whether a tile with the same name exists
1064 IVectorTile *result = (*this)[key_var_name];
1065 assert(result == nullptr);
1066 if(result == nullptr)
1067 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001068 std::unique_ptr<ClConstantTile> tile = std::make_unique<ClConstantTile>(in, dt);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001069 _frags[key_IdSpace][key_var_name].tile_object = std::move(tile);
1070 _frags[key_IdSpace][key_var_name].registry_level = _registry_level;
1071
1072 _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Tile;
1073 _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name;
1074 _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace;
1075 _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level;
1076 }
1077 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001078
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001079 /**
1080 * @brief Declare an anonymous constant tile. The content of the tile is passed as a vector of std::string
1081 *
1082 * @note This method can be used to declare temporary tiles that need to be accessed only once.
1083 *
1084 * @param[in] in A 3D std::vector of std::string. From the 3D std::vector we can know the dimensions for the tile
1085 * @param[in] dt The data type for the elements stored in the 3D std::vector as std::string. It is user responsibilty to ensure
1086 * that the data type is aligned with what passed with the std::string.
1087 *
1088 * @return IVectorTile* the anonymous constant tile
1089 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001090 IVectorTile *insert(const std::vector<std::vector<std::string>> &in, DataType dt)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001091 {
1092 assert(_language == GpuTargetLanguage::OpenCL);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001093 const int32_t key_IdSpace = _IdSpace;
1094 const std::string key_var_name = "_" + std::to_string(_anonymous_frag_count++);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001095
1096 // First check whether a tile with the same name exists
1097 IVectorTile *result = (*this)[key_var_name];
1098 assert(result == nullptr);
1099 if(result == nullptr)
1100 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001101 std::unique_ptr<ClConstantTile> tile = std::make_unique<ClConstantTile>(in, dt);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001102 _frags[key_IdSpace][key_var_name].tile_object = std::move(tile);
1103 _frags[key_IdSpace][key_var_name].registry_level = _registry_level;
1104
1105 _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Tile;
1106 _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name;
1107 _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace;
1108 _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level;
1109 }
1110
1111 return (*this)[key_var_name];
1112 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001113
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001114 /**
1115 * @brief Get the tile from the registry. This method searches the tile in the IdSpace provided by the user
1116 *
1117 * @param[in] name The name of the tile to retrieve
1118 * @param[in] IdSpace The IdSpace id where to search the tile
1119 *
1120 * @return IVectorTile* The tile
1121 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001122 IVectorTile *get(const std::string &name, int32_t IdSpace)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001123 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001124 const int32_t key_IdSpace = IdSpace;
1125 const std::string key_var_name = name;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001126
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001127 IVectorTile *result = nullptr;
1128 auto search_IdSpace = _frags.find(key_IdSpace);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001129 if(search_IdSpace != _frags.end())
1130 {
1131 auto search_tile = _frags[key_IdSpace].find(key_var_name);
1132 if(search_tile != _frags[key_IdSpace].end())
1133 {
1134 result = search_tile->second.tile_object.get();
1135 assert(result != nullptr);
1136 }
1137 }
1138
1139 return result;
1140 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001141
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001142 /**
1143 * @brief Get the tile from the registry. This method searches the tile in the IdSpace set with @p set_IdSpace()
1144 *
1145 * @param[in] name The name of the tile to retrieve
1146 *
1147 * @return IVectorTile* The tile
1148 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001149 IVectorTile *operator[](const std::string &name)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001150 {
1151 return get(name, _IdSpace);
1152 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001153
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001154 /**
1155 * @brief Check whether the tile in the in the IdSpace provided by the user exists
1156 *
1157 * @param[in] name Name of the tile to search for
1158 * @param[in] IdSpace The IdSpace id where to search the tile
1159 *
1160 * @return true if the tile exists
1161 * @return false if the tile does not exist
1162 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001163 bool has_tile(const std::string &name, int32_t IdSpace) const
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001164 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001165 const int32_t key_IdSpace = IdSpace;
1166 const std::string key_var_name = name;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001167
1168 // IVectorTile* result = nullptr;
1169 auto search_IdSpace = _frags.find(key_IdSpace);
1170
1171 return search_IdSpace != _frags.end();
1172 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001173
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001174 /**
1175 * @brief Check whether the tile within the current IdSpace exists
1176 *
1177 * @param[in] name Name of the tile to search for
1178 *
1179 * @return true if the tile exists
1180 * @return false if the tile does not exist
1181 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001182 bool has_tile(const std::string &name) const
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001183 {
1184 return has_tile(name, _IdSpace);
1185 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001186
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001187 /**
1188 * @brief Get all the tiles declared within the IdSpace provided by the user
1189 *
1190 * @param[in] IdSpace IdSpace where to retrieve all the declared tiles
1191 *
1192 * @return std::vector<IVectorTile*> A vector with all the declared tiles in the IdSpace provided by the user
1193 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001194 std::vector<IVectorTile *> tile_declarations(int32_t IdSpace)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001195 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001196 std::vector<IVectorTile *> tiles;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001197
1198 std::map<RegistryTileName, RegistryTileTypeTableEntry>::iterator it = _frag_types[IdSpace].begin();
1199
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001200 while(it != _frag_types[IdSpace].end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001201 {
1202 // The following line should be enabled. However, we cannot at this stage
1203 // because it used to retrieve the output tile produced by each component.
1204 // However, this method should NOT be used to retrieve the output tile
1205 //if(it->second.tile_type == RegistryTileType::Tile)
1206 {
1207 tiles.push_back(get(it->second.tile_name, it->second.registry_idspace));
1208 }
1209 it++;
1210 }
1211
1212 return tiles;
1213 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001214
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001215 /**
1216 * @brief Increase the level of stack.
1217 *
1218 */
1219 void increment_registry_level()
1220 {
1221 _registry_level++;
1222 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001223
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001224 /**
1225 * @brief Remove all the tiles declared at the current stack level and decrease the level of the stack.
1226 *
1227 */
1228 void decrement_registry_level()
1229 {
1230 assert(_registry_level >= 0);
1231
1232 // Remove all variables in the local scope
1233 std::map<RegistryTileName, RegistryTileTableEntry>::iterator it = _frags[_IdSpace].begin();
1234
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001235 while(it != _frags[_IdSpace].end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001236 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001237 if(it->second.registry_level == _registry_level)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001238 {
1239 it = _frags[_IdSpace].erase(it);
1240 }
1241 else
1242 {
1243 it++;
1244 }
1245 }
1246
1247 std::map<RegistryTileName, RegistryTileTypeTableEntry>::iterator it_type = _frag_types[_IdSpace].begin();
1248
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001249 while(it_type != _frag_types[_IdSpace].end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001250 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001251 if(it_type->second.registry_level == _registry_level)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001252 {
1253 it_type = _frag_types[_IdSpace].erase(it_type);
1254 }
1255 else
1256 {
1257 it_type++;
1258 }
1259 }
1260
1261 _registry_level--;
1262 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001263
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001264 /**
1265 * @brief Get the level of the stack
1266 *
1267 */
1268 int32_t level() const
1269 {
1270 return _registry_level;
1271 }
1272
1273private:
1274 // This method ensures that the key is unique among different components
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001275 std::string generate_tile_name(const std::string &name)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001276 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001277 assert(_IdSpace >= 0);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001278 if(_registry_level == 0)
1279 {
1280 return "_G" + std::to_string(_IdSpace) + "_" + name;
1281 }
1282 else
1283 {
1284 return name;
1285 }
1286 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001287
1288 RegistryTileTable _frags{};
1289 RegistryTileTypeTable _frag_types{};
1290 RegistryLevel _registry_level{ 0 };
1291 RegistryIdSpace _IdSpace{ -1 };
1292 int32_t _anonymous_frag_count{ 0 }; // Counter used to create the anonymous tiles
1293 GpuTargetLanguage _language{ GpuTargetLanguage::Unknown }; // Gpu programming language
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001294};
1295
1296using TensorEntry = std::unique_ptr<IGpuTensorArgument>;
1297
1298/**
1299 * @brief Data structure that contains the tensors consumed by the components.
1300 * Since different components may use the same name as reference for a tensor, the registry adopts the IdSpace concept, an @p id to prevent name collisions
1301 * when declaring tensors among different components.
1302 *
1303 */
1304class GpuTensorArgumentRegistry
1305{
1306public:
1307 /**
1308 * @brief Construct a new Gpu Tensor Registry object
1309 *
1310 */
1311 GpuTensorArgumentRegistry()
1312 {
1313 _language = GpuTargetLanguage::Unknown;
1314 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001315
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001316 /**
1317 * @brief Construct a new Gpu Tensor Registry object
1318 *
1319 * @param[in] language Gpu programming language to use
1320 */
1321 GpuTensorArgumentRegistry(GpuTargetLanguage language)
1322 {
1323 _language = language;
1324 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001325
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001326 /**
1327 * @brief Default destructor. Destroy the Gpu Tensor Registry object
1328 *
1329 */
1330 ~GpuTensorArgumentRegistry() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001331
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001332 /**
1333 * @brief Set the working IdSpace for the tensor registry. IdSpace is used to prevent name collisions when declaring tensors.
1334 * Therefore, the IdSpace should be set before declaring any tensors.
1335 *
1336 * @param[in] id The IdSpace id
1337 */
1338 void set_IdSpace(int32_t id)
1339 {
1340 _IdSpace = id;
1341 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001342
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001343 /**
1344 * @brief Get the current working IdSpace for the tensor registry. IdSpace is used to prevent name collisions when declaring tensors
1345 *
1346 * @return The IdSpace id
1347 */
1348 int32_t IdSpace() const
1349 {
1350 return _IdSpace;
1351 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001352
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001353 /**
1354 * @brief Gets all the IdSpace declarations defined in the tensor registry.
1355 *
1356 * @return all the IdSpace declarations defined in the tensor registry as std::vector<int32_t>. It returns an empty vector if there are no IdSpace declarations.
1357 */
1358 std::vector<int32_t> IdSpace_declarations() const
1359 {
1360 std::vector<int32_t> x;
1361
1362 auto it = _refs.begin();
1363
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001364 while(it != _refs.end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001365 {
1366 x.push_back(it->first);
1367
1368 it++;
1369 }
1370
1371 return x;
1372 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001373
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001374 /**
1375 * @brief Declare a tensor with TensorInfo. The tensor will be stored in the IdSpace set with @p set_IdSpace()
1376 *
1377 * @note The reference name used for declaring the tensor should not be previously used in the IdSpace
1378 *
1379 * @param[in] name Reference name for the tensor. The reference name can be used to retrieve the tensor stored in the registry.
1380 * @param[in] x Pair of tensor info and tensor id
1381 * @param[in] return_by_value_when_possible True if we want the value stored in the tensor components
1382 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001383 void insert(const std::string &name, const TensorInfo &x, bool return_by_value_when_possible)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001384 {
1385 assert(_language == GpuTargetLanguage::OpenCL);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001386 const int32_t key_IdSpace = _IdSpace;
1387 const int32_t tensor_id = x.id;
1388 const std::string key_var_name = name;
1389 const std::string var_name = generate_tensor_name(name, tensor_id);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001390
1391 // First, check whether the tensor has already a reference. If so, trigger an assert
1392 assert(!has_tensor_argument(name));
1393
1394 // Check whether a tensor with that tensorID exists
1395 auto result = _tensor_arguments.find(tensor_id);
1396 if(result == _tensor_arguments.end())
1397 {
1398 // It means that we haven't added a tensor with that tensor_id yet. Create a IGpuTensorArgument before creating the reference
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001399 std::unique_ptr<ClTensorArgument> arg = std::make_unique<ClTensorArgument>(var_name, x,
1400 return_by_value_when_possible);
1401 _tensor_arguments[tensor_id] = std::move(arg);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001402 }
1403
1404 _refs[key_IdSpace][key_var_name] = tensor_id;
1405 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001406
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001407 /**
1408 * @brief Get the tensor from the registry. This method searches the tensor in the IdSpace set with @p set_IdSpace()
1409 *
1410 * @param[in] name The name of the tensor to retrieve
1411 *
1412 * @return IGpuTensor* The tensor
1413 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001414 IGpuTensorArgument *operator[](const std::string &name)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001415 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001416 const int32_t key_IdSpace = _IdSpace;
1417 const std::string key_var_name = name;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001418
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001419 IGpuTensorArgument *result = nullptr;
1420 auto search_IdSpace = _refs.find(key_IdSpace);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001421 if(search_IdSpace != _refs.end())
1422 {
1423 auto search_tensor_id = _refs[key_IdSpace].find(key_var_name);
1424
1425 if(search_tensor_id != _refs[key_IdSpace].end())
1426 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001427 const int32_t tensor_id = search_tensor_id->second;
1428 auto search_tensor_argument = _tensor_arguments.find(tensor_id);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001429 if(search_tensor_argument != _tensor_arguments.end())
1430 {
1431 result = search_tensor_argument->second.get();
1432 }
1433 assert(result != nullptr);
1434 }
1435 }
1436
1437 return result;
1438 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001439
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001440 /**
1441 * @brief Get all the tensors declared in the IdSpace provided by the user
1442 *
1443 * @return std::vector<IGpuTensorArgument*> A vector with all the declared tensors
1444 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001445 std::vector<IGpuTensorArgument *> tensor_argument_declarations()
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001446 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001447 std::vector<IGpuTensorArgument *> args;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001448
1449 auto it = _tensor_arguments.begin();
1450
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001451 while(it != _tensor_arguments.end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001452 {
1453 args.push_back(it->second.get());
1454 it++;
1455 }
1456
1457 return args;
1458 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001459
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001460 /**
1461 * @brief Check whether the tensor argument in the IdSpace set with @p set_IdSpace() exists
1462 *
1463 * @param[in] name Name of the tensor argument to search for
1464 *
1465 * @return true if the tensor argument exists
1466 * @return false if the tensor argument does not exist
1467 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001468 bool has_tensor_argument(const std::string &name)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001469 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001470 const int32_t key_IdSpace = _IdSpace;
1471 const std::string key_var_name = name;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001472
1473 auto search_IdSpace = _refs.find(key_IdSpace);
1474
1475 if(search_IdSpace != _refs.end())
1476 {
1477 auto search_tensor_id = _refs[key_IdSpace].find(key_var_name);
1478
1479 return search_tensor_id != _refs[key_IdSpace].end();
1480 }
1481 else
1482 {
1483 return false;
1484 }
1485 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001486
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001487 /**
1488 * @brief Check whether the tensor argument is in the the IdSpace provided by the user
1489 *
1490 * @param[in] name Name of the tensor argument to search for
1491 * @param[in] IdSpace The IdSpace id where to search the tensor argument
1492 *
1493 * @return true if the tile exists
1494 * @return false if the tile does not exist
1495 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001496 bool has_tensor_argument(const std::string &name, int32_t IdSpace)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001497 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001498 const int32_t key_IdSpace = IdSpace;
1499 const std::string key_var_name = name;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001500
1501 auto search_IdSpace = _refs.find(key_IdSpace);
1502
1503 if(search_IdSpace != _refs.end())
1504 {
1505 auto search_tensor_id = _refs[key_IdSpace].find(key_var_name);
1506
1507 return search_tensor_id != _refs[key_IdSpace].end();
1508 }
1509 else
1510 {
1511 return false;
1512 }
1513 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001514
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001515private:
1516 // This method ensures that the key is unique among different components
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001517 std::string generate_tensor_name(const std::string &name, int32_t tensor_id)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001518 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001519 assert(tensor_id >= 0);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001520
1521 return name + std::to_string(tensor_id);
1522 }
1523
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001524 std::map<int32_t, TensorEntry> _tensor_arguments{};
1525 std::map<int32_t, std::map<std::string, int32_t>> _refs{};
1526 int32_t _IdSpace{ -1 };
1527 GpuTargetLanguage _language{ GpuTargetLanguage::Unknown }; // Gpu programming language
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001528};
1529
1530enum class OpType : int32_t
1531{
1532 Elementwise = 0x0000,
1533 Relational = 0x1000,
1534 Algebra = 0x2000
1535};
1536
1537inline std::string to_string(AssignmentOp op)
1538{
1539 switch(op)
1540 {
1541 case AssignmentOp::Decrement:
1542 return "-=";
1543 case AssignmentOp::Increment:
1544 return "+=";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001545 default:
1546 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01001547 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001548 }
1549}
1550
1551inline std::string to_string(BinaryOp op)
1552{
1553 switch(op)
1554 {
1555 case BinaryOp::Add:
1556 return "+";
1557 case BinaryOp::Sub:
1558 return "-";
1559 case BinaryOp::Mul:
1560 return "*";
1561 case BinaryOp::Div:
1562 return "/";
1563 case BinaryOp::Mod:
1564 return "%";
1565 case BinaryOp::Equal:
1566 return "==";
1567 case BinaryOp::Less:
1568 return "<";
1569 case BinaryOp::LessEqual:
1570 return "<=";
1571 case BinaryOp::Greater:
1572 return ">";
1573 case BinaryOp::GreaterEqual:
1574 return ">=";
1575 case BinaryOp::LogicalAnd:
1576 return "&&";
1577 case BinaryOp::LogicalOr:
1578 return "||";
1579 case BinaryOp::LogicalNot:
1580 return "!";
1581 default:
1582 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01001583 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001584 }
1585}
1586
1587inline std::string binary_op_string(BinaryOp op)
1588{
1589 switch(op)
1590 {
1591 case BinaryOp::Add:
1592 return "add";
1593 case BinaryOp::Sub:
1594 return "sub";
1595 case BinaryOp::Mul:
1596 return "mul";
1597 case BinaryOp::Div:
1598 return "div";
1599 case BinaryOp::Mod:
1600 return "mod";
1601 case BinaryOp::Equal:
1602 return "eq";
1603 case BinaryOp::Less:
1604 return "gt";
1605 case BinaryOp::LessEqual:
1606 return "gteq";
1607 case BinaryOp::Greater:
1608 return "lt";
1609 case BinaryOp::GreaterEqual:
1610 return "lte";
1611 default:
1612 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01001613 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001614 }
1615}
1616
1617enum class OperandType : int32_t
1618{
1619 Unknown = 0x00000000,
1620 ScalarFp32 = 0x00001011, // Immediate scalar tile
1621 ScalarFp16 = 0x00001012, // Immediate scalar tile
1622 ScalarInt32 = 0x00001021, // Immediate scalar tile
1623 ScalarInt16 = 0x00001022, // Immediate scalar tile
1624 ScalarInt8 = 0x00001024, // Immediate scalar tile
1625 ScalarUInt32 = 0x00001031, // Immediate scalar tile
1626 ScalarUInt16 = 0x00001032, // Immediate scalar tile
1627 ScalarUInt8 = 0x00001034, // Immediate scalar tile
1628 ScalarBool = 0x00001041, // Immediate scalar tile
1629 ScalarTile = 0x00001050, // Scalar from a tile
1630 Tile = 0x00010000, // Tile
1631 TensorStride1 = 0x00100001, // Tensor component
1632 TensorStride2 = 0x00100002, // Tensor component
1633 TensorStride3 = 0x00100003, // Tensor component
1634 TensorStride4 = 0x00100004, // Tensor component
1635 TensorDim0 = 0x00100010, // Tensor component
1636 TensorDim1 = 0x00100020, // Tensor component
1637 TensorDim2 = 0x00100030, // Tensor component
1638 TensorDim3 = 0x00100040, // Tensor component
1639 TensorDim4 = 0x00100050, // Tensor component
1640 TensorC = 0x00100010, // Tensor component
1641 TensorW = 0x00100020, // Tensor component
1642 TensorH = 0x00100030, // Tensor component
1643 TensorD = 0x00100040, // Tensor component
1644 TensorN = 0x00100050, // Tensor component
1645 TensorDim1xDim2 = 0x00100100, // Tensor component
1646 TensorDim1xDim2xDim3 = 0x00100200, // Tensor component
1647 TensorWxH = 0x00100300, // Tensor component
1648 TensorWxHxD = 0x00100400, // Tensor component
1649 TensorDataOffset = 0x00100500, // Tensor component
1650};
1651
1652struct ScalarTileCoord
1653{
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001654 ScalarTileCoord()
1655 {
1656 }
1657
1658 ScalarTileCoord(int32_t x0, int32_t y0)
1659 : x(x0), y(y0)
1660 {
1661 }
1662
1663 int32_t x{ -1 };
1664 int32_t y{ -1 };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001665};
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001666
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001667/**
1668 * @brief Operand class. This object is used to pass the operands to the operations performed by the writer.
1669 * Operand can be of three types:
1670 * -# Scalar immediate: constant expression
1671 * -# Tile: A tile
1672 * -# Tensor component: A component (scalar) of a tensor
1673 *
1674 */
1675class Operand
1676{
1677public:
1678 Operand(const std::string &val)
1679 {
1680 _str = val;
1681 _type = OperandType::Tile;
1682 }
1683
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001684 Operand(const std::string &val, const ScalarTileCoord &coord)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001685 {
1686 _str = val;
1687 _type = OperandType::ScalarTile;
1688 _coord = coord;
1689 }
1690
1691 Operand(const std::string &val, OperandType type)
1692 {
1693 _str = val;
1694 _type = type;
1695 }
1696
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001697 Operand(const Operand &t)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001698 {
1699 _str = t.value();
1700 _type = t.type();
1701 }
1702
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001703 Operand &operator=(const Operand &t)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001704 {
1705 _str = t.value();
1706 _type = t.type();
1707 _coord = t.scalar_tile_coordinate();
1708 return *this;
1709 }
1710
1711 std::string value() const
1712 {
1713 return _str;
1714 }
1715
1716 OperandType type() const
1717 {
1718 return _type;
1719 }
1720
1721 ScalarTileCoord scalar_tile_coordinate() const
1722 {
1723 return _coord;
1724 }
1725
1726private:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001727 std::string _str{};
1728 OperandType _type{ OperandType::Unknown };
1729 ScalarTileCoord _coord{};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001730};
1731
1732enum class GpuSamplerTensorStorage : int32_t
1733{
1734 Unknown = static_cast<int32_t>(GpuTensorStorage::Unknown),
1735 BufferUint8Ptr = static_cast<int32_t>(GpuTensorStorage::BufferUint8Ptr),
1736 Image2dReadOnly = static_cast<int32_t>(GpuTensorStorage::Image2dReadOnly),
1737 Image2dWriteOnly = static_cast<int32_t>(GpuTensorStorage::Image2dWriteOnly),
1738 Image3dReadOnly = static_cast<int32_t>(GpuTensorStorage::Image3dReadOnly),
1739 Image3dWriteOnly = static_cast<int32_t>(GpuTensorStorage::Image2dWriteOnly),
1740};
1741
1742struct GpuSampler
1743{
1744 GpuSampler() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001745
1746 TensorSamplerFormat format{ TensorSamplerFormat::Unknown };
1747 GpuSamplerTensorStorage storage{ GpuSamplerTensorStorage::Unknown };
1748 TensorSamplerAddressModeX address_mode_x{ TensorSamplerAddressModeX::Unknown };
1749 TensorSamplerAddressModeY address_mode_y{ TensorSamplerAddressModeY::Unknown };
1750 TensorSamplerAddressModeZ address_mode_z{ TensorSamplerAddressModeZ::Unknown };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001751};
1752
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001753inline GpuSampler
1754create_simple_sampler(const TensorInfo *tensor_info_id, GpuSampler sampler, int32_t step_x, int32_t step_y,
1755 int32_t step_z)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001756{
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01001757 CKW_UNUSED(step_x, step_y, step_z);
1758
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001759 auto tensor = tensor_info_id->shape;
1760
1761 GpuSampler dst_sampler;
1762 dst_sampler.format = sampler.format;
1763 dst_sampler.storage = GpuSamplerTensorStorage::BufferUint8Ptr;
1764 dst_sampler.address_mode_x = sampler.address_mode_x;
1765 dst_sampler.address_mode_y = sampler.address_mode_y;
1766 dst_sampler.address_mode_z = sampler.address_mode_z;
1767
1768 int32_t dim_x = 0;
1769 int32_t dim_y = 0;
1770 int32_t dim_z = 0;
1771
1772 switch(sampler.format)
1773 {
1774 case TensorSamplerFormat::C_W_H:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001775 dim_x = tensor[0];
1776 dim_y = tensor[1];
1777 dim_z = tensor[2];
1778 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001779 case TensorSamplerFormat::C_WH_1:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001780 dim_x = tensor[0];
1781 dim_y = tensor[1] * tensor[2];
1782 dim_z = 1;
1783 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001784 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001785 std::cout << "Unsupported tensor format" << std::endl;
1786 assert(false);
1787 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001788 }
1789
1790 if(dim_x == 1)
1791 {
1792 assert(step_x == 1);
1793 dst_sampler.address_mode_x = TensorSamplerAddressModeX::None;
1794 }
1795
1796 if(dim_y == 1)
1797 {
1798 assert(step_y == 1);
1799 dst_sampler.address_mode_y = TensorSamplerAddressModeY::None;
1800 }
1801
1802 if(dim_z == 1)
1803 {
1804 assert(step_z == 1);
1805 dst_sampler.address_mode_z = TensorSamplerAddressModeZ::None;
1806 }
1807
1808 return dst_sampler;
1809}
1810
1811class GpuOutputSampler
1812{
1813public:
1814 GpuOutputSampler() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001815
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001816 /**
1817 * @brief Method used to initialize the GpuOutputSampler. The GpuOutputSampler can be initialized only once
1818 * by the root component. Once initialized, all simpler components will need to used this sampler
1819 * or a broadcasted version of it
1820 *
1821 * @param[in] sampler GpuSampler
1822 * @param[in] step_x Increment step in the X direction. Not necessarily it is the same of n0 of tile!
1823 * @param[in] step_y Increment step in the Y direction. Not necessarily it is the same of m0 of tile!
1824 * @param[in] step_z Increment step in the Z direction. Not necessarily it is the same of d0 of tile!
1825 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001826 void initialize(const TensorInfo *tensor_info_id, GpuSamplerTensorStorage tensor_storage,
1827 TensorSamplerFormat tensor_format, int32_t step_x, int32_t step_y, int32_t step_z)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001828 {
1829 assert(_is_initialized == false);
1830
1831 _step_x = step_x;
1832 _step_y = step_y;
1833 _step_z = step_z;
1834 _tensor_info_id = tensor_info_id;
1835 _sampler = create_sampler(tensor_storage, tensor_format);
1836 _is_initialized = true;
1837 };
1838
1839 GpuSampler sampler() const
1840 {
1841 return _sampler;
1842 };
1843
1844 int32_t step_x() const
1845 {
1846 return _step_x;
1847 };
1848
1849 int32_t step_y() const
1850 {
1851 return _step_y;
1852 };
1853
1854 int32_t step_z() const
1855 {
1856 return _step_z;
1857 };
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001858
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001859private:
1860 GpuSampler create_sampler(GpuSamplerTensorStorage tensor_storage, TensorSamplerFormat tensor_format)
1861 {
1862 // Output can only be in output mode
1863 assert(tensor_storage != GpuSamplerTensorStorage::Image2dReadOnly);
1864 assert(tensor_storage != GpuSamplerTensorStorage::Image3dReadOnly);
1865
1866 auto tensor = _tensor_info_id->shape;
1867
1868 GpuSampler sampler;
1869 sampler.format = tensor_format;
1870 sampler.storage = tensor_storage;
1871 sampler.address_mode_x = TensorSamplerAddressModeX::None;
1872 sampler.address_mode_y = TensorSamplerAddressModeY::None;
1873 sampler.address_mode_z = TensorSamplerAddressModeZ::None;
1874
1875 // In the case of texture, we do not need any special checks at the border
1876 if(tensor_storage == GpuSamplerTensorStorage::BufferUint8Ptr)
1877 {
1878 int32_t dim_x = 0;
1879 int32_t dim_y = 0;
1880 int32_t dim_z = 0;
1881
1882 switch(tensor_format)
1883 {
1884 case TensorSamplerFormat::C_W_H:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001885 dim_x = tensor[0];
1886 dim_y = tensor[1];
1887 dim_z = tensor[2];
1888 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001889 case TensorSamplerFormat::C_WH_1:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001890 dim_x = tensor[0];
1891 dim_y = tensor[1] * tensor[2];
1892 dim_z = 1;
1893 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001894 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001895 std::cout << "Unsupported tensor format" << std::endl;
1896 assert(false);
1897 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001898 }
1899
1900 if((dim_x % _step_x) != 0 && dim_x != 1)
1901 {
1902 sampler.address_mode_x = TensorSamplerAddressModeX::OverlappingMin;
1903 }
1904
1905 if((dim_y % _step_y) != 0 && dim_y != 1)
1906 {
1907 sampler.address_mode_y = TensorSamplerAddressModeY::ClampToMaxEdgeOnly;
1908 }
1909
1910 if((dim_z % _step_z) != 0 && dim_z != 1)
1911 {
1912 sampler.address_mode_z = TensorSamplerAddressModeZ::ClampToMaxEdgeOnly;
1913 }
1914 }
1915
1916 return sampler;
1917 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001918
1919 GpuSampler _sampler{}; // GpuSampler
1920 int32_t _step_x{ 1 };
1921 int32_t _step_y{ 1 };
1922 int32_t _step_z{ 1 };
1923 const TensorInfo *_tensor_info_id{ nullptr };
1924 bool _is_initialized{ false };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001925};
1926
1927/**
1928 * @brief Tensor operand class. This object is used to pass the operands as tensor to the operations performed by the writer.
1929 */
1930class TensorOperand
1931{
1932public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001933 TensorOperand(const std::string &val, GpuSampler sampler)
1934 : _str(val), _sampler(sampler)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001935 {
1936 }
1937
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001938 TensorOperand &operator=(const TensorOperand &t)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001939 {
1940 _str = t.value();
1941 _sampler = t.sampler();
1942 return *this;
1943 }
1944
1945 std::string value() const
1946 {
1947 return _str;
1948 }
1949
1950 GpuSampler sampler() const
1951 {
1952 return _sampler;
1953 }
1954
1955private:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001956 std::string _str{};
1957 GpuSampler _sampler{};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001958};
1959
1960/**
1961 * @brief Data structure that contains all the necessary information to write the Gpu kernel with the Gpu kernel Writer
1962 * This data structure must be initialized before being passed to the Gpu Kernel Writer
1963 *
1964 */
1965class GpuKernelWriterDataHolder
1966{
1967public:
1968 /**
1969 * @brief Construct a new Gpu Kernel Data object. In this phase, we should also store
1970 * the GPU target and target specific capabilities (extensions). For now, we just initialize the
1971 * programming language
1972 *
1973 * @param[in] language Gpu programming language to use
1974 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001975 GpuKernelWriterDataHolder(GpuTargetLanguage language)
1976 : tiles(language), arguments(language), code(""), _language(language)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001977 {
1978 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001979
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001980 /**
1981 * @brief Get the Gpu programming language used
1982 *
1983 * @return GpuTargetLanguage the Gpu programming language
1984 */
1985 GpuTargetLanguage programming_language() const
1986 {
1987 return _language;
1988 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001989
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001990 /**
1991 * @brief @ref GpuTileRegistry
1992 *
1993 */
1994 GpuTileRegistry tiles{};
1995 /**
1996 * @brief @ref GpuTensorArgumentRegistry
1997 *
1998 */
1999 GpuTensorArgumentRegistry arguments{};
2000 /**
2001 * @brief @ref GpuOutputSampler.
2002 *
2003 */
2004 GpuOutputSampler output_sampler{};
2005 /**
2006 * @brief Source code
2007 *
2008 */
2009 std::string code{};
2010
2011 // GpuExtensionRegistry extensions{};
2012private:
2013 GpuTargetLanguage _language;
2014};
2015
2016struct LWS
2017{
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002018 int32_t x{ 1 };
2019 int32_t y{ 1 };
2020 int32_t z{ 1 };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002021};
2022
2023/**
2024 * @brief Utility class used to get the tile from the operand. If the operand is not a tile, @ref OperandUnpacker
2025 * declare an anonymous tile in the tile registry.
2026 */
2027class OperandUnpacker
2028{
2029public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002030 OperandUnpacker(GpuTileRegistry &tiles, GpuTensorArgumentRegistry &arguments)
2031 : _tiles(tiles), _arguments(arguments)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002032 {
2033 // Increase the level of the stack to allocate possible temporary tiles
2034 _tiles.increment_registry_level();
2035 };
2036
2037 ~OperandUnpacker()
2038 {
2039 // Decrease the level of the stack to deallocate any temporary tiles
2040 _tiles.decrement_registry_level();
2041 }
2042
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002043 IVectorTile *unpack(const Operand &src)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002044 {
2045 // Get the tile
2046 if(src.type() == OperandType::Tile)
2047 {
2048 assert(_tiles.has_tile(src.value()));
2049 return _tiles[src.value()];
2050 }
2051 // Create an anonymous tile with a constant
2052 else if(static_cast<int32_t>(src.type()) & 0x00001000)
2053 {
2054 if(src.type() == OperandType::ScalarTile)
2055 {
2056 ScalarTileCoord coord = src.scalar_tile_coordinate();
2057 assert(_tiles.has_tile(src.value()));
2058 assert(coord.x >= 0);
2059 assert(coord.y >= 0);
2060 auto val = _tiles[src.value()]->scalar(coord.x, coord.y);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002061 return _tiles.insert({ { { val.str } } }, val.type.dt);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002062 }
2063 else
2064 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002065 return _tiles.insert({ { { src.value() } } }, to_tile_data_type(src.type()));
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002066 }
2067 }
2068 // Create an anonymous tile with the tensor component
2069 else
2070 {
2071 assert(_arguments.has_tensor_argument(src.value()));
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002072 auto x = _arguments[src.value()];
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002073 const std::string val = x->component(to_tensor_component(src.type()));
2074 const DataType dt = x->component_data_type();
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002075 return _tiles.insert({ { { val } } }, dt);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002076 }
2077 }
2078
2079private:
2080 DataType to_tile_data_type(OperandType x)
2081 {
2082 return static_cast<DataType>(static_cast<int32_t>(x) & 0x00ff);
2083 }
2084
2085 TensorComponent to_tensor_component(OperandType x)
2086 {
2087 switch(x)
2088 {
2089 case OperandType::TensorDim0:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002090 return TensorComponent::Dim0;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002091 case OperandType::TensorDim1:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002092 return TensorComponent::Dim1;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002093 case OperandType::TensorDim2:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002094 return TensorComponent::Dim2;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002095 case OperandType::TensorDim3:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002096 return TensorComponent::Dim3;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002097 case OperandType::TensorDim4:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002098 return TensorComponent::Dim4;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002099 case OperandType::TensorStride1:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002100 return TensorComponent::Stride1;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002101 case OperandType::TensorStride2:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002102 return TensorComponent::Stride2;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002103 case OperandType::TensorStride3:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002104 return TensorComponent::Stride3;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002105 case OperandType::TensorStride4:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002106 return TensorComponent::Stride4;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002107 case OperandType::TensorDim1xDim2:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002108 return TensorComponent::Dim1xDim2;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002109 case OperandType::TensorDim1xDim2xDim3:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002110 return TensorComponent::Dim1xDim2xDim3;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002111 case OperandType::TensorDataOffset:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002112 return TensorComponent::OffsetFirstElement;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002113 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002114 assert(false);
2115 return TensorComponent::Unknown;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002116 }
2117 }
2118
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002119 GpuTileRegistry &_tiles;
2120 GpuTensorArgumentRegistry &_arguments;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002121};
2122
2123/**
2124 * @brief Utility class used to get the tensor argument from the operand. If the operand is not a tile, @ref OperandUnpacker
2125 * declare an anonymous tile in the tile registry.
2126 * Tensor dimension reduction aims for reducing the tensor data dimension while keeping data's tensor structure.
2127 */
2128class TensorOperandUnpacker
2129{
2130public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002131 TensorOperandUnpacker(GpuTensorArgumentRegistry &arguments)
2132 : _arguments(arguments){};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002133
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002134 IGpuTensorArgument *unpack(const TensorOperand &src)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002135 {
2136 assert(_arguments.has_tensor_argument(src.value()));
2137 return _arguments[src.value()];
2138 }
2139
2140private:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002141 GpuTensorArgumentRegistry &_arguments;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002142};
2143
2144/**
2145 * @brief The GpuKernel will be used in three occasions (stages):
2146 * #- Compilation stage
2147 * #- Tuning stage
2148 * #- Dispatch stage
2149 */
2150struct GpuKernel
2151{
2152 // Compilation stage
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002153 std::string code{}; // Source code, required for the compilation stage
2154 std::vector<GpuExtensions> list_extensions{}; // Extensions, required for the compilation stage
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002155 // Tuning stage
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002156 std::string config_id{}; // Unique id, required for the tuning stage
2157 std::vector<LWS> list_lws{}; // LWS to test, required for the tuning stage
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002158 // Dispatch stage
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002159 GpuOutputSampler output_sampler{}; // GpuOutputSampler, required for the dispatch stage
2160 std::vector<std::pair<int32_t, GpuTensorStorage>> list_tensor_storages; // List of tensor storages, required for the dispatch stage
2161 std::vector<std::pair<int32_t, TensorComponent>> list_tensor_components; // List of tensor components (width, stride,..), required for the dispatch stage)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002162};
2163
2164// This function should produce an object with the source
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002165inline std::string generate_code(GpuKernelWriterDataHolder &in, const std::string &name)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002166{
2167 std::string code;
2168 code += "__kernel void ";
2169 code += name;
2170 code += "(\n";
2171
2172 auto IdSpaces = in.arguments.IdSpace_declarations();
2173
2174 std::vector<std::string> arg_str;
2175
2176 auto tensor_args = in.arguments.tensor_argument_declarations();
2177
2178 for(auto &i : tensor_args)
2179 {
2180 // For each tensor used, get the storage and tensor components
2181 auto storages = i->storage_declarations();
2182 auto components = i->component_declarations();
2183
2184 for(auto &y : storages)
2185 {
2186 std::string str;
2187 str += i->storage_type_declaration(y);
2188 str += " ";
2189 str += i->storage(y);
2190 arg_str.push_back(str);
2191 }
2192
2193 for(auto &y : components)
2194 {
2195 std::string str;
2196 str += i->component_type_declaration();
2197 str += " ";
2198 str += i->component(y);
2199 arg_str.push_back(str);
2200 }
2201 }
2202
2203 for(size_t i = 0; i < arg_str.size(); ++i)
2204 {
2205 code += arg_str[i];
2206 if(i + 1 < arg_str.size())
2207 {
2208 code += ",\n";
2209 }
2210 }
2211
2212 code += ")\n";
2213 code += "{\n";
2214 code += in.code;
2215 code += "}\n";
2216
2217 return code;
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002218}
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002219
2220/**
2221 * @brief This class is responsible to map a N-Tensor to a 3d tensor. The mapper needs the GpuSampler to know
2222 * how to reduce the dimensionality of a tensor
2223 *
2224 */
2225class GpuTensor3dMapper
2226{
2227public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002228 GpuTensor3dMapper(IGpuTensorArgument *tensor, GpuSampler sampler)
2229 : _sampler(sampler), _tensor(tensor){};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002230
2231 std::string tensor_component_x() const
2232 {
2233 const auto format = _sampler.format;
2234 switch(format)
2235 {
2236 case TensorSamplerFormat::C_WH_1:
2237 case TensorSamplerFormat::C_W_H:
2238 return _tensor->component(TensorComponent::C);
2239 default:
2240 std::cout << "Unsupported tensor format" << std::endl;
2241 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002242 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002243 }
2244 }
2245
2246 std::string tensor_component_y() const
2247 {
2248 const auto format = _sampler.format;
2249 switch(format)
2250 {
2251 case TensorSamplerFormat::C_WH_1:
2252 return _tensor->component(TensorComponent::WxH);
2253 case TensorSamplerFormat::C_W_H:
2254 return _tensor->component(TensorComponent::W);
2255 default:
2256 std::cout << "Unsupported tensor format" << std::endl;
2257 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002258 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002259 }
2260 }
2261
2262 std::string tensor_component_z() const
2263 {
2264 const auto format = _sampler.format;
2265 switch(format)
2266 {
2267 case TensorSamplerFormat::C_WH_1:
2268 return "1";
2269 case TensorSamplerFormat::C_W_H:
2270 return _tensor->component(TensorComponent::H);
2271 default:
2272 std::cout << "Unsupported tensor format" << std::endl;
2273 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002274 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002275 }
2276 }
2277
2278 std::string tensor_component_stride_y() const
2279 {
2280 const auto format = _sampler.format;
2281 switch(format)
2282 {
2283 case TensorSamplerFormat::C_WH_1:
2284 case TensorSamplerFormat::C_W_H:
2285 return _tensor->component(TensorComponent::Stride1);
2286 default:
2287 std::cout << "Unsupported tensor format" << std::endl;
2288 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002289 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002290 }
2291 }
2292
2293 std::string tensor_component_stride_z() const
2294 {
2295 const auto format = _sampler.format;
2296 switch(format)
2297 {
2298 case TensorSamplerFormat::C_WH_1:
2299 return "0";
2300 case TensorSamplerFormat::C_W_H:
2301 return _tensor->component(TensorComponent::Stride2);
2302 default:
2303 std::cout << "Unsupported tensor format" << std::endl;
2304 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002305 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002306 }
2307 }
2308
2309 std::string tensor_component_stride_batch() const
2310 {
2311 const auto format = _sampler.format;
2312 switch(format)
2313 {
2314 case TensorSamplerFormat::C_WH_1:
2315 case TensorSamplerFormat::C_W_H:
2316 return _tensor->component(TensorComponent::Stride3);
2317 default:
2318 std::cout << "Unsupported tensor format" << std::endl;
2319 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002320 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002321 }
2322 }
2323
2324 bool is_one_component_x() const
2325 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002326 auto t = _tensor->format();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002327 const auto format = _sampler.format;
2328 switch(format)
2329 {
2330 case TensorSamplerFormat::C_WH_1:
2331 case TensorSamplerFormat::C_W_H:
2332 return t.shape[0] == 1;
2333 default:
2334 std::cout << "Unsupported tensor format" << std::endl;
2335 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002336 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002337 }
2338 }
2339
2340 bool is_one_component_y() const
2341 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002342 auto t = _tensor->format();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002343 const auto format = _sampler.format;
2344 switch(format)
2345 {
2346 case TensorSamplerFormat::C_WH_1:
2347 return (t.shape[1] * t.shape[2]) == 1;
2348 case TensorSamplerFormat::C_W_H:
2349 return t.shape[1] == 1;
2350 default:
2351 std::cout << "Unsupported tensor format" << std::endl;
2352 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002353 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002354 }
2355 }
2356
2357 bool is_one_component_z() const
2358 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002359 auto t = _tensor->format();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002360 const auto format = _sampler.format;
2361 switch(format)
2362 {
2363 case TensorSamplerFormat::C_WH_1:
2364 return true;
2365 case TensorSamplerFormat::C_W_H:
2366 return t.shape[2] == 1;
2367 default:
2368 std::cout << "Unsupported tensor format" << std::endl;
2369 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002370 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002371 }
2372 }
2373
2374 bool is_one_component_batch() const
2375 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002376 auto t = _tensor->format();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002377 const auto format = _sampler.format;
2378 switch(format)
2379 {
2380 case TensorSamplerFormat::C_WH_1:
2381 case TensorSamplerFormat::C_W_H:
2382 return t.shape[3] == 1;
2383 default:
2384 std::cout << "Unsupported tensor format" << std::endl;
2385 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002386 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002387 }
2388 }
2389
2390 GpuSampler gpu_sampler() const
2391 {
2392 return _sampler;
2393 }
2394
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002395 IGpuTensorArgument *tensor_argument() const
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002396 {
2397 return _tensor;
2398 }
2399
2400private:
2401 GpuSampler _sampler;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002402 IGpuTensorArgument *_tensor;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002403};
2404
2405struct GpuKernelWriterAttribute
2406{
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002407 bool return_tensor_component_by_value{ false };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002408};
2409
2410enum class ConvertPolicy
2411{
2412 Wrap, /**< Wrap around */
2413 Saturate /**< Saturate */
2414};
2415
2416enum class RoundingMode
2417{
2418 None,
2419 Rte,
2420 Rtz,
2421 Rtp,
2422 Rtn
2423};
2424
2425// https://llvm.org/docs/tutorial/MyFirstLanguageFrontend/LangImpl05.html
2426class IGpuKernelWriter
2427{
2428public:
2429 virtual ~IGpuKernelWriter() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002430
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002431 virtual void set_IdSpace(int32_t id) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002432
2433 virtual void import_tile(const std::string &dst, const IVectorTile *src) = 0;
2434
2435 virtual void declare_argument(const std::string &name, const TensorInfo &tensor) = 0;
2436
2437 virtual void declare_tile(const std::string &name, const TileInfo &info) = 0;
2438
2439 virtual void declare_const_tile(const std::string &name, const std::vector<std::vector<std::string>> &in, DataType dt) = 0;
2440
2441 virtual void write_text(const std::string &x) = 0;
2442
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002443 virtual void compound_statement_begin() = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002444
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002445 virtual void compound_statement_end() = 0;
2446
2447 // Operations
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002448 virtual void op_get_global_id(const Operand &dst_var, int32_t dim) = 0;
2449
2450 virtual void op_get_global_coord(const Operand &dst, const Operand &step, const TensorOperand &tensor, int32_t dim) = 0;
2451
2452 virtual void op_get_global_batch(const Operand &dst, const TensorOperand &tensor) = 0;
2453
2454 virtual void op_get_global_size(const Operand &dst_var, int32_t dim) = 0;
2455
2456 virtual void op_binary_expression(const Operand &dst, const Operand &lhs, BinaryOp op, const Operand &rhs) = 0;
2457
2458 virtual void op_assign(const Operand &dst_name, const Operand &src_name) = 0;
2459
2460 virtual void op_scalar_function(const Operand &dst_name, const Operand &src_name, ScalarUnaryFunction func) = 0;
2461
2462 virtual void op_if(const Operand &lhs, BinaryOp op, const Operand &rhs) = 0;
2463
2464 virtual void op_for_loop(const Operand &var_name, BinaryOp cond_op, const Operand &cond_value, AssignmentOp update_op, const Operand &update_value) = 0;
2465
2466 virtual void op_load_indirect(const TensorOperand &tensor, const Operand &dst, const Operand &x, const Operand &y_indirect, const Operand &z, const Operand &b = Operand("0", OperandType::ScalarInt32)) = 0;
2467
2468 virtual void op_load_immediate(const TensorOperand &tensor, const Operand &dst, const Operand &x, const Operand &y, const Operand &z, const Operand &b = Operand("0", OperandType::ScalarInt32), const Operand &dilation_y = Operand("1", OperandType::ScalarInt32)) = 0;
2469
2470 virtual void op_store_immediate(const TensorOperand &tensor, const Operand &src, const Operand &x, const Operand &y, const Operand &z, const Operand &b = Operand("0", OperandType::ScalarInt32)) = 0;
2471
2472 virtual void op_cast_expression(const Operand &dst, const Operand &src, ConvertPolicy policy) = 0;
2473
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002474 virtual void op_return() = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002475
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002476 // virtual void op_else() = 0;
2477 // virtual void op_elseif() = 0;
2478 // Utils
2479 // It is the process of converting
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002480 virtual void util_get_indirect_buffer(const Operand &dst, const TensorOperand &tensor, const Operand &x,
2481 const Operand &y, const Operand &x_off, const Operand &y_off) = 0;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002482};
2483
2484enum class GpuLoadStoreType
2485{
2486 Load = 1,
2487 Store = 2
2488};
2489
2490class IGpuLoadStoreHelperWriter
2491{
2492public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002493 IGpuLoadStoreHelperWriter(IGpuKernelWriter *x, GpuTensor3dMapper mapper, GpuLoadStoreType type)
2494 : _writer(x), _mapper(mapper), _type(type)
2495 {
2496 }
2497
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002498 IGpuLoadStoreHelperWriter(const IGpuLoadStoreHelperWriter &) = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002499
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002500 IGpuLoadStoreHelperWriter &operator=(const IGpuLoadStoreHelperWriter &) = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002501
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002502 virtual ~IGpuLoadStoreHelperWriter() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002503
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002504 virtual void initialize(IVectorTile *dst, IVectorTile *x, IVectorTile *z, IVectorTile *b) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002505
2506 virtual void write(const std::pair<int32_t, std::string> &y) = 0;
2507
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002508 virtual void finalize() = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002509
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002510protected:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002511 IGpuKernelWriter *_writer;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002512 GpuTensor3dMapper _mapper;
2513 GpuLoadStoreType _type;
2514};
2515
2516class ClLoadStoreBufferHelperWriter : public IGpuLoadStoreHelperWriter
2517{
2518public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002519 ClLoadStoreBufferHelperWriter(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type)
2520 : IGpuLoadStoreHelperWriter(x, mapper, type)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002521 {
2522 }
2523
2524 ClLoadStoreBufferHelperWriter(const ClLoadStoreBufferHelperWriter &) = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002525
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002526 ClLoadStoreBufferHelperWriter &operator=(const ClLoadStoreBufferHelperWriter &) = default;
2527
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002528 static bool
2529 validate(IGpuKernelWriter *x, GpuTensor3dMapper mapper, GpuLoadStoreType type, IVectorTile *dst)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002530 {
2531 CKW_UNUSED(x, type, dst);
2532
2533 if(mapper.gpu_sampler().storage != GpuSamplerTensorStorage::BufferUint8Ptr)
2534 {
2535 return false;
2536 }
2537 return true;
2538 }
2539
2540 void initialize(IVectorTile *dst, IVectorTile *x, IVectorTile *z, IVectorTile *b) override
2541 {
2542 assert(validate(_writer, _mapper, _type, dst));
2543
2544 _dst = dst;
2545 _ls_width_full = dst->format().w;
2546
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002547 _coord_x = x->scalar(0, 0).str;
2548 _coord_z = z->scalar(0, 0).str;
2549 _coord_b = b->scalar(0, 0).str;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002550 _coord_orig_z = _coord_z;
2551
2552 out_of_bound_initialize_x(_coord_x);
2553 out_of_bound_initialize_z(_coord_z);
2554
2555 /*
2556 meaning of else:
2557 - x: partial load/store
2558 - y: no load/store operation
2559 - z: no load/store operation
2560 if(x)
2561 {
2562 if(z)
2563 {
2564 if(y)
2565 {
2566 // full load/store width
2567 }
2568 else
2569 {
2570 // no load/store
2571 }
2572 }
2573 else
2574 {
2575 // no load/store
2576 }
2577 }
2578 else
2579 {
2580 if(z)
2581 {
2582 if(y)
2583 {
2584 // partial load/store width
2585 }
2586 else
2587 {
2588 // no load/store
2589 }
2590 }
2591 else
2592 {
2593 // no load/store
2594 }
2595 }
2596 */
2597 }
2598
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002599 void write(const std::pair<int32_t, std::string> &y) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002600 {
2601 int32_t idx_y = y.first;
2602 std::string coord_y = y.second;
2603
2604 // The only check required is on Y.
2605 out_of_bound_initialize_y(coord_y);
2606
2607 const std::string dst = _dst->vector(idx_y).str;
2608 const std::string address = to_ls_buffer_address(_coord_x, coord_y, _coord_z, _coord_b);
2609 const std::string ls_buf = to_ls_buffer(_type, _ls_width_full, dst, address);
2610
2611 _writer->write_text(ls_buf);
2612 _writer->write_text(";\n");
2613
2614 out_of_bound_finalize_y(dst);
2615
2616 // The left over load/store will be written in the finalize stage
2617 if(_ls_width_part.size() != 0)
2618 {
2619 int32_t w = 0;
2620 for(auto &p : _ls_width_part)
2621 {
2622 const std::string dst0 = _dst->vector(w, p, idx_y).str;
2623 const std::string coord_x = _coord_x + " + " + std::to_string(w);
2624 const std::string address = to_ls_buffer_address(coord_x, coord_y, _coord_z, _coord_b);
2625 const std::string ls_buf0 = to_ls_buffer(_type, p, dst0, address);
2626 _leftovers_x.push_back(std::make_pair(std::make_pair(dst0, coord_y), ls_buf0));
2627
2628 w += p;
2629 }
2630 }
2631 }
2632
2633 void finalize() override
2634 {
2635 out_of_bound_finalize_z();
2636 out_of_bound_finalize_x();
2637 }
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002638
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002639private:
2640 IVectorTile *_dst{ nullptr };
2641 int32_t _ls_width_full{ 0 };
2642 std::vector<int32_t> _ls_width_part{};
2643 std::vector<std::pair<std::pair<std::string, std::string>, std::string>> _leftovers_x{};
2644 std::string _coord_x{};
2645 std::string _coord_z{};
2646 std::string _coord_orig_z{};
2647 std::string _coord_b{};
2648
2649 void out_of_bound_initialize_x(std::string &coord)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002650 {
2651 if(_mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin)
2652 {
2653 auto tensor_format = _mapper.tensor_argument()->format();
2654 auto shape = tensor_format.shape;
2655
2656 _ls_width_part = decompose_leftover_ls_vector_width(shape[0] % _ls_width_full);
2657 if(_ls_width_part.size() != 0)
2658 {
2659 _writer->write_text("if(" + coord + " > 0)\n");
2660 _writer->compound_statement_begin();
2661 }
2662 }
2663 };
2664
2665 void out_of_bound_finalize_x()
2666 {
2667 if(_mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin)
2668 {
2669 if(_ls_width_part.size() != 0)
2670 {
2671 _writer->compound_statement_end();
2672 _writer->write_text("else\n");
2673 _writer->compound_statement_begin();
2674
2675 out_of_bound_initialize_z(_coord_orig_z);
2676 for(auto &i : _leftovers_x)
2677 {
2678 out_of_bound_initialize_y(i.first.second);
2679 _writer->write_text(i.second);
2680 _writer->write_text(";\n");
2681 out_of_bound_finalize_y(i.first.first);
2682 }
2683 out_of_bound_finalize_z();
2684 _writer->compound_statement_end();
2685 }
2686 }
2687 };
2688
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002689 void out_of_bound_initialize_y(std::string &coord)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002690 {
2691 std::string max = "";
2692
2693 const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
2694
2695 switch(address_mode_y)
2696 {
2697 case TensorSamplerAddressModeY::Skip:
2698 case TensorSamplerAddressModeY::ClampToBorder:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002699 // NOTE: This line should not be moved outside of the switch statement.
2700 // The reason for that is because when we query the component, the component is marked as used
2701 // and added to the list of arguments of the kernel. Since, not in all cases this component is required,
2702 // we should request the component only when used
2703 max = _mapper.tensor_component_y();
2704 _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n");
2705 _writer->compound_statement_begin();
2706 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002707 case TensorSamplerAddressModeY::SkipMinEdgeOnly:
2708 case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002709 _writer->write_text("if(" + coord + " >= 0)\n");
2710 _writer->compound_statement_begin();
2711 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002712 case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
2713 case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002714 max = _mapper.tensor_component_y();
2715 _writer->write_text("if(" + coord + " < " + max + ")\n");
2716 _writer->compound_statement_begin();
2717 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002718 case TensorSamplerAddressModeY::ClampToNearest:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002719 max = _mapper.tensor_component_y();
2720 coord = "clamp(" + coord + ", 0, " + max + " - 1)";
2721 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002722 case TensorSamplerAddressModeY::ClampToMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002723 max = _mapper.tensor_component_y();
2724 coord = "min(" + coord + ", " + max + " - 1)";
2725 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002726 case TensorSamplerAddressModeY::ClampToMinEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002727 coord = "max(" + coord + ", 0)";
2728 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002729 case TensorSamplerAddressModeY::None:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002730 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002731 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002732 std::cout << "Unsupported address mode for write_out_of_bound_check_yz" << std::endl;
2733 assert(false);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002734 }
2735 };
2736
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002737 void out_of_bound_finalize_y(const std::string &dst)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002738 {
2739 const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
2740
2741 switch(address_mode_y)
2742 {
2743 case TensorSamplerAddressModeY::ClampToBorder:
2744 case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
2745 case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
2746 case TensorSamplerAddressModeY::Skip:
2747 case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
2748 case TensorSamplerAddressModeY::SkipMinEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002749 _writer->compound_statement_end();
2750 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002751
2752 default:
2753 assert(false);
2754 }
2755
2756 switch(address_mode_y)
2757 {
2758 case TensorSamplerAddressModeY::ClampToBorder:
2759 case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
2760 case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002761 _writer->write_text("else\n");
2762 _writer->compound_statement_begin();
2763 _writer->write_text(dst);
2764 _writer->write_text(" = 0.0f;\n");
2765 _writer->compound_statement_end();
2766 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002767
2768 default:
2769 assert(false);
2770 }
2771 };
2772
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002773 void out_of_bound_initialize_z(std::string &coord)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002774 {
2775 std::string max = "";
2776
2777 const auto address_mode_z = _mapper.gpu_sampler().address_mode_z;
2778
2779 switch(address_mode_z)
2780 {
2781 case TensorSamplerAddressModeZ::Skip:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002782 max = _mapper.tensor_component_z();
2783 _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n");
2784 _writer->compound_statement_begin();
2785 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002786 case TensorSamplerAddressModeZ::SkipMinEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002787 _writer->write_text("if(" + coord + " >= 0)\n");
2788 _writer->compound_statement_begin();
2789 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002790 case TensorSamplerAddressModeZ::SkipMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002791 max = _mapper.tensor_component_z();
2792 _writer->write_text("if(" + coord + " < " + max + ")\n");
2793 _writer->compound_statement_begin();
2794 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002795 case TensorSamplerAddressModeZ::ClampToNearest:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002796 max = _mapper.tensor_component_z();
2797 coord = "clamp(" + coord + ", 0, " + max + " - 1)";
2798 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002799 case TensorSamplerAddressModeZ::ClampToMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002800 max = _mapper.tensor_component_z();
2801 coord = "min(" + coord + ", " + max + " - 1)";
2802 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002803 case TensorSamplerAddressModeZ::ClampToMinEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002804 coord = "max(" + coord + ", 0)";
2805 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002806 case TensorSamplerAddressModeZ::None:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002807 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002808 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002809 std::cout << "Unsupported address mode for write_out_of_bound_check_yz" << std::endl;
2810 assert(false);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002811 }
2812 };
2813
2814 void out_of_bound_finalize_z()
2815 {
2816 const auto address_mode_z = _mapper.gpu_sampler().address_mode_z;
2817
2818 switch(address_mode_z)
2819 {
2820 case TensorSamplerAddressModeZ::Skip:
2821 case TensorSamplerAddressModeZ::SkipMinEdgeOnly:
2822 case TensorSamplerAddressModeZ::SkipMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002823 _writer->compound_statement_end();
2824 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002825
2826 default:
2827 assert(false);
2828 }
2829 };
2830
2831 std::vector<int32_t> decompose_leftover_ls_vector_width(int32_t ls_leftover_vector_width) const
2832 {
2833 std::vector<int32_t> x;
2834
2835 switch(ls_leftover_vector_width)
2836 {
2837 case 0:
2838 break;
2839 case 1:
2840 case 2:
2841 case 3:
2842 case 4:
2843 case 8:
2844 case 16:
2845 x.push_back(ls_leftover_vector_width);
2846 break;
2847 case 5:
2848 x.push_back(4);
2849 x.push_back(1);
2850 break;
2851 case 6:
2852 x.push_back(4);
2853 x.push_back(2);
2854 break;
2855 case 7:
2856 x.push_back(4);
2857 x.push_back(3);
2858 break;
2859 case 9:
2860 x.push_back(8);
2861 x.push_back(1);
2862 break;
2863 case 10:
2864 x.push_back(8);
2865 x.push_back(2);
2866 break;
2867 case 11:
2868 x.push_back(8);
2869 x.push_back(3);
2870 break;
2871 case 12:
2872 x.push_back(8);
2873 x.push_back(4);
2874 break;
2875 case 13:
2876 x.push_back(8);
2877 x.push_back(4);
2878 x.push_back(1);
2879 break;
2880 case 14:
2881 x.push_back(8);
2882 x.push_back(4);
2883 x.push_back(2);
2884 break;
2885 case 15:
2886 x.push_back(8);
2887 x.push_back(4);
2888 x.push_back(3);
2889 break;
2890
2891 default:
2892 assert(false);
2893 }
2894 return x;
2895 }
2896
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002897 std::string to_ls_buffer(GpuLoadStoreType type, int32_t vector_width, const std::string &data,
2898 const std::string &address)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002899 {
2900 switch(type)
2901 {
2902 case GpuLoadStoreType::Load:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002903 if(vector_width != 1)
2904 {
2905 return data + " = vload" + std::to_string(vector_width) + "(0, " + address + ")";
2906 }
2907 else
2908 {
2909 return data + " = *(" + address + ")";
2910 }
2911 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002912 case GpuLoadStoreType::Store:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002913 if(vector_width != 1)
2914 {
2915 return "vstore" + std::to_string(vector_width) + "(" + data + ", 0, " + address + ")";
2916 }
2917 else
2918 {
2919 return "*(" + address + ") = " + data;
2920 }
2921 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002922 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002923 std::cout << "Unsupported GpuLoadStoreType" << std::endl;
2924 assert(false);
2925 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002926 }
2927 }
2928
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002929 std::string to_ls_buffer_address(const std::string &x, const std::string &y, const std::string &z,
2930 const std::string &b) const
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002931 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002932 auto tensor_storage = static_cast<GpuTensorStorage>(_mapper.gpu_sampler().storage);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002933 assert(tensor_storage == GpuTensorStorage::BufferUint8Ptr);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002934 const std::string ptr_buf = _mapper.tensor_argument()->storage(tensor_storage);
2935 const std::string dst_type = get_cl_data_type(_dst->format().dt, 1);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002936
2937 std::string address;
2938 address += "(__global ";
2939 address += dst_type;
2940 address += "*)(";
2941 address += ptr_buf;
2942 if(x != "0" && (_mapper.is_one_component_x() != true))
2943 {
2944 address += " + (";
2945 address += x + ") * sizeof(" + dst_type + ")";
2946 }
2947 if(y != "0" && (_mapper.is_one_component_y() != true))
2948 {
2949 const std::string stride_y = _mapper.tensor_component_stride_y();
2950 address += " + (";
2951 address += y + ")";
2952 address += " * ";
2953 address += stride_y;
2954 }
2955 if(z != "0" && (_mapper.is_one_component_z() != true))
2956 {
2957 const std::string stride_z = _mapper.tensor_component_stride_z();
2958 address += " + (";
2959 address += z + ")";
2960 address += " * ";
2961 address += stride_z;
2962 }
2963 if(b != "0" && (_mapper.is_one_component_batch() != true))
2964 {
2965 const std::string stride_b = _mapper.tensor_component_stride_batch();
2966 address += " + (";
2967 address += b + ")";
2968 address += " * ";
2969 address += stride_b;
2970 }
2971 address += ")";
2972 return address;
2973 }
2974};
2975
2976class ClLoadStoreImage2dHelperWriter : public IGpuLoadStoreHelperWriter
2977{
2978public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002979 static bool
2980 validate(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type, IVectorTile *dst)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002981 {
2982 CKW_UNUSED(x);
2983
2984 if(dst->format().w != 4)
2985 {
2986 return false;
2987 }
2988 if(mapper.gpu_sampler().address_mode_x != TensorSamplerAddressModeX::None)
2989 {
2990 return false;
2991 }
2992 if(mapper.gpu_sampler().address_mode_z != TensorSamplerAddressModeZ::None)
2993 {
2994 return false;
2995 }
2996 if(mapper.gpu_sampler().storage != GpuSamplerTensorStorage::Image2dReadOnly && type == GpuLoadStoreType::Load)
2997 {
2998 return false;
2999 }
3000 if(mapper.gpu_sampler().storage != GpuSamplerTensorStorage::Image2dWriteOnly && type == GpuLoadStoreType::Store)
3001 {
3002 return false;
3003 }
3004 if((dst->format().dt != DataType::Fp32) && (dst->format().dt != DataType::Fp16))
3005 {
3006 return false;
3007 }
3008 return true;
3009 /*
3010 - x: Only GpuSamplerAddressModeX::None is supported and vector length = 4
3011 - z: Only GpuSamplerAddressModeZ::None is supported
3012 */
3013 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003014
3015 ClLoadStoreImage2dHelperWriter(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type)
3016 : IGpuLoadStoreHelperWriter(x, mapper, type)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003017 {
3018 }
3019
3020 ClLoadStoreImage2dHelperWriter(const ClLoadStoreImage2dHelperWriter &) = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003021
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003022 ClLoadStoreImage2dHelperWriter &operator=(const ClLoadStoreImage2dHelperWriter &) = default;
3023
3024 void initialize(IVectorTile *dst, IVectorTile *x, IVectorTile *z, IVectorTile *b) override
3025 {
3026 assert(validate(_writer, _mapper, _type, dst));
3027
3028 _dst = dst;
3029 _ls_width_full = dst->format().w;
3030 _coord_x = x->scalar(0, 0).str;
3031 _coord_z = z->scalar(0, 0).str;
3032 _coord_b = b->scalar(0, 0).str;
3033
3034 /*
3035 if(y)
3036 {
3037 // full load/store width
3038 }
3039 else
3040 {
3041 // no load/store
3042 }
3043 */
3044 }
3045
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003046 void write(const std::pair<int32_t, std::string> &y) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003047 {
3048 int32_t idx_y = y.first;
3049 std::string coord_y = y.second;
3050
3051 // The only check required is on Y.
3052 out_of_bound_initialize_y(coord_y);
3053
3054 const std::string dst = _dst->vector(idx_y).str;
3055 const std::string sampler = to_ls_image2d_sampler();
3056 const std::string coord = to_ls_image2d_coord(_coord_x, coord_y, _coord_z, _coord_b);
3057 const std::string ls_buf = to_ls_image2d(_type, _ls_width_full, dst, sampler, coord);
3058
3059 _writer->write_text(ls_buf);
3060 _writer->write_text(";\n");
3061
3062 out_of_bound_finalize_y(dst);
3063 }
3064
3065 void finalize() override
3066 {
3067 }
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003068
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003069private:
3070 IVectorTile *_dst{ nullptr };
3071 int32_t _ls_width_full{ 0 };
3072 std::string _coord_x{};
3073 std::string _coord_z{};
3074 std::string _coord_b{};
3075
3076 void out_of_bound_initialize_y(std::string &coord)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003077 {
3078 std::string max = "";
3079
3080 const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
3081
3082 switch(address_mode_y)
3083 {
3084 case TensorSamplerAddressModeY::Skip:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003085 max = _mapper.tensor_component_y();
3086 _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n");
3087 _writer->compound_statement_begin();
3088 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003089 case TensorSamplerAddressModeY::SkipMinEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003090 _writer->write_text("if(" + coord + " >= 0)\n");
3091 _writer->compound_statement_begin();
3092 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003093 case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003094 max = _mapper.tensor_component_y();
3095 _writer->write_text("if(" + coord + " < " + max + ")\n");
3096 _writer->compound_statement_begin();
3097 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003098 case TensorSamplerAddressModeY::ClampToBorder:
3099 case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
3100 case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
3101 case TensorSamplerAddressModeY::ClampToNearest:
3102 case TensorSamplerAddressModeY::ClampToMaxEdgeOnly:
3103 case TensorSamplerAddressModeY::ClampToMinEdgeOnly:
3104 case TensorSamplerAddressModeY::None:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003105 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003106 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003107 std::cout << "Unsupported address mode for write_out_of_bound_check_y" << std::endl;
3108 assert(false);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003109 }
3110 };
3111
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003112 void out_of_bound_finalize_y(const std::string &dst)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003113 {
3114 CKW_UNUSED(dst);
3115
3116 const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
3117
3118 switch(address_mode_y)
3119 {
3120 case TensorSamplerAddressModeY::Skip:
3121 case TensorSamplerAddressModeY::SkipMinEdgeOnly:
3122 case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003123 _writer->compound_statement_end();
3124 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003125
3126 default:
3127 assert(false);
3128 }
3129 };
3130
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003131 std::string to_ls_image2d(GpuLoadStoreType type, int32_t vector_width, const std::string &data,
3132 const std::string &sampler, const std::string &coord)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003133 {
3134 CKW_UNUSED(vector_width);
3135
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003136 auto tensor_storage = static_cast<GpuTensorStorage>(_mapper.gpu_sampler().storage);
3137 const std::string image2d_obj = _mapper.tensor_argument()->storage(tensor_storage);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003138 // const DataType dt = _dst->format().dt;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003139 const std::string post_fix = _dst->format().dt == DataType::Fp32 ? "f" : "h";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003140
3141 switch(type)
3142 {
3143 case GpuLoadStoreType::Load:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003144 return data + " = read_image" + post_fix + "(" + image2d_obj + ", " + sampler + ", " + coord + ")";
3145 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003146 case GpuLoadStoreType::Store:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003147 return "write_image" + post_fix + "(" + image2d_obj + ", " + coord + ", " + data + ")";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003148 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003149 assert(false);
3150 std::cout << "Unsupported GpuLoadStoreType" << std::endl;
3151 assert(false);
3152 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003153 }
3154 }
3155
3156 std::string to_ls_image2d_sampler() const
3157 {
3158 const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
3159
3160 switch(address_mode_y)
3161 {
3162 case TensorSamplerAddressModeY::None:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003163 return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003164 case TensorSamplerAddressModeY::Skip:
3165 case TensorSamplerAddressModeY::SkipMinEdgeOnly:
3166 case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
3167 case TensorSamplerAddressModeY::ClampToBorder:
3168 case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
3169 case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003170 return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003171 case TensorSamplerAddressModeY::ClampToNearest:
3172 case TensorSamplerAddressModeY::ClampToMaxEdgeOnly:
3173 case TensorSamplerAddressModeY::ClampToMinEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003174 return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003175 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003176 std::cout << "Unsupported address_mode_coord" << std::endl;
3177 assert(false);
3178 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003179 }
3180 }
3181
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003182 std::string to_ls_image2d_coord(const std::string &x, const std::string &y, const std::string &z,
3183 const std::string &b) const
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003184 {
3185 std::string coord_x = "(" + x + ") >> 2";
3186 std::string coord_y = "(";
3187
3188 if(y != "0" && (_mapper.is_one_component_y() != true))
3189 {
3190 coord_y += y;
3191 }
3192 if(z != "0" && (_mapper.is_one_component_z() != true))
3193 {
3194 const std::string dim = _mapper.tensor_component_y();
3195 coord_y += " + (";
3196 coord_y += z + ")";
3197 coord_y += " * ";
3198 coord_y += dim;
3199 }
3200 if(b != "0" && (_mapper.is_one_component_batch() != true))
3201 {
3202 const std::string dim0 = _mapper.tensor_component_y();
3203 const std::string dim1 = _mapper.tensor_component_z();
3204 coord_y += " + (";
3205 coord_y += b + ")";
3206 coord_y += " * ";
3207 coord_y += dim0;
3208 coord_y += " * ";
3209 coord_y += dim1;
3210 }
3211 coord_y += ")";
3212 return "(int2)(" + coord_x + ", " + coord_y + ")";
3213 }
3214};
3215
3216/** IGpuLoadStoreHelperWriter factory class */
3217class ClLoadStoreHelperWriterFactory final
3218{
3219public:
3220 /** Static method to call the IGpuLoadStoreHelperWriter class accordingly with the tensor storage set in the mapper
3221 *
3222 *
3223 * @return IGpuLoadStoreHelperWriter
3224 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003225 static std::unique_ptr<IGpuLoadStoreHelperWriter>
3226 create(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003227 {
3228 const auto tensor_storage = mapper.gpu_sampler().storage;
3229 switch(tensor_storage)
3230 {
3231 case GpuSamplerTensorStorage::BufferUint8Ptr:
3232 return std::make_unique<ClLoadStoreBufferHelperWriter>(x, mapper, type);
3233 case GpuSamplerTensorStorage::Image2dReadOnly:
3234 case GpuSamplerTensorStorage::Image2dWriteOnly:
3235 return std::make_unique<ClLoadStoreImage2dHelperWriter>(x, mapper, type);
3236 default:
3237 std::cout << "Unsupported Gpu tensor storage" << std::endl;
3238 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01003239 return nullptr;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003240 }
3241 }
3242};
3243
3244// This utility method needs to go in utils.h
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003245inline bool is_tile_scalar(IVectorTile *x)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003246{
3247 return x->format().w == 1 && x->format().h == 1;
3248}
3249
3250class ClKernelWriter : public IGpuKernelWriter
3251{
3252public:
3253 ClKernelWriter(GpuKernelWriterAttribute *attr, GpuKernelWriterDataHolder *x)
3254 {
3255 _data = x;
3256 _attr = attr;
3257 }
3258
3259 ClKernelWriter(const ClKernelWriter &) = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003260
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003261 ClKernelWriter &operator=(const ClKernelWriter &) = default;
3262
3263 // A IdSpaced ID is a term used to describe a fragment that is registered in ICode to ensure
3264 // there are no conflicts or ambiguity in the code
3265 void set_IdSpace(int32_t id) override
3266 {
3267 _data->tiles.set_IdSpace(id);
3268 _data->arguments.set_IdSpace(id);
3269 }
3270
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003271 void import_tile(const std::string &dst_name, const IVectorTile *src) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003272 {
3273 _data->tiles.insert(dst_name, src);
3274 }
3275
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003276 void declare_argument(const std::string &name, const TensorInfo &tensor) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003277 {
3278 assert(_data->arguments[name] == nullptr);
3279 _data->arguments.insert(name, tensor, _attr->return_tensor_component_by_value);
3280 }
3281
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003282 void declare_tile(const std::string &name, const TileInfo &format) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003283 {
3284 assert(_data->tiles[name] == nullptr);
3285 _data->tiles.insert(name, format);
3286
3287 IVectorTile *x = _data->tiles[name];
3288
3289 for(auto &t : x->underlying_source_variables())
3290 {
3291 _data->code += t.type.str + " " + t.str + ";\n";
3292 }
3293 }
3294
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003295 void declare_const_tile(const std::string &name, const std::vector<std::vector<std::string>> &in,
3296 DataType dt) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003297 {
3298 assert(_data->tiles[name] == nullptr);
3299 _data->tiles.insert(name, in, dt);
3300 // Note: A constant does not need to be declared in the code
3301 }
3302
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003303 void write_text(const std::string &x) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003304 {
3305 _data->code += x;
3306 }
3307
3308 void compound_statement_begin() override
3309 {
3310 _data->tiles.increment_registry_level();
3311 _data->code += "{\n";
3312 }
3313
3314 void compound_statement_end() override
3315 {
3316 _data->tiles.decrement_registry_level();
3317 _data->code += "}\n";
3318 }
3319
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003320 void op_get_global_id(const Operand &dst_var, int32_t dim) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003321 {
3322 assert(dst_var.type() == OperandType::Tile);
3323 assert(_data->tiles.has_tile(dst_var.value()));
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003324 assert(_data->tiles[dst_var.value()]->format().w == 1 && _data->tiles[dst_var.value()]->format().h == 1); // It must be a scalar variable
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003325
3326 auto var = _data->tiles[dst_var.value()];
3327
3328 _data->code += var->scalar(0, 0).str;
3329 _data->code += " = get_global_id(";
3330 _data->code += std::to_string(dim);
3331 _data->code += ");\n";
3332 };
3333
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003334 void op_get_global_coord(const Operand &o_dst, const Operand &o_step, const TensorOperand &o_tensor,
3335 int32_t dim) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003336 {
3337 OperandUnpacker operands(_data->tiles, _data->arguments);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003338 auto dst = operands.unpack(o_dst);
3339 auto step = operands.unpack(o_step);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003340
3341 // Validation: Check that x, y and z are scalar
3342
3343 TensorOperandUnpacker tensor_operands(_data->arguments);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003344 auto tensor = tensor_operands.unpack(o_tensor);
3345 auto gpu_sampler = o_tensor.sampler();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003346
3347 GpuTensor3dMapper mapper(tensor, gpu_sampler);
3348
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003349 switch(dim)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003350 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003351 case 0:
3352 if(mapper.is_one_component_x())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003353 {
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003354 _data->code += dst->scalar(0, 0).str;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003355 _data->code += " = 0;\n";
3356 }
3357 else
3358 {
3359 if(mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin)
3360 {
3361 // Validation: Check: fixed tensor shape
3362 // TO BE CHANGED
3363 _data->code += dst->scalar(0, 0).str;
3364 _data->code += " = get_global_id(0) * ";
3365 _data->code += step->scalar(0, 0).str;
3366 _data->code += ";\n";
3367 }
3368 else
3369 {
3370 _data->code += dst->scalar(0, 0).str;
3371 _data->code += " = get_global_id(0) * ";
3372 _data->code += step->scalar(0, 0).str;
3373 _data->code += ";\n";
3374 }
3375 }
3376 break;
3377 case 1:
3378 if(mapper.is_one_component_y())
3379 {
3380 _data->code += dst->scalar(0, 0).str;
3381 _data->code += " = 0;\n";
3382 }
3383 else
3384 {
3385 if(mapper.gpu_sampler().address_mode_y == TensorSamplerAddressModeY::OverlappingMin)
3386 {
3387 }
3388 else
3389 {
3390 _data->code += dst->scalar(0, 0).str;
3391 _data->code += " = get_global_id(1) * ";
3392 _data->code += step->scalar(0, 0).str;
3393 _data->code += ";\n";
3394 }
3395 }
3396 break;
3397 case 2:
3398 if(mapper.is_one_component_z())
3399 {
3400 _data->code += dst->scalar(0, 0).str;
3401 _data->code += " = 0;\n";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003402 }
3403 else
3404 {
3405 _data->code += dst->scalar(0, 0).str;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003406 _data->code += " = get_global_id(2) * ";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003407 _data->code += step->scalar(0, 0).str;
3408 _data->code += ";\n";
3409 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003410 break;
3411 default:
3412 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003413 }
3414 };
3415
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003416 void op_get_global_batch(const Operand &o_dst, const TensorOperand &o_tensor) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003417 {
3418 OperandUnpacker operands(_data->tiles, _data->arguments);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003419 auto dst = operands.unpack(o_dst);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003420
3421 TensorOperandUnpacker tensor_operands(_data->arguments);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003422 auto tensor = tensor_operands.unpack(o_tensor);
3423 auto gpu_sampler = o_tensor.sampler();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003424
3425 GpuTensor3dMapper mapper(tensor, gpu_sampler);
3426
3427 if(mapper.is_one_component_batch())
3428 {
3429 _data->code += dst->scalar(0, 0).str;
3430 _data->code += " = 0;\n";
3431 }
3432 else
3433 {
3434 std::cout << "Unsupported batched computation" << std::endl;
3435 assert(false);
3436 }
3437 };
3438
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003439 void op_get_global_size(const Operand &dst_var, int32_t dim) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003440 {
3441 assert(dst_var.type() == OperandType::Tile);
3442 assert(_data->tiles.has_tile(dst_var.value()));
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003443 assert(_data->tiles[dst_var.value()]->format().w == 1 && _data->tiles[dst_var.value()]->format().h == 1); // It must be a scalar variable
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003444
3445 auto var = _data->tiles[dst_var.value()];
3446
3447 _data->code += var->scalar(0, 0).str;
3448 _data->code += " = get_global_size(";
3449 _data->code += std::to_string(dim);
3450 _data->code += ");\n";
3451 }
3452
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003453 void op_binary_expression(const Operand &dst_name, const Operand &lhs_name, BinaryOp op,
3454 const Operand &rhs_name) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003455 {
3456 OperandUnpacker operands(_data->tiles, _data->arguments);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003457 auto lhs = operands.unpack(lhs_name);
3458 auto rhs = operands.unpack(rhs_name);
3459 auto dst = operands.unpack(dst_name);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003460
3461 const int32_t dst_w = dst->format().w;
3462 const int32_t dst_h = dst->format().h;
3463 assert(lhs != nullptr);
3464 const int32_t lhs_w = lhs->format().w;
3465 const int32_t rhs_w = rhs->format().w;
3466
3467 if(op == BinaryOp::MatMul_Nt_T)
3468 {
3469 assert((dst->format().dt == DataType::Fp32) || (dst->format().dt == DataType::Fp16));
3470 for(int32_t y = 0; y < dst_h; ++y)
3471 {
3472 for(int32_t x = 0; x < dst_w; ++x)
3473 {
3474 for(int32_t k = 0; k < lhs_w; ++k)
3475 {
3476 _data->code += dst->scalar(x, y).str;
3477 _data->code += " = fma(";
3478 _data->code += lhs->scalar(k, y).str;
3479 _data->code += ", ";
3480 _data->code += rhs->scalar(k, x).str;
3481 _data->code += ", ";
3482 _data->code += dst->scalar(x, y).str;
3483 _data->code += ");\n";
3484 }
3485 }
3486 }
3487
3488 return;
3489 }
3490
3491 bool broadcast_lhs_x = dst_w != 1 && lhs_w == 1;
3492 bool broadcast_rhs_x = dst_w != 1 && rhs_w == 1;
3493
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003494 std::string lhs_prefix = broadcast_lhs_x ? "(" + dst->underlying_source_variables()[0].type.str + ")" : "";
3495 std::string rhs_prefix = broadcast_rhs_x ? "(" + dst->underlying_source_variables()[0].type.str + ")" : "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003496 std::string op_str = to_string(op);
3497
3498 // Broadcasting on Y is automatic
3499 for(int32_t y = 0; y < dst_h; ++y)
3500 {
3501 _data->code += dst->vector(y).str;
3502 _data->code += " = ";
3503 _data->code += lhs_prefix + lhs->vector(y).str;
3504 _data->code += " ";
3505 _data->code += op_str;
3506 _data->code += " ";
3507 _data->code += rhs_prefix + rhs->vector(y).str;
3508 _data->code += ";\n";
3509 }
3510 };
3511
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003512 void op_cast_expression(const Operand &o_dst, const Operand &o_src, ConvertPolicy policy) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003513 {
3514 CKW_UNUSED(policy);
3515
3516 OperandUnpacker operands(_data->tiles, _data->arguments);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003517 auto src = operands.unpack(o_src);
3518 auto dst = operands.unpack(o_dst);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003519
3520 // const int32_t dst_w = dst->format().w;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003521 const int32_t dst_h = dst->format().h;
3522 const std::string dt = dst->scalar(0, 0).type.str;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003523
3524 // Broadcasting on Y is automatic
3525 for(int32_t y = 0; y < dst_h; ++y)
3526 {
3527 _data->code += dst->vector(y).str;
3528 _data->code += " = convert_" + dt + "(";
3529 _data->code += src->vector(y).str;
3530 _data->code += ");\n";
3531 }
3532 };
3533
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003534 void op_assign(const Operand &dst_name, const Operand &src_name) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003535 {
3536 OperandUnpacker operands(_data->tiles, _data->arguments);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003537 auto src = operands.unpack(src_name);
3538 auto dst = operands.unpack(dst_name);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003539
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003540 const int32_t dst_w = dst->format().w;
3541 const int32_t dst_h = dst->format().h;
3542 const int32_t src_w = src->format().w;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003543 // const int32_t src_h = src->format().h;
3544 const std::string dt = dst->scalar(0, 0).type.str;
3545
3546 bool broadcast_src_x = dst_w != 1 && src_w == 1;
3547
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003548 std::string src_prefix = broadcast_src_x ? "(" + dt + ")" : "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003549
3550 // Broadcasting on Y is automatic
3551 for(int32_t y = 0; y < dst_h; ++y)
3552 {
3553 _data->code += dst->vector(y).str;
3554 _data->code += " = ";
3555 _data->code += src_prefix + src->vector(y).str;
3556 _data->code += ";\n";
3557 }
3558 }
3559
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003560 void
3561 op_scalar_function(const Operand &dst_name, const Operand &src_name, ScalarUnaryFunction func) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003562 {
3563 OperandUnpacker operands(_data->tiles, _data->arguments);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003564 auto src = operands.unpack(src_name);
3565 auto dst = operands.unpack(dst_name);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003566
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003567 const int32_t dst_w = dst->format().w;
3568 const int32_t dst_h = dst->format().h;
3569 const int32_t src_w = src->format().w;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003570 // const int32_t src_h = src->format().h;
3571 const std::string dt = dst->scalar(0, 0).type.str;
3572
3573 bool broadcast_src_x = dst_w != 1 && src_w == 1;
3574
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003575 std::string src_prefix = broadcast_src_x ? "(" + dt + ")" : "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003576
3577 // Broadcasting on Y is automatic
3578 for(int32_t y = 0; y < dst_h; ++y)
3579 {
3580 _data->code += dst->vector(y).str;
3581 _data->code += " = ";
3582
3583 switch(func)
3584 {
3585 case ScalarUnaryFunction::Exp:
3586 _data->code += "exp(";
3587 break;
3588
3589 default:
3590 CKW_ASSERT(false);
3591 }
3592
3593 _data->code += src_prefix + src->vector(y).str;
3594 _data->code += ");\n";
3595 }
3596 }
3597
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003598 void op_if(const Operand &o_lhs, BinaryOp op, const Operand &o_rhs) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003599 {
3600 OperandUnpacker operands(_data->tiles, _data->arguments);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003601 auto lhs = operands.unpack(o_lhs);
3602 auto rhs = operands.unpack(o_rhs);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003603
3604 assert(is_tile_scalar(lhs));
3605 assert(is_tile_scalar(rhs));
3606
3607 _data->code += "if(";
3608 _data->code += lhs->scalar(0, 0).str;
3609 _data->code += " ";
3610 _data->code += to_string(op);
3611 _data->code += " ";
3612 _data->code += rhs->scalar(0, 0).str;
3613 _data->code += ")\n";
3614 }
3615
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003616 void op_for_loop(const Operand &var_name, BinaryOp cond_op, const Operand &cond_value_name,
3617 AssignmentOp update_op, const Operand &update_value_name) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003618 {
3619 OperandUnpacker operands(_data->tiles, _data->arguments);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003620 auto var = operands.unpack(var_name);
3621 auto cond_value = operands.unpack(cond_value_name);
3622 auto update_value = operands.unpack(update_value_name);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003623
3624 const int32_t dst_w = var->format().w;
3625 const int32_t dst_h = var->format().h;
3626
3627 // It must be a scalar variable
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01003628 CKW_UNUSED(dst_w, dst_h);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003629 assert(dst_w == 1);
3630 assert(dst_h == 1);
3631
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003632 _data->code += "for(; ";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003633 _data->code += var->scalar(0, 0).str;
3634 _data->code += " ";
3635 _data->code += to_string(cond_op);
3636 _data->code += " " + cond_value->scalar(0, 0).str + "; ";
3637 _data->code += var->scalar(0, 0).str;
3638 _data->code += " ";
3639 _data->code += to_string(update_op);
3640 _data->code += " " + update_value->scalar(0, 0).str + ")";
3641 _data->code += "\n";
3642 }
3643
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003644 void op_load_immediate(const TensorOperand &o_tensor, const Operand &o_dst, const Operand &o_x,
3645 const Operand &o_y, const Operand &o_z, const Operand &o_batch_idx,
3646 const Operand &dilation_y) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003647 {
3648 OperandUnpacker operands(_data->tiles, _data->arguments);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003649 auto dst = operands.unpack(o_dst);
3650 auto x = operands.unpack(o_x);
3651 auto y = operands.unpack(o_y);
3652 auto z = operands.unpack(o_z);
3653 auto dil_y = operands.unpack(dilation_y);
3654 auto b = operands.unpack(o_batch_idx);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003655
3656 TensorOperandUnpacker tensor_operands(_data->arguments);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003657 auto tensor = tensor_operands.unpack(o_tensor);
3658 auto gpu_sampler = o_tensor.sampler();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003659
3660 GpuTensor3dMapper mapper(tensor, gpu_sampler);
3661
3662 auto load_writer = ClLoadStoreHelperWriterFactory::create(this, mapper, GpuLoadStoreType::Load);
3663
3664 // Initialize the constant part
3665 load_writer->initialize(dst, x, z, b);
3666
3667 for(int i = 0; i < dst->format().h; ++i)
3668 {
3669 std::string coord_y = y->scalar(0, 0).str + " + " + std::to_string(i);
3670 if(dil_y->scalar(0, 0).str != "1")
3671 {
3672 coord_y += " * " + dil_y->scalar(0, 0).str;
3673 }
3674 load_writer->write(std::make_pair(i, coord_y));
3675 }
3676
3677 load_writer->finalize();
3678 }
3679
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003680 void op_load_indirect(const TensorOperand &o_tensor, const Operand &o_dst, const Operand &o_x,
3681 const Operand &o_indirect_h, const Operand &o_z,
3682 const Operand &o_batch_idx) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003683 {
3684 OperandUnpacker operands(_data->tiles, _data->arguments);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003685 auto dst = operands.unpack(o_dst);
3686 auto x = operands.unpack(o_x);
3687 auto y_ind = operands.unpack(o_indirect_h);
3688 auto z = operands.unpack(o_z);
3689 auto b = operands.unpack(o_batch_idx);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003690
3691 TensorOperandUnpacker tensor_operands(_data->arguments);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003692 auto tensor = tensor_operands.unpack(o_tensor);
3693 auto gpu_sampler = o_tensor.sampler();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003694
3695 GpuTensor3dMapper mapper(tensor, gpu_sampler);
3696
3697 auto load_writer = ClLoadStoreHelperWriterFactory::create(this, mapper, GpuLoadStoreType::Load);
3698
3699 // Initialize the constant part
3700 load_writer->initialize(dst, x, z, b);
3701
3702 for(int i = 0; i < dst->format().h; ++i)
3703 {
3704 load_writer->write(std::make_pair(i, y_ind->scalar(0, i).str));
3705 }
3706
3707 load_writer->finalize();
3708 }
3709
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003710 void op_store_immediate(const TensorOperand &tensor_name, const Operand &src_name, const Operand &x_name,
3711 const Operand &y_name, const Operand &z_name,
3712 const Operand &batch_index_name) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003713 {
3714 OperandUnpacker operands(_data->tiles, _data->arguments);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003715 auto src = operands.unpack(src_name);
3716 auto x = operands.unpack(x_name);
3717 auto y = operands.unpack(y_name);
3718 auto z = operands.unpack(z_name);
3719 auto b = operands.unpack(batch_index_name);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003720
3721 TensorOperandUnpacker tensor_operands(_data->arguments);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003722 auto tensor = tensor_operands.unpack(tensor_name);
3723 auto gpu_sampler = tensor_name.sampler();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003724
3725 GpuTensor3dMapper mapper(tensor, gpu_sampler);
3726
3727 auto store_writer = ClLoadStoreHelperWriterFactory::create(this, mapper, GpuLoadStoreType::Store);
3728
3729 // Initialize the constant part
3730 store_writer->initialize(src, x, z, b);
3731
3732 int32_t tile_h = src->format().h;
3733
3734 for(int m0 = tile_h - 1; m0 >= 0; m0--)
3735 {
3736 store_writer->write(std::make_pair(m0, y->scalar(0, 0).str + " + " + std::to_string(m0)));
3737 }
3738
3739 store_writer->finalize();
3740 }
3741
3742 void op_return() override
3743 {
3744 _data->code += "return;\n";
3745 }
3746
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003747 void util_get_indirect_buffer(const Operand &o_dst, const TensorOperand &o_tensor, const Operand &o_x,
3748 const Operand &o_y, const Operand &o_x_off, const Operand &o_y_off) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003749 {
3750 OperandUnpacker operands(_data->tiles, _data->arguments);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003751 auto dst = operands.unpack(o_dst);
3752 auto x = operands.unpack(o_x);
3753 auto y = operands.unpack(o_y);
3754 auto x_off = operands.unpack(o_x_off);
3755 auto y_off = operands.unpack(o_y_off);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003756
3757 TensorOperandUnpacker tensor_operands(_data->arguments);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003758 auto tensor = tensor_operands.unpack(o_tensor);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003759
3760 assert(dst->format().w == 1);
3761 assert(x->format().w == 1);
3762 assert(y->format().w == 1);
3763 assert(x_off->format().w == 1);
3764 assert(y_off->format().w == 1);
3765 assert(dst->format().dt == DataType::Int32);
3766 assert(x->format().dt == DataType::Int32);
3767 assert(y->format().dt == DataType::Int32);
3768 assert(x_off->format().dt == DataType::Int32);
3769 assert(y_off->format().dt == DataType::Int32);
3770
3771 const std::string width = tensor->component(TensorComponent::W);
3772 const std::string height = tensor->component(TensorComponent::H);
3773 const std::string wxh = tensor->component(TensorComponent::WxH);
3774 /*
3775 int x_s;
3776 int y_s;
3777 x_s = (xi_0 + x_k);
3778 y_s = (yi_0 + y_k);
3779 mi_0 = x_s + y_s * width + b * widthxheight;
3780 mi_0 = select(-1, mi_0, x_s >= 0);
3781 mi_0 = select(-1, mi_0, y_s >= 0);
3782 mi_0 = select(-1, mi_0, x_s < 128);
3783 mi_0 = select(-1, mi_0, y_s < 128);
3784 */
3785 compound_statement_begin();
3786 declare_tile("_x_s", TileInfo(DataType::Int32));
3787 declare_tile("_y_s", TileInfo(DataType::Int32));
3788 auto x_s = operands.unpack(Operand("_x_s"));
3789 auto y_s = operands.unpack(Operand("_y_s"));
3790 for(int i = 0; i < dst->format().h; ++i)
3791 {
3792 // x_s = (xi_0 + x_k);
3793 // y_s = (yi_0 + y_k);
3794 _data->code += x_s->scalar(0, i).str;
3795 _data->code += " = (";
3796 _data->code += x->scalar(0, i).str;
3797 _data->code += " + ";
3798 _data->code += x_off->scalar(0, i).str;
3799 _data->code += ");\n";
3800 _data->code += y_s->scalar(0, i).str;
3801 _data->code += " = (";
3802 _data->code += y->scalar(0, i).str;
3803 _data->code += " + ";
3804 _data->code += y_off->scalar(0, i).str;
3805 _data->code += ");\n";
3806 // mi_0 = x_s + y_s * width;
3807 _data->code += dst->scalar(0, i).str;
3808 _data->code += " = ";
3809 _data->code += x_s->scalar(0, i).str;
3810 _data->code += " + ";
3811 _data->code += y_s->scalar(0, i).str;
3812 _data->code += " * " + width + ";\n";
3813 // mi_0 = select(wxh, mi_0, x_s >= 0);
3814 _data->code += dst->scalar(0, i).str;
3815 _data->code += " = select(-1, ";
3816 _data->code += dst->scalar(0, i).str;
3817 _data->code += ", ";
3818 _data->code += x_s->scalar(0, i).str;
3819 _data->code += " >= 0);\n";
3820 // mi_0 = select(wxh, mi_0, y_s >= 0);
3821 _data->code += dst->scalar(0, i).str;
3822 _data->code += " = select(-1, ";
3823 _data->code += dst->scalar(0, i).str;
3824 _data->code += ", ";
3825 _data->code += y_s->scalar(0, i).str;
3826 _data->code += " >= 0);\n";
3827 // mi_0 = select(wxh, mi_0, x_s < width);
3828 _data->code += dst->scalar(0, i).str;
3829 _data->code += " = select(-1, ";
3830 _data->code += dst->scalar(0, i).str;
3831 _data->code += ", ";
3832 _data->code += x_s->scalar(0, i).str;
3833 _data->code += " < ";
3834 _data->code += width + ");\n";
3835 // mi_0 = select(wxh, mi_0, y_s < height);
3836 _data->code += dst->scalar(0, i).str;
3837 _data->code += " = select(-1, ";
3838 _data->code += dst->scalar(0, i).str;
3839 _data->code += ", ";
3840 _data->code += y_s->scalar(0, i).str;
3841 _data->code += " < ";
3842 _data->code += height + ");\n";
3843 }
3844 compound_statement_end();
3845 }
3846
3847private:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003848 GpuKernelWriterDataHolder *_data{ nullptr };
3849 GpuKernelWriterAttribute *_attr{ nullptr };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003850};
3851
3852/** IGpuKernelWriter factory class */
3853class GpuKernelWriterFactory final
3854{
3855public:
3856 /** Static method to call the IGpuKernelWriter class accordingly with the Gpu programming language
3857 *
3858 * @param[in] gpu GPU target
3859 *
3860 * @return IGpuKernelWriter
3861 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003862 static std::unique_ptr<IGpuKernelWriter>
3863 create(GpuKernelWriterAttribute *attr, GpuKernelWriterDataHolder *x)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003864 {
3865 switch(x->programming_language())
3866 {
3867 case GpuTargetLanguage::OpenCL:
3868 return std::make_unique<ClKernelWriter>(attr, x);
3869 default:
3870 std::cout << "Unsupported Gpu programming language" << std::endl;
3871 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01003872 return nullptr;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003873 }
3874 }
3875};
3876
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003877inline int32_t
3878adjust_step(TensorSamplerFormat tensor_format, int32_t step, const TensorInfo *tensor_info_id, int32_t idx)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003879{
3880 auto tensor = tensor_info_id->shape;
3881
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003882 int32_t dim[3] = { 0 };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003883
3884 switch(tensor_format)
3885 {
3886 case TensorSamplerFormat::C_W_H:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003887 dim[0] = tensor[0];
3888 dim[1] = tensor[1];
3889 dim[2] = tensor[2];
3890 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003891 case TensorSamplerFormat::C_WH_1:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003892 dim[0] = tensor[0];
3893 dim[1] = tensor[1] * tensor[2];
3894 dim[2] = 1;
3895 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003896 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003897 std::cout << "Unsupported tensor format" << std::endl;
3898 assert(false);
3899 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003900 }
3901
3902 return std::min(step, dim[idx]);
3903}
3904
3905} // namespace prototype
3906} // namespace ckw
3907
Viet-Hoa Doce3c48c2023-07-03 13:44:43 +01003908#endif // CKW_PROTOTYPE_SRC_PROTOTYPE_H