blob: 18f284b2b1fbf63152d8271c829efccb97f4e74c [file] [log] [blame]
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001/*
2 * Copyright (c) 2023 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
Viet-Hoa Doce3c48c2023-07-03 13:44:43 +010025#ifndef CKW_PROTOTYPE_SRC_PROTOTYPE_H
26#define CKW_PROTOTYPE_SRC_PROTOTYPE_H
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010027
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010028#include <algorithm>
29#include <array>
Nikolaj Jensenacea4072023-07-03 09:44:42 +010030#include <cassert> // assert (to be removed)
31#include <chrono>
32#include <cmath>
33#include <cstdint> // int32_t
Nikolaj Jensen5ff48022023-06-27 14:13:24 +010034#include <functional>
Nikolaj Jensenacea4072023-07-03 09:44:42 +010035#include <iostream> // cout (to be removed)
36#include <map>
37#include <memory>
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010038#include <stdexcept>
Nikolaj Jensenacea4072023-07-03 09:44:42 +010039#include <string>
40#include <unordered_map>
41#include <vector>
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010042
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010043#include "ckw/Error.h"
Nikolaj Jensenacea4072023-07-03 09:44:42 +010044#include "ckw/TensorInfo.h"
Nikolaj Jensen5ff48022023-06-27 14:13:24 +010045#include "ckw/types/ConvertPolicy.h"
46#include "ckw/types/DataType.h"
47#include "ckw/types/Functions.h"
48#include "ckw/types/GpuTargetLanguage.h"
49#include "ckw/types/Operators.h"
50#include "ckw/types/TensorSamplerTypes.h"
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010051
52namespace ckw
53{
Nikolaj Jensenacea4072023-07-03 09:44:42 +010054namespace prototype
55{
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010056
57// Dummy data structure for Size2D
58using Size2D = std::vector<int32_t>;
59
60// Dummy Status
61using Status = void;
62
63enum class ComponentType : int32_t
64{
65 Complex = 0,
66 Simple = 1,
67 Unfusable = 2
68};
69
70enum class GpuCompilationSpeed
71{
Nikolaj Jensenacea4072023-07-03 09:44:42 +010072 Fast = 0x00, // fast compilation may increase the latency of the network
73 Slow = 0x01 // slow compilation may decrease the latency of the network
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010074};
75
76enum class GpuExtensions
77{
78 Fp16,
79 Dot8,
80 Mmul,
81 FastMath
82};
83
84struct TensorInfo
85{
Nikolaj Jensenacea4072023-07-03 09:44:42 +010086 TensorShape shape{ { 0 } };
87 DataType data_type{ DataType::Unknown };
88 TensorDataLayout data_layout{ TensorDataLayout::Nhwc };
89 int32_t id{ -1 };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010090};
91
92struct ComponentAttribute
93{
Nikolaj Jensenacea4072023-07-03 09:44:42 +010094 GpuCompilationSpeed compilation_speed{ GpuCompilationSpeed::Fast };
95 bool overwrite_tile{ true };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010096};
97
98inline std::string data_type_to_cl_type(DataType dt)
99{
100 switch(dt)
101 {
102 case DataType::Fp32:
103 return "float";
104 case DataType::Fp16:
105 return "half";
106 case DataType::Int8:
107 return "char";
108 case DataType::Uint8:
109 return "uchar";
110 case DataType::Uint16:
111 return "ushort";
112 case DataType::Int16:
113 return "short";
114 case DataType::Uint32:
115 return "uint";
116 case DataType::Int32:
117 return "int";
118 case DataType::Bool:
119 return "bool";
120 default:
121 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +0100122 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100123 }
124}
125
126inline int32_t width_to_cl_vector_size(int32_t width)
127{
128 switch(width)
129 {
130 case 1:
131 return 1;
132 case 2:
133 return 2;
134 case 3:
135 return 3;
136 case 4:
137 return 4;
138 case 5:
139 case 6:
140 case 7:
141 case 8:
142 return 8;
143 case 9:
144 case 10:
145 case 11:
146 case 12:
147 case 13:
148 case 14:
149 case 15:
150 case 16:
151 return 16;
152 default:
153 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +0100154 return 0;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100155 }
156}
157
158inline std::string get_cl_data_type(DataType dt, int32_t width)
159{
160 std::string data_type;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100161 int32_t w = width_to_cl_vector_size(width);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100162 data_type += data_type_to_cl_type(dt);
163 if(w != 1)
164 {
165 data_type += std::to_string(w);
166 }
167 return data_type;
168}
169
170inline std::string to_opencl_store(int32_t vector_length)
171{
172 if(vector_length != 1)
173 {
174 return "vstore" + std::to_string(vector_length) + "(";
175 }
176 else
177 {
178 return "*(";
179 }
180}
181
182struct TileInfo
183{
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100184 TileInfo()
185 {
186 }
187
188 TileInfo(DataType dt)
189 : dt(dt), w(1), h(1)
190 {
191 }
192
193 TileInfo(DataType dt, int32_t width)
194 : dt(dt), w(width), h(1)
195 {
196 }
197
198 TileInfo(DataType dt, int32_t width, int32_t height)
199 : dt(dt), w(width), h(height)
200 {
201 }
202
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100203 DataType dt{ DataType::Unknown }; // Data type of the tile
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100204 int32_t w{ 0 }; // Width (i.e. c0 - portion of the channels)
205 int32_t h{ 0 }; // Height (i.e. s0 - portion of the spatial dimensions)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100206};
207
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100208inline std::ostream &operator<<(std::ostream &o, const TileInfo &a)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100209{
210 o << a.w << " x " << a.h;
211 return o;
212}
213
214struct DataTypeAsString
215{
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100216 std::string str{ "" };
217 DataType dt{ DataType::Unknown };
218 int32_t size{ 1 };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100219};
220
221struct ValueAsString
222{
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100223 std::string str{ "" };
224 DataTypeAsString type{};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100225};
226
227// https://stackoverflow.com/questions/51515378/storing-and-accessing-tile-properties-in-c
228// A Tile is a collection of variables used to express a 2D data.
229class IScalarTile
230{
231public:
232 virtual ~IScalarTile() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100233
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100234 /** Method to get the scalar variable from a tile
235 * @param[in] x X coordinate on the width of the tile. If out-of-bound, the coordinate is clamped to the nearest valid edge
236 * @param[in] y Y coordinate on the height of the tile. If out-of-bound, the coordinate is clamped to the nearest valid edge
237 *
238 * @return the scalar variable as a string
239 */
240 virtual ValueAsString scalar(int32_t x, int32_t y) const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100241
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100242 /** Method to get the list of underlying variable names used by the tile
243 *
244 * @return the list of variable names
245 */
246 virtual std::vector<ValueAsString> underlying_source_variables() const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100247
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100248 /** Method to get the name of the tile.
249 *
250 * @return the name of the tile
251 */
252 std::string name() const
253 {
254 return _basename;
255 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100256
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100257 /** Method to get the tile format
258 *
259 * @return the format
260 */
261 TileInfo format() const
262 {
263 return _format;
264 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100265
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100266 /** Method to know whether the tile is assignable or not (constant)
267 *
268 * @return true if the tile is assignable
269 */
270 virtual bool is_assignable() const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100271
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100272 /** Method to know whether the tile needs to be declared
273 *
274 * @return true if the tile needs to be declared in the code before being used
275 */
276 virtual bool need_declaration() const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100277
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100278protected:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100279 TileInfo _format{}; // Tile format
280 std::string _basename{ "" }; // Tile name
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100281};
282
283// A tile is a collection of variables used to express a 2D data. The variables are vectors in the GPU context.
284// The vector size is given by the width of the tile. The number of vectors height by depth defines the number of vectors
285class IVectorTile : public IScalarTile
286{
287public:
288 virtual ~IVectorTile() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100289
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100290 /** Method to get the vector variable from a tile. A vector is an ordered homogeneous collection of two or more scalars.
291 * The user can query the list of supported width for the vectors through preferred_vector_sizes().
292 *
293 * @param[in] y Y coordinate on the height of the tile. If out-of-bound, the coordinate is clamped to the nearest valid edge
294 *
295 * @return the vector variable as a string
296 */
297 virtual ValueAsString vector(int32_t y) const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100298
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100299 /** Method to get a vector variable from a tile. A vector is an ordered homogeneous collection of two or more scalars.
300 *
301 * @return the vector variable as a string
302 */
303 virtual ValueAsString vector(int32_t x_start, int32_t width, int32_t y) const = 0;
304 /** Method to get the preferred vector sizes.
305 *
306 * @return a vector with the preferred vector sizes
307 */
308 //virtual std::vector<int32_t> preferred_vector_sizes() const = 0;
309};
310
311class ClTile : public IVectorTile
312{
313public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100314 ClTile(const std::string &name, TileInfo format)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100315 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100316 _format = format;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100317 _basename = name;
318 }
319
320 ValueAsString scalar(int32_t x, int32_t y) const override
321 {
322 x = std::max(std::min(x, _format.w - 1), static_cast<int32_t>(0));
323 y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
324
325 ValueAsString t;
326 t.str = build_variable_name(y);
327 t.type.str = get_cl_data_type(_format.dt, 1);
328 t.type.dt = _format.dt;
329 t.type.size = 1;
330
331 // Check required because if the width has only one element, we cannot use .s0
332 if(_format.w != 1)
333 {
334 // Automatic broadcasting
335 t.str += ".s" + std::to_string(x);
336 }
337
338 return t;
339 }
340
341 ValueAsString vector(int32_t y) const override
342 {
343 y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
344
345 ValueAsString t;
346 t.str = build_variable_name(y);
347 t.type.str = get_cl_data_type(_format.dt, _format.w);
348 t.type.dt = _format.dt;
349 t.type.size = _format.w;
350 return t;
351 }
352
353 ValueAsString vector(int32_t x_start, int32_t width, int32_t y) const override
354 {
355 y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
356
357 ValueAsString t;
358 t.str = build_variable_name(y);
359 t.type.str = get_cl_data_type(_format.dt, width);
360 t.type.dt = _format.dt;
361 t.type.size = width;
362
363 if(_format.w != 1)
364 {
365 t.str += ".s";
366 for(int i = 0; i < width; ++i)
367 {
368 t.str += to_scalar_hex(x_start + i);
369 }
370 }
371 return t;
372 }
373
374 std::vector<ValueAsString> underlying_source_variables() const override
375 {
376 std::vector<ValueAsString> vars;
377 for(int32_t y = 0; y < _format.h; ++y)
378 {
379 ValueAsString t;
380 t.str = build_variable_name(y);
381 t.type.str = get_cl_data_type(_format.dt, _format.w);
382 t.type.dt = _format.dt;
383 t.type.size = _format.w;
384 vars.push_back(t);
385 }
386 return vars;
387 }
388
389 bool is_assignable() const override
390 {
391 return true;
392 }
393
394 bool need_declaration() const override
395 {
396 return true;
397 }
398
399private:
400 std::string build_variable_name(int32_t y) const
401 {
402 std::string var_name = _basename;
403
404 if(_format.h == 1)
405 {
406 return var_name;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100407 }
408 else
409 {
410 var_name += "_";
411 var_name += std::to_string(y);
412 }
413
414 return var_name;
415 }
416
417 std::string to_scalar_hex(int32_t x) const
418 {
419 switch(x)
420 {
421 case 0:
422 case 1:
423 case 2:
424 case 3:
425 case 4:
426 case 5:
427 case 6:
428 case 7:
429 case 8:
430 case 9:
431 return std::to_string(x);
432 case 10:
433 return "A";
434 case 11:
435 return "B";
436 case 12:
437 return "C";
438 case 13:
439 return "D";
440 case 14:
441 return "E";
442 case 15:
443 return "F";
444 default:
Viet-Hoa Doe1880f02023-06-28 10:25:35 +0100445 std::cout << "Unsupported hexadecimal value" << std::endl;
446 assert(false);
447 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100448 }
449 }
450};
451
452// Unique features: It contains values in the form of string. The name used for this object is misleading since the variables can change the value over time.
453class ClConstantTile : public IVectorTile
454{
455public:
456 ClConstantTile(const std::vector<std::vector<std::string>> &in, DataType dt)
457 {
458 _format.w = in[0].size();
459 _format.h = in.size();
460 _format.dt = dt;
461
462 _data = std::vector<std::vector<std::string>>(_format.h, std::vector<std::string>(_format.w));
463
464 for(int32_t y = 0; y < _format.h; ++y)
465 {
466 for(int32_t x = 0; x < _format.w; ++x)
467 {
468 _data[y][x] = in[y][x];
469 }
470 }
471 }
472
473 ValueAsString scalar(int32_t x, int32_t y) const override
474 {
475 x = std::max(std::min(x, _format.w - 1), static_cast<int32_t>(0));
476 y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
477
478 ValueAsString t;
479 t.str = _data[y][x];
480 t.type.str = get_cl_data_type(_format.dt, 1);
481 t.type.dt = _format.dt;
482 t.type.size = 1;
483
484 return t;
485 }
486
487 ValueAsString vector(int32_t y) const override
488 {
489 y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
490
491 return vector(0, _format.w, y);
492 }
493
494 ValueAsString vector(int32_t x_start, int32_t width, int32_t y) const override
495 {
496 y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
497
498 ValueAsString t;
499 t.str = "";
500 t.type.str = get_cl_data_type(_format.dt, width);
501 t.type.dt = _format.dt;
502 t.type.size = width;
503
504 if(width > 1)
505 {
506 t.str += "((" + get_cl_data_type(_format.dt, width) + ")(";
507 }
508
509 int32_t x = x_start;
510 for(; x < width - 1; ++x)
511 {
512 t.str += scalar(x, y).str;
513 t.str += ", ";
514 }
515 t.str += scalar(x, y).str;
516
517 if(width > 1)
518 {
519 t.str += "))";
520 }
521
522 return t;
523 }
524
525 std::vector<ValueAsString> underlying_source_variables() const override
526 {
527 std::vector<ValueAsString> vars;
528
529 for(int32_t y = 0; y < _format.h; ++y)
530 {
531 for(int32_t x = 0; x < _format.w; ++x)
532 {
533 ValueAsString t;
534 t.str = _data[y][x];
535 t.type.str = get_cl_data_type(_format.dt, 1);
536 t.type.dt = _format.dt;
537 t.type.size = 1;
538 vars.push_back(t);
539 }
540 }
541
542 return vars;
543 }
544
545 bool is_assignable() const override
546 {
547 return false;
548 }
549
550 bool need_declaration() const override
551 {
552 return false;
553 }
554
555private:
556 std::vector<std::vector<std::string>> _data{};
557};
558
559enum class TensorComponentIndex : int32_t
560{
561 IndexMask = 0x0000000f,
562};
563
564enum class TensorComponentType : int32_t
565{
566 OffsetFirstElement = 0x00000100,
567 Stride = 0x00001000,
568 Dimension = 0x00010000,
569 FoldedDimension = 0x00100000,
570 Constant = 0x01000000
571};
572
573enum class TensorComponent : int32_t
574{
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100575 Unknown = 0x00000000,
576 OffsetFirstElement = 0x00000100,
577 Stride1 = 0x00001001,
578 Stride2 = 0x00001002,
579 Stride3 = 0x00001003,
580 Stride4 = 0x00001004,
581 Dim0 = 0x00010000,
582 Dim1 = 0x00010001,
583 Dim2 = 0x00010002,
584 Dim3 = 0x00010003,
585 Dim4 = 0x00010004,
586 C = 0x00010000, // Dim0
587 W = 0x00010001, // Dim1
588 H = 0x00010002, // Dim2
589 D = 0x00010003,
590 N = 0x00010004,
591 Dim1xDim2 = 0x00100021,
592 Dim1xDim2xDim3 = 0x00100321,
593 WxH = 0x00100021,
594 WxHxD = 0x00100321
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100595};
596
597inline std::string to_string(TensorComponent x)
598{
599 switch(x)
600 {
601 case TensorComponent::Unknown:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100602 return "Unknown";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100603 case TensorComponent::OffsetFirstElement:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100604 return "OffsetFirstElement";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100605 case TensorComponent::Stride1:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100606 return "Stride1";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100607 case TensorComponent::Stride2:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100608 return "Stride2";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100609 case TensorComponent::Stride3:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100610 return "Stride3";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100611 case TensorComponent::Stride4:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100612 return "Stride4";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100613 case TensorComponent::Dim0:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100614 return "Dim0";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100615 case TensorComponent::Dim1:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100616 return "Dim1";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100617 case TensorComponent::Dim2:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100618 return "Dim2";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100619 case TensorComponent::Dim3:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100620 return "Dim3";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100621 case TensorComponent::Dim4:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100622 return "Dim4";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100623 case TensorComponent::Dim1xDim2:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100624 return "Dim1xDim2";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100625 case TensorComponent::Dim1xDim2xDim3:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100626 return "Dim1xDim2xDim3";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100627 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100628 assert(false);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100629 }
630}
631
632class ITensorArgument
633{
634public:
635 virtual ~ITensorArgument() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100636
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100637 /** Method to get the tensor component as a string
638 *
639 * @param[in] x tensor component to query
640 *
641 * @return the tensor component as a string
642 */
643 virtual std::string component(TensorComponent x) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100644
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100645 /** Method to get the tensor component type declaration as a string
646 *
647 * @return the tensor component type declaration as a string
648 */
649 virtual std::string component_type_declaration() const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100650
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100651 /** Method to get the tensor component data type
652 *
653 * @return the tensor component data type
654 */
655 virtual DataType component_data_type() const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100656
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100657 /** Method to get the tensor component declarations
658 *
659 * @return a vector containing the tensor component declarations
660 */
661 virtual std::vector<TensorComponent> component_declarations() const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100662
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100663 /** Method to get the name of the tensor argument.
664 *
665 * @return the name of the tensor argument
666 */
667 std::string name() const
668 {
669 return _basename;
670 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100671
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100672 /** Method to get the tensor format
673 *
674 * @return the format
675 */
676 TensorInfo format() const
677 {
678 return _format;
679 }
680
681protected:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100682 TensorInfo _format{};
683 std::string _basename{};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100684};
685
686enum class GpuTensorStorage : int32_t
687{
688 Unknown = 0x0000,
689 BufferUint8Ptr = 0x0012,
690 Image2dReadOnly = 0x0020,
691 Image2dWriteOnly = 0x0021,
692 Image3dReadOnly = 0x0030,
693 Image3dWriteOnly = 0x0031
694};
695
696class IGpuTensorArgument : public ITensorArgument
697{
698public:
699 virtual ~IGpuTensorArgument() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100700
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100701 /** Method to get the tensor storage, which is the underlying storage used to keep the data memory
702 *
703 * @param[in] x tensor storage to query
704 *
705 * @return the tensor storage as a string
706 */
707 virtual std::string storage(GpuTensorStorage x) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100708
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100709 /** Method to get the tensor storage type declaration as a string
710 *
711 * @param[in] x tensor component to query
712 *
713 * @return the tensor storage type declaration as a string
714 */
715 virtual std::string storage_type_declaration(GpuTensorStorage x) const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100716
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100717 /** Method to get the tensor storage declarations
718 *
719 * @return a vector containing the tensor storage declarations
720 */
721 virtual std::vector<GpuTensorStorage> storage_declarations() const = 0;
722};
723
724class ClTensorArgument : public IGpuTensorArgument
725{
726public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100727 ClTensorArgument(const std::string &name, const TensorInfo &x, bool return_by_value_when_possible)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100728 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100729 _basename = name;
730 _format = x;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100731 _return_by_value_when_possible = return_by_value_when_possible;
732 }
733
734 // Methods to override
735 std::string component(TensorComponent x) override
736 {
737 if((static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentType::Constant)))
738 {
739 int32_t idx = static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentIndex::IndexMask);
740 return std::to_string(idx - 1);
741 }
742
743 if(_return_by_value_when_possible)
744 {
745 if((static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentType::Dimension)))
746 {
747 int32_t idx = static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentIndex::IndexMask);
748 return std::to_string(_format.shape[idx]);
749 }
750
751 if((static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentType::FoldedDimension)))
752 {
753 switch(x)
754 {
755 case TensorComponent::Dim1xDim2:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100756 return std::to_string(_format.shape[1] * _format.shape[2]);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100757 case TensorComponent::Dim1xDim2xDim3:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100758 return std::to_string(_format.shape[1] * _format.shape[2] * _format.shape[2]);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100759 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100760 std::cout << "Unsupported folded dimension" << std::endl;
761 assert(false);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100762 }
763 }
764 }
765
766 if(std::find(_components_required.begin(), _components_required.end(), x) == _components_required.end())
767 {
768 _components_required.push_back(x);
769 }
770
771 return build_component_name(x);
772 }
773
774 std::string component_type_declaration() const override
775 {
776 return "int";
777 };
778
779 DataType component_data_type() const override
780 {
781 return DataType::Int32;
782 }
783
784 std::string storage(GpuTensorStorage x) override
785 {
786 if(std::find(_storage_required.begin(), _storage_required.end(), x) == _storage_required.end())
787 {
788 _storage_required.push_back(x);
789 }
790
791 return build_storage_name(x);
792 }
793
794 std::string storage_type_declaration(GpuTensorStorage x) const override
795 {
796 switch(x)
797 {
798 case GpuTensorStorage::BufferUint8Ptr:
799 return "__global uchar*";
800 case GpuTensorStorage::Image2dReadOnly:
801 return "__read_only image2d_t";
802 case GpuTensorStorage::Image2dWriteOnly:
803 return "__write_only image2d_t";
804 case GpuTensorStorage::Image3dReadOnly:
805 return "__read_only image3d_t ";
806 case GpuTensorStorage::Image3dWriteOnly:
807 return "__write_only image3d_t ";
808 default:
809 std::cout << "Unsupported storage" << std::endl;
810 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +0100811 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100812 }
813 };
814
815 std::vector<GpuTensorStorage> storage_declarations() const override
816 {
817 return _storage_required;
818 }
819
820 std::vector<TensorComponent> component_declarations() const override
821 {
822 return _components_required;
823 }
824
825private:
826 std::string build_storage_name(GpuTensorStorage x) const
827 {
828 std::string var_name = _basename;
829
830 switch(x)
831 {
832 case GpuTensorStorage::BufferUint8Ptr:
833 return var_name + "_ptr";
834 case GpuTensorStorage::Image2dReadOnly:
835 case GpuTensorStorage::Image2dWriteOnly:
836 return var_name + "_img2d";
837 case GpuTensorStorage::Image3dReadOnly:
838 case GpuTensorStorage::Image3dWriteOnly:
839 return var_name + "_img3d";
840 default:
841 std::cout << "Unsupported storage" << std::endl;
842 assert(false);
843 }
844
845 return var_name;
846 }
847
848 std::string build_component_name(TensorComponent x) const
849 {
850 std::string var_name = _basename;
851
852 switch(x)
853 {
854 case TensorComponent::OffsetFirstElement:
855 return var_name + "_offset_first_element";
856 case TensorComponent::Stride1:
857 return var_name + "_stride1";
858 case TensorComponent::Stride2:
859 return var_name + "_stride2";
860 case TensorComponent::Stride3:
861 return var_name + "_stride3";
862 case TensorComponent::Dim0:
863 return var_name + "_dim0";
864 case TensorComponent::Dim1:
865 return var_name + "_dim1";
866 case TensorComponent::Dim2:
867 return var_name + "_dim2";
868 case TensorComponent::Dim3:
869 return var_name + "_dim3";
870 case TensorComponent::Dim1xDim2:
871 return var_name + "_dim1xdim2";
872 case TensorComponent::Dim1xDim2xDim3:
873 return var_name + "_dim1xdim2xdim3";
874 default:
875 std::cout << "Unsupported component" << std::endl;
876 assert(false);
877 }
878
879 return var_name;
880 }
881
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100882 bool _return_by_value_when_possible{ false };
883 std::vector<GpuTensorStorage> _storage_required{};
884 std::vector<TensorComponent> _components_required{};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100885};
886
887/**
888 * @brief Data structure that contains the declared tiles by the components.
889 * The registry is a linear data structure that follows the similar principle of the stack. The user can use the @p increment_registry_level() method to
890 * increase the level of the stack (0 when it starts). When the user uses the @p decrement_registry_level() method, the registry decreases the level of the stack
891 * and remove (pop) all the tiles from the level above.
892 * When a tile is declared on the level 0, it is a global tile. A global tile is visible in all parts of the code.
893 * Since different components may use the same name to define a tile, the registry adopts the IdSpace concept, an @p id to prevent name collisions
894 * when declaring tiles among different components.
895 *
896 */
897class GpuTileRegistry
898{
899public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100900 enum class RegistryTileType
901 {
902 Tile,
903 Link
904 };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100905
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100906 using RegistryIdSpace = int32_t;
907 using RegistryLevel = int32_t;
908 using RegistryTileName = std::string;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100909
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100910 struct RegistryTileTableEntry
911 {
912 RegistryLevel registry_level{ 0 };
913 std::unique_ptr<IVectorTile> tile_object{ nullptr };
914 };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100915
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100916 struct RegistryTileTypeTableEntry
917 {
918 RegistryTileType tile_type{ RegistryTileType::Tile };
919 RegistryTileName tile_name{};
920 RegistryIdSpace registry_idspace{ 0 };
921 RegistryLevel registry_level{ 0 };
922 };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100923
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100924 using RegistryTileTable = std::map<RegistryIdSpace, std::map<RegistryTileName, RegistryTileTableEntry>>;
925 using RegistryTileTypeTable = std::map<RegistryIdSpace, std::map<RegistryTileName, RegistryTileTypeTableEntry>>;
926
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100927 /**
928 * @brief Construct a new Gpu Tile Registry object
929 *
930 */
931 GpuTileRegistry()
932 {
933 _language = GpuTargetLanguage::Unknown;
934 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100935
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100936 /**
937 * @brief Construct a new Gpu Tile Registry object providing the Gpu programming language
938 *
939 * @param[in] language Gpu programming language to use
940 */
941 GpuTileRegistry(GpuTargetLanguage language)
942 {
943 _language = language;
944 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100945
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100946 /**
947 * @brief Default destructor. Destroy the Gpu Tile Registry object
948 *
949 */
950 ~GpuTileRegistry() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100951
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100952 /**
953 * @brief Set the working IdSpace for the tile registry. IdSpace is used to prevent name collisions when declaring tiles.
954 * Therefore, the IdSpace should be set before declaring any tiles.
955 *
956 * @param[in] id The IdSpace id
957 */
958 void set_IdSpace(int32_t id)
959 {
960 _IdSpace = id;
961 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100962
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100963 /**
964 * @brief Get the current working IdSpace for the tile registry. IdSpace is used to prevent name collisions when declaring tiles
965 *
966 * @return The IdSpace id
967 */
968 int32_t IdSpace() const
969 {
970 return _IdSpace;
971 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100972
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100973 /**
974 * @brief Gets all the IdSpace declarations defined in the tile registry.
975 *
976 * @return all the IdSpace declarations defined in the tile registry as std::vector<int32_t>. It returns an empty vector if there are no IdSpace declarations.
977 */
978 std::vector<int32_t> IdSpace_declarations() const
979 {
980 std::vector<int32_t> x;
981
982 auto it = _frags.begin();
983
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100984 while(it != _frags.end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100985 {
986 x.push_back(it->first);
987
988 it++;
989 }
990
991 return x;
992 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100993
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100994 /**
995 * @brief Declare a tile from a previously created tile
996 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100997 void insert(const std::string &name, const IVectorTile *frag)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100998 {
999 assert(_language == GpuTargetLanguage::OpenCL);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001000 const int32_t key_IdSpace = _IdSpace;
1001 const std::string key_var_name = name;
1002 const std::string var_name = frag->name();
1003 TileInfo format = frag->format();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001004
1005 // First check whether a tile with the same name exists
1006 IVectorTile *result = (*this)[key_var_name];
1007 assert(result == nullptr);
1008 if(result == nullptr)
1009 {
1010 std::unique_ptr<ClTile> tile = std::make_unique<ClTile>(var_name, format);
1011
1012 _frags[key_IdSpace][key_var_name].tile_object = std::move(tile);
1013 _frags[key_IdSpace][key_var_name].registry_level = _registry_level;
1014
1015 _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Link;
1016 _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name;
1017 _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace;
1018 _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level;
1019 }
1020 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001021
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001022 /**
1023 * @brief Declare a tile with TileInfo. The tile will be stored in the IdSpace set with @p set_IdSpace()
1024 *
1025 * @note The reference name used for declaring the tile should not be previously used in the IdSpace
1026 *
1027 * @param[in] name Reference name for the tile. The reference name can be used to retrieve the tile stored in the registry.
1028 * @param[in] format Tile format use to use
1029 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001030 void insert(const std::string &name, const TileInfo &format)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001031 {
1032 assert(_language == GpuTargetLanguage::OpenCL);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001033 const int32_t key_IdSpace = _IdSpace;
1034 const std::string key_var_name = name;
1035 const std::string var_name = generate_tile_name(name);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001036
1037 // First check whether a tile with the same name exists
1038 IVectorTile *result = (*this)[key_var_name];
1039 assert(result == nullptr);
1040 if(result == nullptr)
1041 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001042 std::unique_ptr<ClTile> tile = std::make_unique<ClTile>(var_name, format);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001043 _frags[key_IdSpace][key_var_name].tile_object = std::move(tile);
1044 _frags[key_IdSpace][key_var_name].registry_level = _registry_level;
1045
1046 _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Tile;
1047 _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name;
1048 _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace;
1049 _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level;
1050 }
1051 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001052
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001053 /**
1054 * @brief Declare a constant tile. The content of the tile is passed as a vector of std::string
1055 *
1056 * @note The reference name used for declaring the tile should not be previously used in the IdSpace
1057 *
1058 * @param[in] name Reference name for the tile. The reference name can be used to retrieve the tile stored in the registry.
1059 * @param[in] in A 3D std::vector of std::string. From the 3D std::vector we can know the dimensions for the tile
1060 * @param[in] dt The data type for the elements stored in the 3D std::vector as std::string. It is user's responsibilty to ensure
1061 * that the data type is aligned with the content of the std::string.
1062 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001063 void insert(const std::string &name, const std::vector<std::vector<std::string>> &in, DataType dt)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001064 {
1065 assert(_language == GpuTargetLanguage::OpenCL);
1066 const int32_t key_IdSpace = _IdSpace;
1067 const std::string key_var_name = name;
1068
1069 // First check whether a tile with the same name exists
1070 IVectorTile *result = (*this)[key_var_name];
1071 assert(result == nullptr);
1072 if(result == nullptr)
1073 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001074 std::unique_ptr<ClConstantTile> tile = std::make_unique<ClConstantTile>(in, dt);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001075 _frags[key_IdSpace][key_var_name].tile_object = std::move(tile);
1076 _frags[key_IdSpace][key_var_name].registry_level = _registry_level;
1077
1078 _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Tile;
1079 _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name;
1080 _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace;
1081 _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level;
1082 }
1083 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001084
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001085 /**
1086 * @brief Declare an anonymous constant tile. The content of the tile is passed as a vector of std::string
1087 *
1088 * @note This method can be used to declare temporary tiles that need to be accessed only once.
1089 *
1090 * @param[in] in A 3D std::vector of std::string. From the 3D std::vector we can know the dimensions for the tile
1091 * @param[in] dt The data type for the elements stored in the 3D std::vector as std::string. It is user responsibilty to ensure
1092 * that the data type is aligned with what passed with the std::string.
1093 *
1094 * @return IVectorTile* the anonymous constant tile
1095 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001096 IVectorTile *insert(const std::vector<std::vector<std::string>> &in, DataType dt)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001097 {
1098 assert(_language == GpuTargetLanguage::OpenCL);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001099 const int32_t key_IdSpace = _IdSpace;
1100 const std::string key_var_name = "_" + std::to_string(_anonymous_frag_count++);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001101
1102 // First check whether a tile with the same name exists
1103 IVectorTile *result = (*this)[key_var_name];
1104 assert(result == nullptr);
1105 if(result == nullptr)
1106 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001107 std::unique_ptr<ClConstantTile> tile = std::make_unique<ClConstantTile>(in, dt);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001108 _frags[key_IdSpace][key_var_name].tile_object = std::move(tile);
1109 _frags[key_IdSpace][key_var_name].registry_level = _registry_level;
1110
1111 _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Tile;
1112 _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name;
1113 _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace;
1114 _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level;
1115 }
1116
1117 return (*this)[key_var_name];
1118 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001119
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001120 /**
1121 * @brief Get the tile from the registry. This method searches the tile in the IdSpace provided by the user
1122 *
1123 * @param[in] name The name of the tile to retrieve
1124 * @param[in] IdSpace The IdSpace id where to search the tile
1125 *
1126 * @return IVectorTile* The tile
1127 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001128 IVectorTile *get(const std::string &name, int32_t IdSpace)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001129 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001130 const int32_t key_IdSpace = IdSpace;
1131 const std::string key_var_name = name;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001132
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001133 IVectorTile *result = nullptr;
1134 auto search_IdSpace = _frags.find(key_IdSpace);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001135 if(search_IdSpace != _frags.end())
1136 {
1137 auto search_tile = _frags[key_IdSpace].find(key_var_name);
1138 if(search_tile != _frags[key_IdSpace].end())
1139 {
1140 result = search_tile->second.tile_object.get();
1141 assert(result != nullptr);
1142 }
1143 }
1144
1145 return result;
1146 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001147
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001148 /**
1149 * @brief Get the tile from the registry. This method searches the tile in the IdSpace set with @p set_IdSpace()
1150 *
1151 * @param[in] name The name of the tile to retrieve
1152 *
1153 * @return IVectorTile* The tile
1154 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001155 IVectorTile *operator[](const std::string &name)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001156 {
1157 return get(name, _IdSpace);
1158 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001159
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001160 /**
1161 * @brief Check whether the tile in the in the IdSpace provided by the user exists
1162 *
1163 * @param[in] name Name of the tile to search for
1164 * @param[in] IdSpace The IdSpace id where to search the tile
1165 *
1166 * @return true if the tile exists
1167 * @return false if the tile does not exist
1168 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001169 bool has_tile(const std::string &name, int32_t IdSpace) const
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001170 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001171 const int32_t key_IdSpace = IdSpace;
1172 const std::string key_var_name = name;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001173
1174 // IVectorTile* result = nullptr;
1175 auto search_IdSpace = _frags.find(key_IdSpace);
1176
1177 return search_IdSpace != _frags.end();
1178 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001179
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001180 /**
1181 * @brief Check whether the tile within the current IdSpace exists
1182 *
1183 * @param[in] name Name of the tile to search for
1184 *
1185 * @return true if the tile exists
1186 * @return false if the tile does not exist
1187 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001188 bool has_tile(const std::string &name) const
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001189 {
1190 return has_tile(name, _IdSpace);
1191 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001192
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001193 /**
1194 * @brief Get all the tiles declared within the IdSpace provided by the user
1195 *
1196 * @param[in] IdSpace IdSpace where to retrieve all the declared tiles
1197 *
1198 * @return std::vector<IVectorTile*> A vector with all the declared tiles in the IdSpace provided by the user
1199 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001200 std::vector<IVectorTile *> tile_declarations(int32_t IdSpace)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001201 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001202 std::vector<IVectorTile *> tiles;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001203
1204 std::map<RegistryTileName, RegistryTileTypeTableEntry>::iterator it = _frag_types[IdSpace].begin();
1205
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001206 while(it != _frag_types[IdSpace].end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001207 {
1208 // The following line should be enabled. However, we cannot at this stage
1209 // because it used to retrieve the output tile produced by each component.
1210 // However, this method should NOT be used to retrieve the output tile
1211 //if(it->second.tile_type == RegistryTileType::Tile)
1212 {
1213 tiles.push_back(get(it->second.tile_name, it->second.registry_idspace));
1214 }
1215 it++;
1216 }
1217
1218 return tiles;
1219 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001220
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001221 /**
1222 * @brief Increase the level of stack.
1223 *
1224 */
1225 void increment_registry_level()
1226 {
1227 _registry_level++;
1228 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001229
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001230 /**
1231 * @brief Remove all the tiles declared at the current stack level and decrease the level of the stack.
1232 *
1233 */
1234 void decrement_registry_level()
1235 {
1236 assert(_registry_level >= 0);
1237
1238 // Remove all variables in the local scope
1239 std::map<RegistryTileName, RegistryTileTableEntry>::iterator it = _frags[_IdSpace].begin();
1240
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001241 while(it != _frags[_IdSpace].end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001242 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001243 if(it->second.registry_level == _registry_level)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001244 {
1245 it = _frags[_IdSpace].erase(it);
1246 }
1247 else
1248 {
1249 it++;
1250 }
1251 }
1252
1253 std::map<RegistryTileName, RegistryTileTypeTableEntry>::iterator it_type = _frag_types[_IdSpace].begin();
1254
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001255 while(it_type != _frag_types[_IdSpace].end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001256 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001257 if(it_type->second.registry_level == _registry_level)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001258 {
1259 it_type = _frag_types[_IdSpace].erase(it_type);
1260 }
1261 else
1262 {
1263 it_type++;
1264 }
1265 }
1266
1267 _registry_level--;
1268 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001269
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001270 /**
1271 * @brief Get the level of the stack
1272 *
1273 */
1274 int32_t level() const
1275 {
1276 return _registry_level;
1277 }
1278
1279private:
1280 // This method ensures that the key is unique among different components
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001281 std::string generate_tile_name(const std::string &name)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001282 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001283 assert(_IdSpace >= 0);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001284 if(_registry_level == 0)
1285 {
1286 return "_G" + std::to_string(_IdSpace) + "_" + name;
1287 }
1288 else
1289 {
1290 return name;
1291 }
1292 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001293
1294 RegistryTileTable _frags{};
1295 RegistryTileTypeTable _frag_types{};
1296 RegistryLevel _registry_level{ 0 };
1297 RegistryIdSpace _IdSpace{ -1 };
1298 int32_t _anonymous_frag_count{ 0 }; // Counter used to create the anonymous tiles
1299 GpuTargetLanguage _language{ GpuTargetLanguage::Unknown }; // Gpu programming language
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001300};
1301
1302using TensorEntry = std::unique_ptr<IGpuTensorArgument>;
1303
1304/**
1305 * @brief Data structure that contains the tensors consumed by the components.
1306 * Since different components may use the same name as reference for a tensor, the registry adopts the IdSpace concept, an @p id to prevent name collisions
1307 * when declaring tensors among different components.
1308 *
1309 */
1310class GpuTensorArgumentRegistry
1311{
1312public:
1313 /**
1314 * @brief Construct a new Gpu Tensor Registry object
1315 *
1316 */
1317 GpuTensorArgumentRegistry()
1318 {
1319 _language = GpuTargetLanguage::Unknown;
1320 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001321
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001322 /**
1323 * @brief Construct a new Gpu Tensor Registry object
1324 *
1325 * @param[in] language Gpu programming language to use
1326 */
1327 GpuTensorArgumentRegistry(GpuTargetLanguage language)
1328 {
1329 _language = language;
1330 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001331
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001332 /**
1333 * @brief Default destructor. Destroy the Gpu Tensor Registry object
1334 *
1335 */
1336 ~GpuTensorArgumentRegistry() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001337
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001338 /**
1339 * @brief Set the working IdSpace for the tensor registry. IdSpace is used to prevent name collisions when declaring tensors.
1340 * Therefore, the IdSpace should be set before declaring any tensors.
1341 *
1342 * @param[in] id The IdSpace id
1343 */
1344 void set_IdSpace(int32_t id)
1345 {
1346 _IdSpace = id;
1347 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001348
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001349 /**
1350 * @brief Get the current working IdSpace for the tensor registry. IdSpace is used to prevent name collisions when declaring tensors
1351 *
1352 * @return The IdSpace id
1353 */
1354 int32_t IdSpace() const
1355 {
1356 return _IdSpace;
1357 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001358
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001359 /**
1360 * @brief Gets all the IdSpace declarations defined in the tensor registry.
1361 *
1362 * @return all the IdSpace declarations defined in the tensor registry as std::vector<int32_t>. It returns an empty vector if there are no IdSpace declarations.
1363 */
1364 std::vector<int32_t> IdSpace_declarations() const
1365 {
1366 std::vector<int32_t> x;
1367
1368 auto it = _refs.begin();
1369
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001370 while(it != _refs.end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001371 {
1372 x.push_back(it->first);
1373
1374 it++;
1375 }
1376
1377 return x;
1378 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001379
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001380 /**
1381 * @brief Declare a tensor with TensorInfo. The tensor will be stored in the IdSpace set with @p set_IdSpace()
1382 *
1383 * @note The reference name used for declaring the tensor should not be previously used in the IdSpace
1384 *
1385 * @param[in] name Reference name for the tensor. The reference name can be used to retrieve the tensor stored in the registry.
1386 * @param[in] x Pair of tensor info and tensor id
1387 * @param[in] return_by_value_when_possible True if we want the value stored in the tensor components
1388 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001389 void insert(const std::string &name, const TensorInfo &x, bool return_by_value_when_possible)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001390 {
1391 assert(_language == GpuTargetLanguage::OpenCL);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001392 const int32_t key_IdSpace = _IdSpace;
1393 const int32_t tensor_id = x.id;
1394 const std::string key_var_name = name;
1395 const std::string var_name = generate_tensor_name(name, tensor_id);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001396
1397 // First, check whether the tensor has already a reference. If so, trigger an assert
1398 assert(!has_tensor_argument(name));
1399
1400 // Check whether a tensor with that tensorID exists
1401 auto result = _tensor_arguments.find(tensor_id);
1402 if(result == _tensor_arguments.end())
1403 {
1404 // It means that we haven't added a tensor with that tensor_id yet. Create a IGpuTensorArgument before creating the reference
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001405 std::unique_ptr<ClTensorArgument> arg = std::make_unique<ClTensorArgument>(var_name, x,
1406 return_by_value_when_possible);
1407 _tensor_arguments[tensor_id] = std::move(arg);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001408 }
1409
1410 _refs[key_IdSpace][key_var_name] = tensor_id;
1411 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001412
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001413 /**
1414 * @brief Get the tensor from the registry. This method searches the tensor in the IdSpace set with @p set_IdSpace()
1415 *
1416 * @param[in] name The name of the tensor to retrieve
1417 *
1418 * @return IGpuTensor* The tensor
1419 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001420 IGpuTensorArgument *operator[](const std::string &name)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001421 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001422 const int32_t key_IdSpace = _IdSpace;
1423 const std::string key_var_name = name;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001424
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001425 IGpuTensorArgument *result = nullptr;
1426 auto search_IdSpace = _refs.find(key_IdSpace);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001427 if(search_IdSpace != _refs.end())
1428 {
1429 auto search_tensor_id = _refs[key_IdSpace].find(key_var_name);
1430
1431 if(search_tensor_id != _refs[key_IdSpace].end())
1432 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001433 const int32_t tensor_id = search_tensor_id->second;
1434 auto search_tensor_argument = _tensor_arguments.find(tensor_id);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001435 if(search_tensor_argument != _tensor_arguments.end())
1436 {
1437 result = search_tensor_argument->second.get();
1438 }
1439 assert(result != nullptr);
1440 }
1441 }
1442
1443 return result;
1444 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001445
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001446 /**
1447 * @brief Get all the tensors declared in the IdSpace provided by the user
1448 *
1449 * @return std::vector<IGpuTensorArgument*> A vector with all the declared tensors
1450 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001451 std::vector<IGpuTensorArgument *> tensor_argument_declarations()
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001452 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001453 std::vector<IGpuTensorArgument *> args;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001454
1455 auto it = _tensor_arguments.begin();
1456
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001457 while(it != _tensor_arguments.end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001458 {
1459 args.push_back(it->second.get());
1460 it++;
1461 }
1462
1463 return args;
1464 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001465
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001466 /**
1467 * @brief Check whether the tensor argument in the IdSpace set with @p set_IdSpace() exists
1468 *
1469 * @param[in] name Name of the tensor argument to search for
1470 *
1471 * @return true if the tensor argument exists
1472 * @return false if the tensor argument does not exist
1473 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001474 bool has_tensor_argument(const std::string &name)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001475 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001476 const int32_t key_IdSpace = _IdSpace;
1477 const std::string key_var_name = name;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001478
1479 auto search_IdSpace = _refs.find(key_IdSpace);
1480
1481 if(search_IdSpace != _refs.end())
1482 {
1483 auto search_tensor_id = _refs[key_IdSpace].find(key_var_name);
1484
1485 return search_tensor_id != _refs[key_IdSpace].end();
1486 }
1487 else
1488 {
1489 return false;
1490 }
1491 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001492
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001493 /**
1494 * @brief Check whether the tensor argument is in the the IdSpace provided by the user
1495 *
1496 * @param[in] name Name of the tensor argument to search for
1497 * @param[in] IdSpace The IdSpace id where to search the tensor argument
1498 *
1499 * @return true if the tile exists
1500 * @return false if the tile does not exist
1501 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001502 bool has_tensor_argument(const std::string &name, int32_t IdSpace)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001503 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001504 const int32_t key_IdSpace = IdSpace;
1505 const std::string key_var_name = name;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001506
1507 auto search_IdSpace = _refs.find(key_IdSpace);
1508
1509 if(search_IdSpace != _refs.end())
1510 {
1511 auto search_tensor_id = _refs[key_IdSpace].find(key_var_name);
1512
1513 return search_tensor_id != _refs[key_IdSpace].end();
1514 }
1515 else
1516 {
1517 return false;
1518 }
1519 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001520
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001521private:
1522 // This method ensures that the key is unique among different components
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001523 std::string generate_tensor_name(const std::string &name, int32_t tensor_id)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001524 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001525 assert(tensor_id >= 0);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001526
1527 return name + std::to_string(tensor_id);
1528 }
1529
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001530 std::map<int32_t, TensorEntry> _tensor_arguments{};
1531 std::map<int32_t, std::map<std::string, int32_t>> _refs{};
1532 int32_t _IdSpace{ -1 };
1533 GpuTargetLanguage _language{ GpuTargetLanguage::Unknown }; // Gpu programming language
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001534};
1535
1536enum class OpType : int32_t
1537{
1538 Elementwise = 0x0000,
1539 Relational = 0x1000,
1540 Algebra = 0x2000
1541};
1542
1543inline std::string to_string(AssignmentOp op)
1544{
1545 switch(op)
1546 {
1547 case AssignmentOp::Decrement:
1548 return "-=";
1549 case AssignmentOp::Increment:
1550 return "+=";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001551 default:
1552 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01001553 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001554 }
1555}
1556
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01001557inline std::string to_string(UnaryOp op)
1558{
1559 switch(op)
1560 {
1561 case UnaryOp::LogicalNot:
1562 return "!";
1563 default:
1564 assert(false);
1565 return "";
1566 }
1567}
1568
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001569inline std::string to_string(BinaryOp op)
1570{
1571 switch(op)
1572 {
1573 case BinaryOp::Add:
1574 return "+";
1575 case BinaryOp::Sub:
1576 return "-";
1577 case BinaryOp::Mul:
1578 return "*";
1579 case BinaryOp::Div:
1580 return "/";
1581 case BinaryOp::Mod:
1582 return "%";
1583 case BinaryOp::Equal:
1584 return "==";
1585 case BinaryOp::Less:
1586 return "<";
1587 case BinaryOp::LessEqual:
1588 return "<=";
1589 case BinaryOp::Greater:
1590 return ">";
1591 case BinaryOp::GreaterEqual:
1592 return ">=";
1593 case BinaryOp::LogicalAnd:
1594 return "&&";
1595 case BinaryOp::LogicalOr:
1596 return "||";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001597 default:
1598 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01001599 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001600 }
1601}
1602
1603inline std::string binary_op_string(BinaryOp op)
1604{
1605 switch(op)
1606 {
1607 case BinaryOp::Add:
1608 return "add";
1609 case BinaryOp::Sub:
1610 return "sub";
1611 case BinaryOp::Mul:
1612 return "mul";
1613 case BinaryOp::Div:
1614 return "div";
1615 case BinaryOp::Mod:
1616 return "mod";
1617 case BinaryOp::Equal:
1618 return "eq";
1619 case BinaryOp::Less:
1620 return "gt";
1621 case BinaryOp::LessEqual:
1622 return "gteq";
1623 case BinaryOp::Greater:
1624 return "lt";
1625 case BinaryOp::GreaterEqual:
1626 return "lte";
1627 default:
1628 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01001629 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001630 }
1631}
1632
1633enum class OperandType : int32_t
1634{
1635 Unknown = 0x00000000,
1636 ScalarFp32 = 0x00001011, // Immediate scalar tile
1637 ScalarFp16 = 0x00001012, // Immediate scalar tile
1638 ScalarInt32 = 0x00001021, // Immediate scalar tile
1639 ScalarInt16 = 0x00001022, // Immediate scalar tile
1640 ScalarInt8 = 0x00001024, // Immediate scalar tile
1641 ScalarUInt32 = 0x00001031, // Immediate scalar tile
1642 ScalarUInt16 = 0x00001032, // Immediate scalar tile
1643 ScalarUInt8 = 0x00001034, // Immediate scalar tile
1644 ScalarBool = 0x00001041, // Immediate scalar tile
1645 ScalarTile = 0x00001050, // Scalar from a tile
1646 Tile = 0x00010000, // Tile
1647 TensorStride1 = 0x00100001, // Tensor component
1648 TensorStride2 = 0x00100002, // Tensor component
1649 TensorStride3 = 0x00100003, // Tensor component
1650 TensorStride4 = 0x00100004, // Tensor component
1651 TensorDim0 = 0x00100010, // Tensor component
1652 TensorDim1 = 0x00100020, // Tensor component
1653 TensorDim2 = 0x00100030, // Tensor component
1654 TensorDim3 = 0x00100040, // Tensor component
1655 TensorDim4 = 0x00100050, // Tensor component
1656 TensorC = 0x00100010, // Tensor component
1657 TensorW = 0x00100020, // Tensor component
1658 TensorH = 0x00100030, // Tensor component
1659 TensorD = 0x00100040, // Tensor component
1660 TensorN = 0x00100050, // Tensor component
1661 TensorDim1xDim2 = 0x00100100, // Tensor component
1662 TensorDim1xDim2xDim3 = 0x00100200, // Tensor component
1663 TensorWxH = 0x00100300, // Tensor component
1664 TensorWxHxD = 0x00100400, // Tensor component
1665 TensorDataOffset = 0x00100500, // Tensor component
1666};
1667
1668struct ScalarTileCoord
1669{
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001670 ScalarTileCoord()
1671 {
1672 }
1673
1674 ScalarTileCoord(int32_t x0, int32_t y0)
1675 : x(x0), y(y0)
1676 {
1677 }
1678
1679 int32_t x{ -1 };
1680 int32_t y{ -1 };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001681};
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001682
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001683/**
1684 * @brief Operand class. This object is used to pass the operands to the operations performed by the writer.
1685 * Operand can be of three types:
1686 * -# Scalar immediate: constant expression
1687 * -# Tile: A tile
1688 * -# Tensor component: A component (scalar) of a tensor
1689 *
1690 */
1691class Operand
1692{
1693public:
1694 Operand(const std::string &val)
1695 {
1696 _str = val;
1697 _type = OperandType::Tile;
1698 }
1699
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001700 Operand(const std::string &val, const ScalarTileCoord &coord)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001701 {
1702 _str = val;
1703 _type = OperandType::ScalarTile;
1704 _coord = coord;
1705 }
1706
1707 Operand(const std::string &val, OperandType type)
1708 {
1709 _str = val;
1710 _type = type;
1711 }
1712
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001713 Operand(const Operand &t)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001714 {
1715 _str = t.value();
1716 _type = t.type();
1717 }
1718
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001719 Operand &operator=(const Operand &t)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001720 {
1721 _str = t.value();
1722 _type = t.type();
1723 _coord = t.scalar_tile_coordinate();
1724 return *this;
1725 }
1726
1727 std::string value() const
1728 {
1729 return _str;
1730 }
1731
1732 OperandType type() const
1733 {
1734 return _type;
1735 }
1736
1737 ScalarTileCoord scalar_tile_coordinate() const
1738 {
1739 return _coord;
1740 }
1741
1742private:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001743 std::string _str{};
1744 OperandType _type{ OperandType::Unknown };
1745 ScalarTileCoord _coord{};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001746};
1747
1748enum class GpuSamplerTensorStorage : int32_t
1749{
1750 Unknown = static_cast<int32_t>(GpuTensorStorage::Unknown),
1751 BufferUint8Ptr = static_cast<int32_t>(GpuTensorStorage::BufferUint8Ptr),
1752 Image2dReadOnly = static_cast<int32_t>(GpuTensorStorage::Image2dReadOnly),
1753 Image2dWriteOnly = static_cast<int32_t>(GpuTensorStorage::Image2dWriteOnly),
1754 Image3dReadOnly = static_cast<int32_t>(GpuTensorStorage::Image3dReadOnly),
1755 Image3dWriteOnly = static_cast<int32_t>(GpuTensorStorage::Image2dWriteOnly),
1756};
1757
1758struct GpuSampler
1759{
1760 GpuSampler() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001761
1762 TensorSamplerFormat format{ TensorSamplerFormat::Unknown };
1763 GpuSamplerTensorStorage storage{ GpuSamplerTensorStorage::Unknown };
1764 TensorSamplerAddressModeX address_mode_x{ TensorSamplerAddressModeX::Unknown };
1765 TensorSamplerAddressModeY address_mode_y{ TensorSamplerAddressModeY::Unknown };
1766 TensorSamplerAddressModeZ address_mode_z{ TensorSamplerAddressModeZ::Unknown };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001767};
1768
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001769inline GpuSampler
1770create_simple_sampler(const TensorInfo *tensor_info_id, GpuSampler sampler, int32_t step_x, int32_t step_y,
1771 int32_t step_z)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001772{
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01001773 CKW_UNUSED(step_x, step_y, step_z);
1774
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001775 auto tensor = tensor_info_id->shape;
1776
1777 GpuSampler dst_sampler;
1778 dst_sampler.format = sampler.format;
1779 dst_sampler.storage = GpuSamplerTensorStorage::BufferUint8Ptr;
1780 dst_sampler.address_mode_x = sampler.address_mode_x;
1781 dst_sampler.address_mode_y = sampler.address_mode_y;
1782 dst_sampler.address_mode_z = sampler.address_mode_z;
1783
1784 int32_t dim_x = 0;
1785 int32_t dim_y = 0;
1786 int32_t dim_z = 0;
1787
1788 switch(sampler.format)
1789 {
1790 case TensorSamplerFormat::C_W_H:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001791 dim_x = tensor[0];
1792 dim_y = tensor[1];
1793 dim_z = tensor[2];
1794 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001795 case TensorSamplerFormat::C_WH_1:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001796 dim_x = tensor[0];
1797 dim_y = tensor[1] * tensor[2];
1798 dim_z = 1;
1799 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001800 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001801 std::cout << "Unsupported tensor format" << std::endl;
1802 assert(false);
1803 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001804 }
1805
1806 if(dim_x == 1)
1807 {
1808 assert(step_x == 1);
1809 dst_sampler.address_mode_x = TensorSamplerAddressModeX::None;
1810 }
1811
1812 if(dim_y == 1)
1813 {
1814 assert(step_y == 1);
1815 dst_sampler.address_mode_y = TensorSamplerAddressModeY::None;
1816 }
1817
1818 if(dim_z == 1)
1819 {
1820 assert(step_z == 1);
1821 dst_sampler.address_mode_z = TensorSamplerAddressModeZ::None;
1822 }
1823
1824 return dst_sampler;
1825}
1826
1827class GpuOutputSampler
1828{
1829public:
1830 GpuOutputSampler() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001831
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001832 /**
1833 * @brief Method used to initialize the GpuOutputSampler. The GpuOutputSampler can be initialized only once
1834 * by the root component. Once initialized, all simpler components will need to used this sampler
1835 * or a broadcasted version of it
1836 *
1837 * @param[in] sampler GpuSampler
1838 * @param[in] step_x Increment step in the X direction. Not necessarily it is the same of n0 of tile!
1839 * @param[in] step_y Increment step in the Y direction. Not necessarily it is the same of m0 of tile!
1840 * @param[in] step_z Increment step in the Z direction. Not necessarily it is the same of d0 of tile!
1841 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001842 void initialize(const TensorInfo *tensor_info_id, GpuSamplerTensorStorage tensor_storage,
1843 TensorSamplerFormat tensor_format, int32_t step_x, int32_t step_y, int32_t step_z)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001844 {
1845 assert(_is_initialized == false);
1846
1847 _step_x = step_x;
1848 _step_y = step_y;
1849 _step_z = step_z;
1850 _tensor_info_id = tensor_info_id;
1851 _sampler = create_sampler(tensor_storage, tensor_format);
1852 _is_initialized = true;
1853 };
1854
1855 GpuSampler sampler() const
1856 {
1857 return _sampler;
1858 };
1859
1860 int32_t step_x() const
1861 {
1862 return _step_x;
1863 };
1864
1865 int32_t step_y() const
1866 {
1867 return _step_y;
1868 };
1869
1870 int32_t step_z() const
1871 {
1872 return _step_z;
1873 };
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001874
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001875private:
1876 GpuSampler create_sampler(GpuSamplerTensorStorage tensor_storage, TensorSamplerFormat tensor_format)
1877 {
1878 // Output can only be in output mode
1879 assert(tensor_storage != GpuSamplerTensorStorage::Image2dReadOnly);
1880 assert(tensor_storage != GpuSamplerTensorStorage::Image3dReadOnly);
1881
1882 auto tensor = _tensor_info_id->shape;
1883
1884 GpuSampler sampler;
1885 sampler.format = tensor_format;
1886 sampler.storage = tensor_storage;
1887 sampler.address_mode_x = TensorSamplerAddressModeX::None;
1888 sampler.address_mode_y = TensorSamplerAddressModeY::None;
1889 sampler.address_mode_z = TensorSamplerAddressModeZ::None;
1890
1891 // In the case of texture, we do not need any special checks at the border
1892 if(tensor_storage == GpuSamplerTensorStorage::BufferUint8Ptr)
1893 {
1894 int32_t dim_x = 0;
1895 int32_t dim_y = 0;
1896 int32_t dim_z = 0;
1897
1898 switch(tensor_format)
1899 {
1900 case TensorSamplerFormat::C_W_H:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001901 dim_x = tensor[0];
1902 dim_y = tensor[1];
1903 dim_z = tensor[2];
1904 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001905 case TensorSamplerFormat::C_WH_1:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001906 dim_x = tensor[0];
1907 dim_y = tensor[1] * tensor[2];
1908 dim_z = 1;
1909 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001910 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001911 std::cout << "Unsupported tensor format" << std::endl;
1912 assert(false);
1913 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001914 }
1915
1916 if((dim_x % _step_x) != 0 && dim_x != 1)
1917 {
1918 sampler.address_mode_x = TensorSamplerAddressModeX::OverlappingMin;
1919 }
1920
1921 if((dim_y % _step_y) != 0 && dim_y != 1)
1922 {
1923 sampler.address_mode_y = TensorSamplerAddressModeY::ClampToMaxEdgeOnly;
1924 }
1925
1926 if((dim_z % _step_z) != 0 && dim_z != 1)
1927 {
1928 sampler.address_mode_z = TensorSamplerAddressModeZ::ClampToMaxEdgeOnly;
1929 }
1930 }
1931
1932 return sampler;
1933 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001934
1935 GpuSampler _sampler{}; // GpuSampler
1936 int32_t _step_x{ 1 };
1937 int32_t _step_y{ 1 };
1938 int32_t _step_z{ 1 };
1939 const TensorInfo *_tensor_info_id{ nullptr };
1940 bool _is_initialized{ false };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001941};
1942
1943/**
1944 * @brief Tensor operand class. This object is used to pass the operands as tensor to the operations performed by the writer.
1945 */
1946class TensorOperand
1947{
1948public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001949 TensorOperand(const std::string &val, GpuSampler sampler)
1950 : _str(val), _sampler(sampler)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001951 {
1952 }
1953
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001954 TensorOperand &operator=(const TensorOperand &t)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001955 {
1956 _str = t.value();
1957 _sampler = t.sampler();
1958 return *this;
1959 }
1960
1961 std::string value() const
1962 {
1963 return _str;
1964 }
1965
1966 GpuSampler sampler() const
1967 {
1968 return _sampler;
1969 }
1970
1971private:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001972 std::string _str{};
1973 GpuSampler _sampler{};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001974};
1975
1976/**
1977 * @brief Data structure that contains all the necessary information to write the Gpu kernel with the Gpu kernel Writer
1978 * This data structure must be initialized before being passed to the Gpu Kernel Writer
1979 *
1980 */
1981class GpuKernelWriterDataHolder
1982{
1983public:
1984 /**
1985 * @brief Construct a new Gpu Kernel Data object. In this phase, we should also store
1986 * the GPU target and target specific capabilities (extensions). For now, we just initialize the
1987 * programming language
1988 *
1989 * @param[in] language Gpu programming language to use
1990 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001991 GpuKernelWriterDataHolder(GpuTargetLanguage language)
1992 : tiles(language), arguments(language), code(""), _language(language)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001993 {
1994 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001995
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001996 /**
1997 * @brief Get the Gpu programming language used
1998 *
1999 * @return GpuTargetLanguage the Gpu programming language
2000 */
2001 GpuTargetLanguage programming_language() const
2002 {
2003 return _language;
2004 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002005
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002006 /**
2007 * @brief @ref GpuTileRegistry
2008 *
2009 */
2010 GpuTileRegistry tiles{};
2011 /**
2012 * @brief @ref GpuTensorArgumentRegistry
2013 *
2014 */
2015 GpuTensorArgumentRegistry arguments{};
2016 /**
2017 * @brief @ref GpuOutputSampler.
2018 *
2019 */
2020 GpuOutputSampler output_sampler{};
2021 /**
2022 * @brief Source code
2023 *
2024 */
2025 std::string code{};
2026
2027 // GpuExtensionRegistry extensions{};
2028private:
2029 GpuTargetLanguage _language;
2030};
2031
2032struct LWS
2033{
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002034 int32_t x{ 1 };
2035 int32_t y{ 1 };
2036 int32_t z{ 1 };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002037};
2038
2039/**
2040 * @brief Utility class used to get the tile from the operand. If the operand is not a tile, @ref OperandUnpacker
2041 * declare an anonymous tile in the tile registry.
2042 */
2043class OperandUnpacker
2044{
2045public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002046 OperandUnpacker(GpuTileRegistry &tiles, GpuTensorArgumentRegistry &arguments)
2047 : _tiles(tiles), _arguments(arguments)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002048 {
2049 // Increase the level of the stack to allocate possible temporary tiles
2050 _tiles.increment_registry_level();
2051 };
2052
2053 ~OperandUnpacker()
2054 {
2055 // Decrease the level of the stack to deallocate any temporary tiles
2056 _tiles.decrement_registry_level();
2057 }
2058
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002059 IVectorTile *unpack(const Operand &src)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002060 {
2061 // Get the tile
2062 if(src.type() == OperandType::Tile)
2063 {
2064 assert(_tiles.has_tile(src.value()));
2065 return _tiles[src.value()];
2066 }
2067 // Create an anonymous tile with a constant
2068 else if(static_cast<int32_t>(src.type()) & 0x00001000)
2069 {
2070 if(src.type() == OperandType::ScalarTile)
2071 {
2072 ScalarTileCoord coord = src.scalar_tile_coordinate();
2073 assert(_tiles.has_tile(src.value()));
2074 assert(coord.x >= 0);
2075 assert(coord.y >= 0);
2076 auto val = _tiles[src.value()]->scalar(coord.x, coord.y);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002077 return _tiles.insert({ { { val.str } } }, val.type.dt);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002078 }
2079 else
2080 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002081 return _tiles.insert({ { { src.value() } } }, to_tile_data_type(src.type()));
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002082 }
2083 }
2084 // Create an anonymous tile with the tensor component
2085 else
2086 {
2087 assert(_arguments.has_tensor_argument(src.value()));
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002088 auto x = _arguments[src.value()];
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002089 const std::string val = x->component(to_tensor_component(src.type()));
2090 const DataType dt = x->component_data_type();
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002091 return _tiles.insert({ { { val } } }, dt);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002092 }
2093 }
2094
2095private:
2096 DataType to_tile_data_type(OperandType x)
2097 {
2098 return static_cast<DataType>(static_cast<int32_t>(x) & 0x00ff);
2099 }
2100
2101 TensorComponent to_tensor_component(OperandType x)
2102 {
2103 switch(x)
2104 {
2105 case OperandType::TensorDim0:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002106 return TensorComponent::Dim0;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002107 case OperandType::TensorDim1:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002108 return TensorComponent::Dim1;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002109 case OperandType::TensorDim2:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002110 return TensorComponent::Dim2;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002111 case OperandType::TensorDim3:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002112 return TensorComponent::Dim3;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002113 case OperandType::TensorDim4:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002114 return TensorComponent::Dim4;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002115 case OperandType::TensorStride1:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002116 return TensorComponent::Stride1;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002117 case OperandType::TensorStride2:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002118 return TensorComponent::Stride2;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002119 case OperandType::TensorStride3:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002120 return TensorComponent::Stride3;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002121 case OperandType::TensorStride4:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002122 return TensorComponent::Stride4;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002123 case OperandType::TensorDim1xDim2:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002124 return TensorComponent::Dim1xDim2;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002125 case OperandType::TensorDim1xDim2xDim3:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002126 return TensorComponent::Dim1xDim2xDim3;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002127 case OperandType::TensorDataOffset:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002128 return TensorComponent::OffsetFirstElement;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002129 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002130 assert(false);
2131 return TensorComponent::Unknown;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002132 }
2133 }
2134
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002135 GpuTileRegistry &_tiles;
2136 GpuTensorArgumentRegistry &_arguments;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002137};
2138
2139/**
2140 * @brief Utility class used to get the tensor argument from the operand. If the operand is not a tile, @ref OperandUnpacker
2141 * declare an anonymous tile in the tile registry.
2142 * Tensor dimension reduction aims for reducing the tensor data dimension while keeping data's tensor structure.
2143 */
2144class TensorOperandUnpacker
2145{
2146public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002147 TensorOperandUnpacker(GpuTensorArgumentRegistry &arguments)
2148 : _arguments(arguments){};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002149
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002150 IGpuTensorArgument *unpack(const TensorOperand &src)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002151 {
2152 assert(_arguments.has_tensor_argument(src.value()));
2153 return _arguments[src.value()];
2154 }
2155
2156private:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002157 GpuTensorArgumentRegistry &_arguments;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002158};
2159
2160/**
2161 * @brief The GpuKernel will be used in three occasions (stages):
2162 * #- Compilation stage
2163 * #- Tuning stage
2164 * #- Dispatch stage
2165 */
2166struct GpuKernel
2167{
2168 // Compilation stage
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002169 std::string code{}; // Source code, required for the compilation stage
2170 std::vector<GpuExtensions> list_extensions{}; // Extensions, required for the compilation stage
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002171 // Tuning stage
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002172 std::string config_id{}; // Unique id, required for the tuning stage
2173 std::vector<LWS> list_lws{}; // LWS to test, required for the tuning stage
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002174 // Dispatch stage
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002175 GpuOutputSampler output_sampler{}; // GpuOutputSampler, required for the dispatch stage
2176 std::vector<std::pair<int32_t, GpuTensorStorage>> list_tensor_storages; // List of tensor storages, required for the dispatch stage
2177 std::vector<std::pair<int32_t, TensorComponent>> list_tensor_components; // List of tensor components (width, stride,..), required for the dispatch stage)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002178};
2179
2180// This function should produce an object with the source
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002181inline std::string generate_code(GpuKernelWriterDataHolder &in, const std::string &name)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002182{
2183 std::string code;
2184 code += "__kernel void ";
2185 code += name;
2186 code += "(\n";
2187
2188 auto IdSpaces = in.arguments.IdSpace_declarations();
2189
2190 std::vector<std::string> arg_str;
2191
2192 auto tensor_args = in.arguments.tensor_argument_declarations();
2193
2194 for(auto &i : tensor_args)
2195 {
2196 // For each tensor used, get the storage and tensor components
2197 auto storages = i->storage_declarations();
2198 auto components = i->component_declarations();
2199
2200 for(auto &y : storages)
2201 {
2202 std::string str;
2203 str += i->storage_type_declaration(y);
2204 str += " ";
2205 str += i->storage(y);
2206 arg_str.push_back(str);
2207 }
2208
2209 for(auto &y : components)
2210 {
2211 std::string str;
2212 str += i->component_type_declaration();
2213 str += " ";
2214 str += i->component(y);
2215 arg_str.push_back(str);
2216 }
2217 }
2218
2219 for(size_t i = 0; i < arg_str.size(); ++i)
2220 {
2221 code += arg_str[i];
2222 if(i + 1 < arg_str.size())
2223 {
2224 code += ",\n";
2225 }
2226 }
2227
2228 code += ")\n";
2229 code += "{\n";
2230 code += in.code;
2231 code += "}\n";
2232
2233 return code;
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002234}
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002235
2236/**
2237 * @brief This class is responsible to map a N-Tensor to a 3d tensor. The mapper needs the GpuSampler to know
2238 * how to reduce the dimensionality of a tensor
2239 *
2240 */
2241class GpuTensor3dMapper
2242{
2243public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002244 GpuTensor3dMapper(IGpuTensorArgument *tensor, GpuSampler sampler)
2245 : _sampler(sampler), _tensor(tensor){};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002246
2247 std::string tensor_component_x() const
2248 {
2249 const auto format = _sampler.format;
2250 switch(format)
2251 {
2252 case TensorSamplerFormat::C_WH_1:
2253 case TensorSamplerFormat::C_W_H:
2254 return _tensor->component(TensorComponent::C);
2255 default:
2256 std::cout << "Unsupported tensor format" << std::endl;
2257 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002258 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002259 }
2260 }
2261
2262 std::string tensor_component_y() const
2263 {
2264 const auto format = _sampler.format;
2265 switch(format)
2266 {
2267 case TensorSamplerFormat::C_WH_1:
2268 return _tensor->component(TensorComponent::WxH);
2269 case TensorSamplerFormat::C_W_H:
2270 return _tensor->component(TensorComponent::W);
2271 default:
2272 std::cout << "Unsupported tensor format" << std::endl;
2273 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002274 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002275 }
2276 }
2277
2278 std::string tensor_component_z() const
2279 {
2280 const auto format = _sampler.format;
2281 switch(format)
2282 {
2283 case TensorSamplerFormat::C_WH_1:
2284 return "1";
2285 case TensorSamplerFormat::C_W_H:
2286 return _tensor->component(TensorComponent::H);
2287 default:
2288 std::cout << "Unsupported tensor format" << std::endl;
2289 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002290 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002291 }
2292 }
2293
2294 std::string tensor_component_stride_y() const
2295 {
2296 const auto format = _sampler.format;
2297 switch(format)
2298 {
2299 case TensorSamplerFormat::C_WH_1:
2300 case TensorSamplerFormat::C_W_H:
2301 return _tensor->component(TensorComponent::Stride1);
2302 default:
2303 std::cout << "Unsupported tensor format" << std::endl;
2304 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002305 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002306 }
2307 }
2308
2309 std::string tensor_component_stride_z() const
2310 {
2311 const auto format = _sampler.format;
2312 switch(format)
2313 {
2314 case TensorSamplerFormat::C_WH_1:
2315 return "0";
2316 case TensorSamplerFormat::C_W_H:
2317 return _tensor->component(TensorComponent::Stride2);
2318 default:
2319 std::cout << "Unsupported tensor format" << std::endl;
2320 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002321 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002322 }
2323 }
2324
2325 std::string tensor_component_stride_batch() const
2326 {
2327 const auto format = _sampler.format;
2328 switch(format)
2329 {
2330 case TensorSamplerFormat::C_WH_1:
2331 case TensorSamplerFormat::C_W_H:
2332 return _tensor->component(TensorComponent::Stride3);
2333 default:
2334 std::cout << "Unsupported tensor format" << std::endl;
2335 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002336 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002337 }
2338 }
2339
2340 bool is_one_component_x() const
2341 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002342 auto t = _tensor->format();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002343 const auto format = _sampler.format;
2344 switch(format)
2345 {
2346 case TensorSamplerFormat::C_WH_1:
2347 case TensorSamplerFormat::C_W_H:
2348 return t.shape[0] == 1;
2349 default:
2350 std::cout << "Unsupported tensor format" << std::endl;
2351 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002352 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002353 }
2354 }
2355
2356 bool is_one_component_y() const
2357 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002358 auto t = _tensor->format();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002359 const auto format = _sampler.format;
2360 switch(format)
2361 {
2362 case TensorSamplerFormat::C_WH_1:
2363 return (t.shape[1] * t.shape[2]) == 1;
2364 case TensorSamplerFormat::C_W_H:
2365 return t.shape[1] == 1;
2366 default:
2367 std::cout << "Unsupported tensor format" << std::endl;
2368 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002369 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002370 }
2371 }
2372
2373 bool is_one_component_z() const
2374 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002375 auto t = _tensor->format();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002376 const auto format = _sampler.format;
2377 switch(format)
2378 {
2379 case TensorSamplerFormat::C_WH_1:
2380 return true;
2381 case TensorSamplerFormat::C_W_H:
2382 return t.shape[2] == 1;
2383 default:
2384 std::cout << "Unsupported tensor format" << std::endl;
2385 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002386 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002387 }
2388 }
2389
2390 bool is_one_component_batch() const
2391 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002392 auto t = _tensor->format();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002393 const auto format = _sampler.format;
2394 switch(format)
2395 {
2396 case TensorSamplerFormat::C_WH_1:
2397 case TensorSamplerFormat::C_W_H:
2398 return t.shape[3] == 1;
2399 default:
2400 std::cout << "Unsupported tensor format" << std::endl;
2401 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002402 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002403 }
2404 }
2405
2406 GpuSampler gpu_sampler() const
2407 {
2408 return _sampler;
2409 }
2410
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002411 IGpuTensorArgument *tensor_argument() const
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002412 {
2413 return _tensor;
2414 }
2415
2416private:
2417 GpuSampler _sampler;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002418 IGpuTensorArgument *_tensor;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002419};
2420
2421struct GpuKernelWriterAttribute
2422{
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002423 bool return_tensor_component_by_value{ false };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002424};
2425
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002426enum class RoundingMode
2427{
2428 None,
2429 Rte,
2430 Rtz,
2431 Rtp,
2432 Rtn
2433};
2434
2435// https://llvm.org/docs/tutorial/MyFirstLanguageFrontend/LangImpl05.html
2436class IGpuKernelWriter
2437{
2438public:
2439 virtual ~IGpuKernelWriter() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002440
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002441 virtual void set_IdSpace(int32_t id) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002442
2443 virtual void import_tile(const std::string &dst, const IVectorTile *src) = 0;
2444
2445 virtual void declare_argument(const std::string &name, const TensorInfo &tensor) = 0;
2446
2447 virtual void declare_tile(const std::string &name, const TileInfo &info) = 0;
2448
2449 virtual void declare_const_tile(const std::string &name, const std::vector<std::vector<std::string>> &in, DataType dt) = 0;
2450
2451 virtual void write_text(const std::string &x) = 0;
2452
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002453 virtual void compound_statement_begin() = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002454
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002455 virtual void compound_statement_end() = 0;
2456
2457 // Operations
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002458 virtual void op_get_global_id(const Operand &dst_var, int32_t dim) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002459
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002460 virtual void op_get_global_coord(const Operand &dst, const Operand &step, const TensorOperand &tensor, int32_t dim) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002461
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002462 virtual void op_get_global_batch(const Operand &dst, const TensorOperand &tensor) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002463
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002464 virtual void op_get_global_size(const Operand &dst_var, int32_t dim) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002465
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002466 virtual void op_unary_expression(const Operand &dst, UnaryOp op, const Operand &src) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002467
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002468 virtual void op_binary_expression(const Operand &dst, const Operand &lhs, BinaryOp op, const Operand &rhs) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002469
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002470 virtual void op_assign(const Operand &dst_name, const Operand &src_name) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002471
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002472 virtual void op_unary_elementwise_function(const Operand &dst_name, UnaryFunction func, const Operand &src_name) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002473
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002474 virtual void op_binary_elementwise_function(const Operand &dst_name, BinaryFunction func, const Operand &first_name, const Operand &second_name) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002475
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002476 virtual void op_ternary_elementwise_function(const Operand &dst_name, TernaryFunction func, const Operand &first_name, const Operand &second_name, const Operand &third_name) = 0;
2477
2478 virtual void op_if_header(const Operand &lhs, BinaryOp op, const Operand &rhs) = 0;
2479
2480 virtual void op_else_if_header(const Operand &lhs, BinaryOp op, const Operand &rhs) = 0;
2481
2482 virtual void op_else_header() = 0;
2483
2484 virtual void op_for_loop_header(const Operand &var_name, BinaryOp cond_op, const Operand &cond_value, AssignmentOp update_op, const Operand &update_value) = 0;
2485
2486 virtual void op_load_indirect(const TensorOperand &tensor, const Operand &dst, const Operand &x, const Operand &y_indirect, const Operand &z, const Operand &b = Operand("0", OperandType::ScalarInt32)) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002487
2488 virtual void op_load_immediate(const TensorOperand &tensor, const Operand &dst, const Operand &x, const Operand &y, const Operand &z, const Operand &b = Operand("0", OperandType::ScalarInt32), const Operand &dilation_y = Operand("1", OperandType::ScalarInt32)) = 0;
2489
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002490 virtual void op_store_immediate(const TensorOperand &tensor, const Operand &src, const Operand &x, const Operand &y, const Operand &z, const Operand &b = Operand("0", OperandType::ScalarInt32)) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002491
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002492 virtual void op_cast_expression(const Operand &dst, const Operand &src, ConvertPolicy policy) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002493
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002494 virtual void op_return() = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002495
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002496 // Utils
2497 // It is the process of converting
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002498 virtual void util_get_indirect_buffer(const Operand &dst, const TensorOperand &tensor, const Operand &x,
2499 const Operand &y, const Operand &x_off, const Operand &y_off) = 0;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002500};
2501
2502enum class GpuLoadStoreType
2503{
2504 Load = 1,
2505 Store = 2
2506};
2507
2508class IGpuLoadStoreHelperWriter
2509{
2510public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002511 IGpuLoadStoreHelperWriter(IGpuKernelWriter *x, GpuTensor3dMapper mapper, GpuLoadStoreType type)
2512 : _writer(x), _mapper(mapper), _type(type)
2513 {
2514 }
2515
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002516 IGpuLoadStoreHelperWriter(const IGpuLoadStoreHelperWriter &) = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002517
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002518 IGpuLoadStoreHelperWriter &operator=(const IGpuLoadStoreHelperWriter &) = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002519
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002520 virtual ~IGpuLoadStoreHelperWriter() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002521
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002522 virtual void initialize(IVectorTile *dst, IVectorTile *x, IVectorTile *z, IVectorTile *b) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002523
2524 virtual void write(const std::pair<int32_t, std::string> &y) = 0;
2525
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002526 virtual void finalize() = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002527
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002528protected:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002529 IGpuKernelWriter *_writer;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002530 GpuTensor3dMapper _mapper;
2531 GpuLoadStoreType _type;
2532};
2533
2534class ClLoadStoreBufferHelperWriter : public IGpuLoadStoreHelperWriter
2535{
2536public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002537 ClLoadStoreBufferHelperWriter(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type)
2538 : IGpuLoadStoreHelperWriter(x, mapper, type)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002539 {
2540 }
2541
2542 ClLoadStoreBufferHelperWriter(const ClLoadStoreBufferHelperWriter &) = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002543
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002544 ClLoadStoreBufferHelperWriter &operator=(const ClLoadStoreBufferHelperWriter &) = default;
2545
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002546 static bool
2547 validate(IGpuKernelWriter *x, GpuTensor3dMapper mapper, GpuLoadStoreType type, IVectorTile *dst)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002548 {
2549 CKW_UNUSED(x, type, dst);
2550
2551 if(mapper.gpu_sampler().storage != GpuSamplerTensorStorage::BufferUint8Ptr)
2552 {
2553 return false;
2554 }
2555 return true;
2556 }
2557
2558 void initialize(IVectorTile *dst, IVectorTile *x, IVectorTile *z, IVectorTile *b) override
2559 {
2560 assert(validate(_writer, _mapper, _type, dst));
2561
2562 _dst = dst;
2563 _ls_width_full = dst->format().w;
2564
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002565 _coord_x = x->scalar(0, 0).str;
2566 _coord_z = z->scalar(0, 0).str;
2567 _coord_b = b->scalar(0, 0).str;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002568 _coord_orig_z = _coord_z;
2569
2570 out_of_bound_initialize_x(_coord_x);
2571 out_of_bound_initialize_z(_coord_z);
2572
2573 /*
2574 meaning of else:
2575 - x: partial load/store
2576 - y: no load/store operation
2577 - z: no load/store operation
2578 if(x)
2579 {
2580 if(z)
2581 {
2582 if(y)
2583 {
2584 // full load/store width
2585 }
2586 else
2587 {
2588 // no load/store
2589 }
2590 }
2591 else
2592 {
2593 // no load/store
2594 }
2595 }
2596 else
2597 {
2598 if(z)
2599 {
2600 if(y)
2601 {
2602 // partial load/store width
2603 }
2604 else
2605 {
2606 // no load/store
2607 }
2608 }
2609 else
2610 {
2611 // no load/store
2612 }
2613 }
2614 */
2615 }
2616
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002617 void write(const std::pair<int32_t, std::string> &y) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002618 {
2619 int32_t idx_y = y.first;
2620 std::string coord_y = y.second;
2621
2622 // The only check required is on Y.
2623 out_of_bound_initialize_y(coord_y);
2624
2625 const std::string dst = _dst->vector(idx_y).str;
2626 const std::string address = to_ls_buffer_address(_coord_x, coord_y, _coord_z, _coord_b);
2627 const std::string ls_buf = to_ls_buffer(_type, _ls_width_full, dst, address);
2628
2629 _writer->write_text(ls_buf);
2630 _writer->write_text(";\n");
2631
2632 out_of_bound_finalize_y(dst);
2633
2634 // The left over load/store will be written in the finalize stage
2635 if(_ls_width_part.size() != 0)
2636 {
2637 int32_t w = 0;
2638 for(auto &p : _ls_width_part)
2639 {
2640 const std::string dst0 = _dst->vector(w, p, idx_y).str;
2641 const std::string coord_x = _coord_x + " + " + std::to_string(w);
2642 const std::string address = to_ls_buffer_address(coord_x, coord_y, _coord_z, _coord_b);
2643 const std::string ls_buf0 = to_ls_buffer(_type, p, dst0, address);
2644 _leftovers_x.push_back(std::make_pair(std::make_pair(dst0, coord_y), ls_buf0));
2645
2646 w += p;
2647 }
2648 }
2649 }
2650
2651 void finalize() override
2652 {
2653 out_of_bound_finalize_z();
2654 out_of_bound_finalize_x();
2655 }
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002656
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002657private:
2658 IVectorTile *_dst{ nullptr };
2659 int32_t _ls_width_full{ 0 };
2660 std::vector<int32_t> _ls_width_part{};
2661 std::vector<std::pair<std::pair<std::string, std::string>, std::string>> _leftovers_x{};
2662 std::string _coord_x{};
2663 std::string _coord_z{};
2664 std::string _coord_orig_z{};
2665 std::string _coord_b{};
2666
2667 void out_of_bound_initialize_x(std::string &coord)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002668 {
2669 if(_mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin)
2670 {
2671 auto tensor_format = _mapper.tensor_argument()->format();
2672 auto shape = tensor_format.shape;
2673
2674 _ls_width_part = decompose_leftover_ls_vector_width(shape[0] % _ls_width_full);
2675 if(_ls_width_part.size() != 0)
2676 {
2677 _writer->write_text("if(" + coord + " > 0)\n");
2678 _writer->compound_statement_begin();
2679 }
2680 }
2681 };
2682
2683 void out_of_bound_finalize_x()
2684 {
2685 if(_mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin)
2686 {
2687 if(_ls_width_part.size() != 0)
2688 {
2689 _writer->compound_statement_end();
2690 _writer->write_text("else\n");
2691 _writer->compound_statement_begin();
2692
2693 out_of_bound_initialize_z(_coord_orig_z);
2694 for(auto &i : _leftovers_x)
2695 {
2696 out_of_bound_initialize_y(i.first.second);
2697 _writer->write_text(i.second);
2698 _writer->write_text(";\n");
2699 out_of_bound_finalize_y(i.first.first);
2700 }
2701 out_of_bound_finalize_z();
2702 _writer->compound_statement_end();
2703 }
2704 }
2705 };
2706
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002707 void out_of_bound_initialize_y(std::string &coord)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002708 {
2709 std::string max = "";
2710
2711 const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
2712
2713 switch(address_mode_y)
2714 {
2715 case TensorSamplerAddressModeY::Skip:
2716 case TensorSamplerAddressModeY::ClampToBorder:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002717 // NOTE: This line should not be moved outside of the switch statement.
2718 // The reason for that is because when we query the component, the component is marked as used
2719 // and added to the list of arguments of the kernel. Since, not in all cases this component is required,
2720 // we should request the component only when used
2721 max = _mapper.tensor_component_y();
2722 _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n");
2723 _writer->compound_statement_begin();
2724 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002725 case TensorSamplerAddressModeY::SkipMinEdgeOnly:
2726 case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002727 _writer->write_text("if(" + coord + " >= 0)\n");
2728 _writer->compound_statement_begin();
2729 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002730 case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
2731 case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002732 max = _mapper.tensor_component_y();
2733 _writer->write_text("if(" + coord + " < " + max + ")\n");
2734 _writer->compound_statement_begin();
2735 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002736 case TensorSamplerAddressModeY::ClampToNearest:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002737 max = _mapper.tensor_component_y();
2738 coord = "clamp(" + coord + ", 0, " + max + " - 1)";
2739 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002740 case TensorSamplerAddressModeY::ClampToMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002741 max = _mapper.tensor_component_y();
2742 coord = "min(" + coord + ", " + max + " - 1)";
2743 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002744 case TensorSamplerAddressModeY::ClampToMinEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002745 coord = "max(" + coord + ", 0)";
2746 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002747 case TensorSamplerAddressModeY::None:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002748 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002749 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002750 std::cout << "Unsupported address mode for write_out_of_bound_check_yz" << std::endl;
2751 assert(false);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002752 }
2753 };
2754
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002755 void out_of_bound_finalize_y(const std::string &dst)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002756 {
2757 const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
2758
2759 switch(address_mode_y)
2760 {
2761 case TensorSamplerAddressModeY::ClampToBorder:
2762 case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
2763 case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
2764 case TensorSamplerAddressModeY::Skip:
2765 case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
2766 case TensorSamplerAddressModeY::SkipMinEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002767 _writer->compound_statement_end();
2768 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002769
2770 default:
2771 assert(false);
2772 }
2773
2774 switch(address_mode_y)
2775 {
2776 case TensorSamplerAddressModeY::ClampToBorder:
2777 case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
2778 case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002779 _writer->write_text("else\n");
2780 _writer->compound_statement_begin();
2781 _writer->write_text(dst);
2782 _writer->write_text(" = 0.0f;\n");
2783 _writer->compound_statement_end();
2784 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002785
2786 default:
2787 assert(false);
2788 }
2789 };
2790
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002791 void out_of_bound_initialize_z(std::string &coord)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002792 {
2793 std::string max = "";
2794
2795 const auto address_mode_z = _mapper.gpu_sampler().address_mode_z;
2796
2797 switch(address_mode_z)
2798 {
2799 case TensorSamplerAddressModeZ::Skip:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002800 max = _mapper.tensor_component_z();
2801 _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n");
2802 _writer->compound_statement_begin();
2803 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002804 case TensorSamplerAddressModeZ::SkipMinEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002805 _writer->write_text("if(" + coord + " >= 0)\n");
2806 _writer->compound_statement_begin();
2807 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002808 case TensorSamplerAddressModeZ::SkipMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002809 max = _mapper.tensor_component_z();
2810 _writer->write_text("if(" + coord + " < " + max + ")\n");
2811 _writer->compound_statement_begin();
2812 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002813 case TensorSamplerAddressModeZ::ClampToNearest:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002814 max = _mapper.tensor_component_z();
2815 coord = "clamp(" + coord + ", 0, " + max + " - 1)";
2816 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002817 case TensorSamplerAddressModeZ::ClampToMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002818 max = _mapper.tensor_component_z();
2819 coord = "min(" + coord + ", " + max + " - 1)";
2820 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002821 case TensorSamplerAddressModeZ::ClampToMinEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002822 coord = "max(" + coord + ", 0)";
2823 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002824 case TensorSamplerAddressModeZ::None:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002825 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002826 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002827 std::cout << "Unsupported address mode for write_out_of_bound_check_yz" << std::endl;
2828 assert(false);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002829 }
2830 };
2831
2832 void out_of_bound_finalize_z()
2833 {
2834 const auto address_mode_z = _mapper.gpu_sampler().address_mode_z;
2835
2836 switch(address_mode_z)
2837 {
2838 case TensorSamplerAddressModeZ::Skip:
2839 case TensorSamplerAddressModeZ::SkipMinEdgeOnly:
2840 case TensorSamplerAddressModeZ::SkipMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002841 _writer->compound_statement_end();
2842 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002843
2844 default:
2845 assert(false);
2846 }
2847 };
2848
2849 std::vector<int32_t> decompose_leftover_ls_vector_width(int32_t ls_leftover_vector_width) const
2850 {
2851 std::vector<int32_t> x;
2852
2853 switch(ls_leftover_vector_width)
2854 {
2855 case 0:
2856 break;
2857 case 1:
2858 case 2:
2859 case 3:
2860 case 4:
2861 case 8:
2862 case 16:
2863 x.push_back(ls_leftover_vector_width);
2864 break;
2865 case 5:
2866 x.push_back(4);
2867 x.push_back(1);
2868 break;
2869 case 6:
2870 x.push_back(4);
2871 x.push_back(2);
2872 break;
2873 case 7:
2874 x.push_back(4);
2875 x.push_back(3);
2876 break;
2877 case 9:
2878 x.push_back(8);
2879 x.push_back(1);
2880 break;
2881 case 10:
2882 x.push_back(8);
2883 x.push_back(2);
2884 break;
2885 case 11:
2886 x.push_back(8);
2887 x.push_back(3);
2888 break;
2889 case 12:
2890 x.push_back(8);
2891 x.push_back(4);
2892 break;
2893 case 13:
2894 x.push_back(8);
2895 x.push_back(4);
2896 x.push_back(1);
2897 break;
2898 case 14:
2899 x.push_back(8);
2900 x.push_back(4);
2901 x.push_back(2);
2902 break;
2903 case 15:
2904 x.push_back(8);
2905 x.push_back(4);
2906 x.push_back(3);
2907 break;
2908
2909 default:
2910 assert(false);
2911 }
2912 return x;
2913 }
2914
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002915 std::string to_ls_buffer(GpuLoadStoreType type, int32_t vector_width, const std::string &data,
2916 const std::string &address)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002917 {
2918 switch(type)
2919 {
2920 case GpuLoadStoreType::Load:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002921 if(vector_width != 1)
2922 {
2923 return data + " = vload" + std::to_string(vector_width) + "(0, " + address + ")";
2924 }
2925 else
2926 {
2927 return data + " = *(" + address + ")";
2928 }
2929 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002930 case GpuLoadStoreType::Store:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002931 if(vector_width != 1)
2932 {
2933 return "vstore" + std::to_string(vector_width) + "(" + data + ", 0, " + address + ")";
2934 }
2935 else
2936 {
2937 return "*(" + address + ") = " + data;
2938 }
2939 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002940 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002941 std::cout << "Unsupported GpuLoadStoreType" << std::endl;
2942 assert(false);
2943 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002944 }
2945 }
2946
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002947 std::string to_ls_buffer_address(const std::string &x, const std::string &y, const std::string &z,
2948 const std::string &b) const
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002949 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002950 auto tensor_storage = static_cast<GpuTensorStorage>(_mapper.gpu_sampler().storage);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002951 assert(tensor_storage == GpuTensorStorage::BufferUint8Ptr);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002952 const std::string ptr_buf = _mapper.tensor_argument()->storage(tensor_storage);
2953 const std::string dst_type = get_cl_data_type(_dst->format().dt, 1);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002954
2955 std::string address;
2956 address += "(__global ";
2957 address += dst_type;
2958 address += "*)(";
2959 address += ptr_buf;
2960 if(x != "0" && (_mapper.is_one_component_x() != true))
2961 {
2962 address += " + (";
2963 address += x + ") * sizeof(" + dst_type + ")";
2964 }
2965 if(y != "0" && (_mapper.is_one_component_y() != true))
2966 {
2967 const std::string stride_y = _mapper.tensor_component_stride_y();
2968 address += " + (";
2969 address += y + ")";
2970 address += " * ";
2971 address += stride_y;
2972 }
2973 if(z != "0" && (_mapper.is_one_component_z() != true))
2974 {
2975 const std::string stride_z = _mapper.tensor_component_stride_z();
2976 address += " + (";
2977 address += z + ")";
2978 address += " * ";
2979 address += stride_z;
2980 }
2981 if(b != "0" && (_mapper.is_one_component_batch() != true))
2982 {
2983 const std::string stride_b = _mapper.tensor_component_stride_batch();
2984 address += " + (";
2985 address += b + ")";
2986 address += " * ";
2987 address += stride_b;
2988 }
2989 address += ")";
2990 return address;
2991 }
2992};
2993
2994class ClLoadStoreImage2dHelperWriter : public IGpuLoadStoreHelperWriter
2995{
2996public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002997 static bool
2998 validate(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type, IVectorTile *dst)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002999 {
3000 CKW_UNUSED(x);
3001
3002 if(dst->format().w != 4)
3003 {
3004 return false;
3005 }
3006 if(mapper.gpu_sampler().address_mode_x != TensorSamplerAddressModeX::None)
3007 {
3008 return false;
3009 }
3010 if(mapper.gpu_sampler().address_mode_z != TensorSamplerAddressModeZ::None)
3011 {
3012 return false;
3013 }
3014 if(mapper.gpu_sampler().storage != GpuSamplerTensorStorage::Image2dReadOnly && type == GpuLoadStoreType::Load)
3015 {
3016 return false;
3017 }
3018 if(mapper.gpu_sampler().storage != GpuSamplerTensorStorage::Image2dWriteOnly && type == GpuLoadStoreType::Store)
3019 {
3020 return false;
3021 }
3022 if((dst->format().dt != DataType::Fp32) && (dst->format().dt != DataType::Fp16))
3023 {
3024 return false;
3025 }
3026 return true;
3027 /*
3028 - x: Only GpuSamplerAddressModeX::None is supported and vector length = 4
3029 - z: Only GpuSamplerAddressModeZ::None is supported
3030 */
3031 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003032
3033 ClLoadStoreImage2dHelperWriter(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type)
3034 : IGpuLoadStoreHelperWriter(x, mapper, type)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003035 {
3036 }
3037
3038 ClLoadStoreImage2dHelperWriter(const ClLoadStoreImage2dHelperWriter &) = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003039
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003040 ClLoadStoreImage2dHelperWriter &operator=(const ClLoadStoreImage2dHelperWriter &) = default;
3041
3042 void initialize(IVectorTile *dst, IVectorTile *x, IVectorTile *z, IVectorTile *b) override
3043 {
3044 assert(validate(_writer, _mapper, _type, dst));
3045
3046 _dst = dst;
3047 _ls_width_full = dst->format().w;
3048 _coord_x = x->scalar(0, 0).str;
3049 _coord_z = z->scalar(0, 0).str;
3050 _coord_b = b->scalar(0, 0).str;
3051
3052 /*
3053 if(y)
3054 {
3055 // full load/store width
3056 }
3057 else
3058 {
3059 // no load/store
3060 }
3061 */
3062 }
3063
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003064 void write(const std::pair<int32_t, std::string> &y) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003065 {
3066 int32_t idx_y = y.first;
3067 std::string coord_y = y.second;
3068
3069 // The only check required is on Y.
3070 out_of_bound_initialize_y(coord_y);
3071
3072 const std::string dst = _dst->vector(idx_y).str;
3073 const std::string sampler = to_ls_image2d_sampler();
3074 const std::string coord = to_ls_image2d_coord(_coord_x, coord_y, _coord_z, _coord_b);
3075 const std::string ls_buf = to_ls_image2d(_type, _ls_width_full, dst, sampler, coord);
3076
3077 _writer->write_text(ls_buf);
3078 _writer->write_text(";\n");
3079
3080 out_of_bound_finalize_y(dst);
3081 }
3082
3083 void finalize() override
3084 {
3085 }
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003086
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003087private:
3088 IVectorTile *_dst{ nullptr };
3089 int32_t _ls_width_full{ 0 };
3090 std::string _coord_x{};
3091 std::string _coord_z{};
3092 std::string _coord_b{};
3093
3094 void out_of_bound_initialize_y(std::string &coord)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003095 {
3096 std::string max = "";
3097
3098 const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
3099
3100 switch(address_mode_y)
3101 {
3102 case TensorSamplerAddressModeY::Skip:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003103 max = _mapper.tensor_component_y();
3104 _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n");
3105 _writer->compound_statement_begin();
3106 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003107 case TensorSamplerAddressModeY::SkipMinEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003108 _writer->write_text("if(" + coord + " >= 0)\n");
3109 _writer->compound_statement_begin();
3110 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003111 case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003112 max = _mapper.tensor_component_y();
3113 _writer->write_text("if(" + coord + " < " + max + ")\n");
3114 _writer->compound_statement_begin();
3115 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003116 case TensorSamplerAddressModeY::ClampToBorder:
3117 case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
3118 case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
3119 case TensorSamplerAddressModeY::ClampToNearest:
3120 case TensorSamplerAddressModeY::ClampToMaxEdgeOnly:
3121 case TensorSamplerAddressModeY::ClampToMinEdgeOnly:
3122 case TensorSamplerAddressModeY::None:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003123 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003124 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003125 std::cout << "Unsupported address mode for write_out_of_bound_check_y" << std::endl;
3126 assert(false);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003127 }
3128 };
3129
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003130 void out_of_bound_finalize_y(const std::string &dst)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003131 {
3132 CKW_UNUSED(dst);
3133
3134 const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
3135
3136 switch(address_mode_y)
3137 {
3138 case TensorSamplerAddressModeY::Skip:
3139 case TensorSamplerAddressModeY::SkipMinEdgeOnly:
3140 case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003141 _writer->compound_statement_end();
3142 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003143
3144 default:
3145 assert(false);
3146 }
3147 };
3148
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003149 std::string to_ls_image2d(GpuLoadStoreType type, int32_t vector_width, const std::string &data,
3150 const std::string &sampler, const std::string &coord)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003151 {
3152 CKW_UNUSED(vector_width);
3153
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003154 auto tensor_storage = static_cast<GpuTensorStorage>(_mapper.gpu_sampler().storage);
3155 const std::string image2d_obj = _mapper.tensor_argument()->storage(tensor_storage);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003156 const std::string post_fix = _dst->format().dt == DataType::Fp32 ? "f" : "h";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003157
3158 switch(type)
3159 {
3160 case GpuLoadStoreType::Load:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003161 return data + " = read_image" + post_fix + "(" + image2d_obj + ", " + sampler + ", " + coord + ")";
3162 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003163 case GpuLoadStoreType::Store:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003164 return "write_image" + post_fix + "(" + image2d_obj + ", " + coord + ", " + data + ")";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003165 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003166 assert(false);
3167 std::cout << "Unsupported GpuLoadStoreType" << std::endl;
3168 assert(false);
3169 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003170 }
3171 }
3172
3173 std::string to_ls_image2d_sampler() const
3174 {
3175 const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
3176
3177 switch(address_mode_y)
3178 {
3179 case TensorSamplerAddressModeY::None:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003180 return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003181 case TensorSamplerAddressModeY::Skip:
3182 case TensorSamplerAddressModeY::SkipMinEdgeOnly:
3183 case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
3184 case TensorSamplerAddressModeY::ClampToBorder:
3185 case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
3186 case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003187 return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003188 case TensorSamplerAddressModeY::ClampToNearest:
3189 case TensorSamplerAddressModeY::ClampToMaxEdgeOnly:
3190 case TensorSamplerAddressModeY::ClampToMinEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003191 return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003192 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003193 std::cout << "Unsupported address_mode_coord" << std::endl;
3194 assert(false);
3195 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003196 }
3197 }
3198
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003199 std::string to_ls_image2d_coord(const std::string &x, const std::string &y, const std::string &z,
3200 const std::string &b) const
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003201 {
3202 std::string coord_x = "(" + x + ") >> 2";
3203 std::string coord_y = "(";
3204
3205 if(y != "0" && (_mapper.is_one_component_y() != true))
3206 {
3207 coord_y += y;
3208 }
3209 if(z != "0" && (_mapper.is_one_component_z() != true))
3210 {
3211 const std::string dim = _mapper.tensor_component_y();
3212 coord_y += " + (";
3213 coord_y += z + ")";
3214 coord_y += " * ";
3215 coord_y += dim;
3216 }
3217 if(b != "0" && (_mapper.is_one_component_batch() != true))
3218 {
3219 const std::string dim0 = _mapper.tensor_component_y();
3220 const std::string dim1 = _mapper.tensor_component_z();
3221 coord_y += " + (";
3222 coord_y += b + ")";
3223 coord_y += " * ";
3224 coord_y += dim0;
3225 coord_y += " * ";
3226 coord_y += dim1;
3227 }
3228 coord_y += ")";
3229 return "(int2)(" + coord_x + ", " + coord_y + ")";
3230 }
3231};
3232
3233/** IGpuLoadStoreHelperWriter factory class */
3234class ClLoadStoreHelperWriterFactory final
3235{
3236public:
3237 /** Static method to call the IGpuLoadStoreHelperWriter class accordingly with the tensor storage set in the mapper
3238 *
3239 *
3240 * @return IGpuLoadStoreHelperWriter
3241 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003242 static std::unique_ptr<IGpuLoadStoreHelperWriter>
3243 create(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003244 {
3245 const auto tensor_storage = mapper.gpu_sampler().storage;
3246 switch(tensor_storage)
3247 {
3248 case GpuSamplerTensorStorage::BufferUint8Ptr:
3249 return std::make_unique<ClLoadStoreBufferHelperWriter>(x, mapper, type);
3250 case GpuSamplerTensorStorage::Image2dReadOnly:
3251 case GpuSamplerTensorStorage::Image2dWriteOnly:
3252 return std::make_unique<ClLoadStoreImage2dHelperWriter>(x, mapper, type);
3253 default:
3254 std::cout << "Unsupported Gpu tensor storage" << std::endl;
3255 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01003256 return nullptr;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003257 }
3258 }
3259};
3260
3261// This utility method needs to go in utils.h
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003262inline bool is_tile_scalar(const IVectorTile *x)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003263{
3264 return x->format().w == 1 && x->format().h == 1;
3265}
3266
3267class ClKernelWriter : public IGpuKernelWriter
3268{
3269public:
3270 ClKernelWriter(GpuKernelWriterAttribute *attr, GpuKernelWriterDataHolder *x)
3271 {
3272 _data = x;
3273 _attr = attr;
3274 }
3275
3276 ClKernelWriter(const ClKernelWriter &) = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003277
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003278 ClKernelWriter &operator=(const ClKernelWriter &) = default;
3279
3280 // A IdSpaced ID is a term used to describe a fragment that is registered in ICode to ensure
3281 // there are no conflicts or ambiguity in the code
3282 void set_IdSpace(int32_t id) override
3283 {
3284 _data->tiles.set_IdSpace(id);
3285 _data->arguments.set_IdSpace(id);
3286 }
3287
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003288 void import_tile(const std::string &dst_name, const IVectorTile *src) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003289 {
3290 _data->tiles.insert(dst_name, src);
3291 }
3292
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003293 void declare_argument(const std::string &name, const TensorInfo &tensor) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003294 {
3295 assert(_data->arguments[name] == nullptr);
3296 _data->arguments.insert(name, tensor, _attr->return_tensor_component_by_value);
3297 }
3298
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003299 void declare_tile(const std::string &name, const TileInfo &format) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003300 {
3301 assert(_data->tiles[name] == nullptr);
3302 _data->tiles.insert(name, format);
3303
3304 IVectorTile *x = _data->tiles[name];
3305
3306 for(auto &t : x->underlying_source_variables())
3307 {
3308 _data->code += t.type.str + " " + t.str + ";\n";
3309 }
3310 }
3311
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003312 void declare_const_tile(const std::string &name, const std::vector<std::vector<std::string>> &in,
3313 DataType dt) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003314 {
3315 assert(_data->tiles[name] == nullptr);
3316 _data->tiles.insert(name, in, dt);
3317 // Note: A constant does not need to be declared in the code
3318 }
3319
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003320 void write_text(const std::string &x) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003321 {
3322 _data->code += x;
3323 }
3324
3325 void compound_statement_begin() override
3326 {
3327 _data->tiles.increment_registry_level();
3328 _data->code += "{\n";
3329 }
3330
3331 void compound_statement_end() override
3332 {
3333 _data->tiles.decrement_registry_level();
3334 _data->code += "}\n";
3335 }
3336
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003337 void op_get_global_id(const Operand &dst_var, int32_t dim) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003338 {
3339 assert(dst_var.type() == OperandType::Tile);
3340 assert(_data->tiles.has_tile(dst_var.value()));
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003341 assert(_data->tiles[dst_var.value()]->format().w == 1 && _data->tiles[dst_var.value()]->format().h == 1); // It must be a scalar variable
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003342
3343 auto var = _data->tiles[dst_var.value()];
3344
3345 _data->code += var->scalar(0, 0).str;
3346 _data->code += " = get_global_id(";
3347 _data->code += std::to_string(dim);
3348 _data->code += ");\n";
3349 };
3350
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003351 void op_get_global_coord(const Operand &o_dst, const Operand &o_step, const TensorOperand &o_tensor,
3352 int32_t dim) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003353 {
3354 OperandUnpacker operands(_data->tiles, _data->arguments);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003355 auto dst = operands.unpack(o_dst);
3356 auto step = operands.unpack(o_step);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003357
3358 // Validation: Check that x, y and z are scalar
3359
3360 TensorOperandUnpacker tensor_operands(_data->arguments);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003361 auto tensor = tensor_operands.unpack(o_tensor);
3362 auto gpu_sampler = o_tensor.sampler();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003363
3364 GpuTensor3dMapper mapper(tensor, gpu_sampler);
3365
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003366 switch(dim)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003367 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003368 case 0:
3369 if(mapper.is_one_component_x())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003370 {
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003371 _data->code += dst->scalar(0, 0).str;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003372 _data->code += " = 0;\n";
3373 }
3374 else
3375 {
3376 if(mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin)
3377 {
3378 // Validation: Check: fixed tensor shape
3379 // TO BE CHANGED
3380 _data->code += dst->scalar(0, 0).str;
3381 _data->code += " = get_global_id(0) * ";
3382 _data->code += step->scalar(0, 0).str;
3383 _data->code += ";\n";
3384 }
3385 else
3386 {
3387 _data->code += dst->scalar(0, 0).str;
3388 _data->code += " = get_global_id(0) * ";
3389 _data->code += step->scalar(0, 0).str;
3390 _data->code += ";\n";
3391 }
3392 }
3393 break;
3394 case 1:
3395 if(mapper.is_one_component_y())
3396 {
3397 _data->code += dst->scalar(0, 0).str;
3398 _data->code += " = 0;\n";
3399 }
3400 else
3401 {
3402 if(mapper.gpu_sampler().address_mode_y == TensorSamplerAddressModeY::OverlappingMin)
3403 {
3404 }
3405 else
3406 {
3407 _data->code += dst->scalar(0, 0).str;
3408 _data->code += " = get_global_id(1) * ";
3409 _data->code += step->scalar(0, 0).str;
3410 _data->code += ";\n";
3411 }
3412 }
3413 break;
3414 case 2:
3415 if(mapper.is_one_component_z())
3416 {
3417 _data->code += dst->scalar(0, 0).str;
3418 _data->code += " = 0;\n";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003419 }
3420 else
3421 {
3422 _data->code += dst->scalar(0, 0).str;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003423 _data->code += " = get_global_id(2) * ";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003424 _data->code += step->scalar(0, 0).str;
3425 _data->code += ";\n";
3426 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003427 break;
3428 default:
3429 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003430 }
3431 };
3432
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003433 void op_get_global_batch(const Operand &o_dst, const TensorOperand &o_tensor) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003434 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003435 OperandUnpacker operands(_data->tiles, _data->arguments);
3436 const IVectorTile *dst = operands.unpack(o_dst);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003437
3438 TensorOperandUnpacker tensor_operands(_data->arguments);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003439 IGpuTensorArgument *tensor = tensor_operands.unpack(o_tensor);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003440 auto gpu_sampler = o_tensor.sampler();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003441
3442 GpuTensor3dMapper mapper(tensor, gpu_sampler);
3443
3444 if(mapper.is_one_component_batch())
3445 {
3446 _data->code += dst->scalar(0, 0).str;
3447 _data->code += " = 0;\n";
3448 }
3449 else
3450 {
3451 std::cout << "Unsupported batched computation" << std::endl;
3452 assert(false);
3453 }
3454 };
3455
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003456 void op_get_global_size(const Operand &dst_var, int32_t dim) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003457 {
3458 assert(dst_var.type() == OperandType::Tile);
3459 assert(_data->tiles.has_tile(dst_var.value()));
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003460 assert(_data->tiles[dst_var.value()]->format().w == 1 && _data->tiles[dst_var.value()]->format().h == 1); // It must be a scalar variable
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003461
3462 auto var = _data->tiles[dst_var.value()];
3463
3464 _data->code += var->scalar(0, 0).str;
3465 _data->code += " = get_global_size(";
3466 _data->code += std::to_string(dim);
3467 _data->code += ");\n";
3468 }
3469
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003470 void op_unary_expression(const Operand &dst_name, UnaryOp op, const Operand &src_name) override
3471 {
3472 OperandUnpacker operands(_data->tiles, _data->arguments);
3473 const IVectorTile *src = operands.unpack(src_name);
3474 const IVectorTile *dst = operands.unpack(dst_name);
3475
3476 const int32_t dst_w = dst->format().w;
3477 const int32_t dst_h = dst->format().h;
3478 const int32_t src_w = src->format().w;
3479 const std::string dt = dst->underlying_source_variables()[0].type.str;
3480
3481 const bool broadcast_src_x = dst_w != 1 && src_w == 1;
3482
3483 const std::string src_prefix = broadcast_src_x ? "(" + dt + ")" : "";
3484
3485 // Broadcasting on Y is automatic
3486 for(int32_t y = 0; y < dst_h; ++y)
3487 {
3488 _data->code += dst->vector(y).str;
3489 _data->code += " = ";
3490 _data->code += to_string(op);
3491 _data->code += src_prefix + src->vector(y).str;
3492 _data->code += ";\n";
3493 }
3494 }
3495
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003496 void op_binary_expression(const Operand &dst_name, const Operand &lhs_name, BinaryOp op,
3497 const Operand &rhs_name) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003498 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003499 OperandUnpacker operands(_data->tiles, _data->arguments);
3500 const IVectorTile *lhs = operands.unpack(lhs_name);
3501 const IVectorTile *rhs = operands.unpack(rhs_name);
3502 const IVectorTile *dst = operands.unpack(dst_name);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003503
3504 const int32_t dst_w = dst->format().w;
3505 const int32_t dst_h = dst->format().h;
3506 assert(lhs != nullptr);
3507 const int32_t lhs_w = lhs->format().w;
3508 const int32_t rhs_w = rhs->format().w;
3509
3510 if(op == BinaryOp::MatMul_Nt_T)
3511 {
3512 assert((dst->format().dt == DataType::Fp32) || (dst->format().dt == DataType::Fp16));
3513 for(int32_t y = 0; y < dst_h; ++y)
3514 {
3515 for(int32_t x = 0; x < dst_w; ++x)
3516 {
3517 for(int32_t k = 0; k < lhs_w; ++k)
3518 {
3519 _data->code += dst->scalar(x, y).str;
3520 _data->code += " = fma(";
3521 _data->code += lhs->scalar(k, y).str;
3522 _data->code += ", ";
3523 _data->code += rhs->scalar(k, x).str;
3524 _data->code += ", ";
3525 _data->code += dst->scalar(x, y).str;
3526 _data->code += ");\n";
3527 }
3528 }
3529 }
3530
3531 return;
3532 }
3533
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003534 const bool broadcast_lhs_x = dst_w != 1 && lhs_w == 1;
3535 const bool broadcast_rhs_x = dst_w != 1 && rhs_w == 1;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003536
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003537 const std::string lhs_prefix = broadcast_lhs_x ? "(" + dst->underlying_source_variables()[0].type.str + ")" : "";
3538 const std::string rhs_prefix = broadcast_rhs_x ? "(" + dst->underlying_source_variables()[0].type.str + ")" : "";
3539 const std::string op_str = to_string(op);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003540
3541 // Broadcasting on Y is automatic
3542 for(int32_t y = 0; y < dst_h; ++y)
3543 {
3544 _data->code += dst->vector(y).str;
3545 _data->code += " = ";
3546 _data->code += lhs_prefix + lhs->vector(y).str;
3547 _data->code += " ";
3548 _data->code += op_str;
3549 _data->code += " ";
3550 _data->code += rhs_prefix + rhs->vector(y).str;
3551 _data->code += ";\n";
3552 }
3553 };
3554
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003555 void op_cast_expression(const Operand &o_dst, const Operand &o_src, ConvertPolicy policy) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003556 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003557 OperandUnpacker operands(_data->tiles, _data->arguments);
3558 const IVectorTile *src = operands.unpack(o_src);
3559 const IVectorTile *dst = operands.unpack(o_dst);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003560
3561 // const int32_t dst_w = dst->format().w;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003562 const int32_t dst_h = dst->format().h;
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003563 const std::string dt = dst->underlying_source_variables()[0].type.str;
3564 const std::string sat = (policy == ConvertPolicy::Saturate ? "_sat" : "");
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003565
3566 // Broadcasting on Y is automatic
3567 for(int32_t y = 0; y < dst_h; ++y)
3568 {
3569 _data->code += dst->vector(y).str;
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003570 _data->code += " = convert_" + dt + sat + "(";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003571 _data->code += src->vector(y).str;
3572 _data->code += ");\n";
3573 }
3574 };
3575
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003576 void op_assign(const Operand &dst_name, const Operand &src_name) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003577 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003578 OperandUnpacker operands(_data->tiles, _data->arguments);
3579 const IVectorTile *src = operands.unpack(src_name);
3580 const IVectorTile *dst = operands.unpack(dst_name);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003581
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003582 const int32_t dst_w = dst->format().w;
3583 const int32_t dst_h = dst->format().h;
3584 const int32_t src_w = src->format().w;
3585 const std::string dt = dst->underlying_source_variables()[0].type.str;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003586
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003587 const bool broadcast_src_x = dst_w != 1 && src_w == 1;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003588
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003589 const std::string src_prefix = broadcast_src_x ? "(" + dt + ")" : "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003590
3591 // Broadcasting on Y is automatic
3592 for(int32_t y = 0; y < dst_h; ++y)
3593 {
3594 _data->code += dst->vector(y).str;
3595 _data->code += " = ";
3596 _data->code += src_prefix + src->vector(y).str;
3597 _data->code += ";\n";
3598 }
3599 }
3600
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003601 void
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003602 op_unary_elementwise_function(const Operand &dst_name, UnaryFunction func, const Operand &src_name) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003603 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003604 OperandUnpacker operands(_data->tiles, _data->arguments);
3605 const IVectorTile *src = operands.unpack(src_name);
3606 const IVectorTile *dst = operands.unpack(dst_name);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003607
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003608 const int32_t dst_w = dst->format().w;
3609 const int32_t dst_h = dst->format().h;
3610 const int32_t src_w = src->format().w;
3611 const std::string dt = dst->underlying_source_variables()[0].type.str;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003612
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003613 const bool broadcast_src_x = dst_w != 1 && src_w == 1;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003614
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003615 const std::string src_prefix = broadcast_src_x ? "(" + dt + ")" : "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003616
3617 // Broadcasting on Y is automatic
3618 for(int32_t y = 0; y < dst_h; ++y)
3619 {
3620 _data->code += dst->vector(y).str;
3621 _data->code += " = ";
3622
3623 switch(func)
3624 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003625 case UnaryFunction::Exp:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003626 _data->code += "exp(";
3627 break;
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003628 case UnaryFunction::Tanh:
3629 _data->code += "tanh(";
3630 break;
3631 case UnaryFunction::Sqrt:
3632 _data->code += "sqrt(";
3633 break;
3634 case UnaryFunction::Erf:
3635 _data->code += "erf(";
3636 break;
3637 case UnaryFunction::Fabs:
3638 _data->code += "fabs(";
3639 break;
3640 case UnaryFunction::IsGreaterEqual:
3641 _data->code += "isgreaterequal(";
3642 break;
3643 case UnaryFunction::Log:
3644 _data->code += "log(";
3645 break;
3646 case UnaryFunction::SizeOf:
3647 _data->code += "sizeof(";
3648 break;
3649 case UnaryFunction::Round:
3650 _data->code += "round(";
3651 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003652 default:
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003653 CKW_ASSERT_MSG(false, "Unexpected UnaryFunction used.");
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003654 }
3655
3656 _data->code += src_prefix + src->vector(y).str;
3657 _data->code += ");\n";
3658 }
3659 }
3660
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003661 void op_binary_elementwise_function(const Operand &dst_name, BinaryFunction func, const Operand &first_name, const Operand &second_name) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003662 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003663 OperandUnpacker operands(_data->tiles, _data->arguments);
3664 const IVectorTile *first = operands.unpack(first_name);
3665 const IVectorTile *second = operands.unpack(second_name);
3666 const IVectorTile *dst = operands.unpack(dst_name);
3667
3668 const int32_t dst_w = dst->format().w;
3669 const int32_t dst_h = dst->format().h;
3670 const int32_t first_w = first->format().w;
3671 const int32_t second_w = second->format().w;
3672 const auto datatype = dst->underlying_source_variables()[0].type;
3673 const std::string datatype_str = datatype.str;
3674
3675 const bool broadcast_first_x = dst_w != 1 && first_w == 1;
3676 const bool broadcast_second_x = dst_w != 1 && second_w == 1;
3677
3678 const std::string first_prefix = broadcast_first_x ? "(" + datatype_str + ")" : "";
3679 const std::string second_prefix = broadcast_second_x ? "(" + datatype_str + ")" : "";
3680
3681 const bool is_float = (datatype.dt == DataType::Fp32 || datatype.dt == DataType::Fp16);
3682
3683 // Broadcasting on Y is automatic
3684 for(int32_t y = 0; y < dst_h; ++y)
3685 {
3686 _data->code += dst->vector(y).str;
3687 _data->code += " = ";
3688
3689 switch(func)
3690 {
3691 case BinaryFunction::Min:
3692 _data->code += is_float ? "fmin(" : "min(";
3693 break;
3694 case BinaryFunction::Max:
3695 _data->code += is_float ? "fmax(" : "max(";
3696 break;
3697 default:
3698 CKW_ASSERT_MSG(false, "Unexpected BinaryFunction used.");
3699 }
3700
3701 _data->code += first_prefix + first->vector(y).str;
3702 _data->code += ", ";
3703 _data->code += second_prefix + second->vector(y).str;
3704 _data->code += ");\n";
3705 }
3706 }
3707
3708 void op_ternary_elementwise_function(const Operand &dst_name, TernaryFunction func, const Operand &first_name, const Operand &second_name, const Operand &third_name) override
3709 {
3710 OperandUnpacker operands(_data->tiles, _data->arguments);
3711 const IVectorTile *first = operands.unpack(first_name);
3712 const IVectorTile *second = operands.unpack(second_name);
3713 const IVectorTile *third = operands.unpack(third_name);
3714 const IVectorTile *dst = operands.unpack(dst_name);
3715
3716 const int32_t dst_w = dst->format().w;
3717 const int32_t dst_h = dst->format().h;
3718 const int32_t first_w = first->format().w;
3719 const int32_t second_w = second->format().w;
3720 const int32_t third_w = third->format().w;
3721 const std::string dt = dst->underlying_source_variables()[0].type.str;
3722
3723 const bool broadcast_first_x = dst_w != 1 && first_w == 1;
3724 const bool broadcast_second_x = dst_w != 1 && second_w == 1;
3725 const bool broadcast_third_x = dst_w != 1 && third_w == 1;
3726
3727 const std::string first_prefix = broadcast_first_x ? "(" + dt + ")" : "";
3728 const std::string second_prefix = broadcast_second_x ? "(" + dt + ")" : "";
3729 const std::string third_prefix = broadcast_third_x ? "(" + dt + ")" : "";
3730
3731 // Broadcasting on Y is automatic
3732 for(int32_t y = 0; y < dst_h; ++y)
3733 {
3734 _data->code += dst->vector(y).str;
3735 _data->code += " = ";
3736
3737 switch(func)
3738 {
3739 case TernaryFunction::Select:
3740 _data->code += "select(";
3741 break;
3742 default:
3743 CKW_ASSERT_MSG(false, "Unexpected TernaryFunction used.");
3744 }
3745
3746 _data->code += first_prefix + first->vector(y).str;
3747 _data->code += ", ";
3748 _data->code += second_prefix + second->vector(y).str;
3749 _data->code += ", ";
3750 _data->code += third_prefix + third->vector(y).str;
3751 _data->code += ");\n";
3752 }
3753 }
3754
3755 void op_if_header(const Operand &o_lhs, BinaryOp op, const Operand &o_rhs) override
3756 {
3757 OperandUnpacker operands(_data->tiles, _data->arguments);
3758 const IVectorTile *lhs = operands.unpack(o_lhs);
3759 const IVectorTile *rhs = operands.unpack(o_rhs);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003760
3761 assert(is_tile_scalar(lhs));
3762 assert(is_tile_scalar(rhs));
3763
3764 _data->code += "if(";
3765 _data->code += lhs->scalar(0, 0).str;
3766 _data->code += " ";
3767 _data->code += to_string(op);
3768 _data->code += " ";
3769 _data->code += rhs->scalar(0, 0).str;
3770 _data->code += ")\n";
3771 }
3772
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003773 void op_else_if_header(const Operand &o_lhs, BinaryOp op, const Operand &o_rhs) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003774 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003775 _data->code += "else ";
3776 op_if_header(o_lhs, op, o_rhs);
3777 }
3778
3779 void op_else_header() override
3780 {
3781 _data->code += "else\n";
3782 }
3783
3784 void op_for_loop_header(const Operand& var_name, BinaryOp cond_op, const Operand& cond_value_name, AssignmentOp update_op, const Operand& update_value_name) override
3785 {
3786 OperandUnpacker operands(_data->tiles, _data->arguments);
3787 const IVectorTile *var = operands.unpack(var_name);
3788 const IVectorTile *cond_value = operands.unpack(cond_value_name);
3789 const IVectorTile *update_value = operands.unpack(update_value_name);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003790
3791 const int32_t dst_w = var->format().w;
3792 const int32_t dst_h = var->format().h;
3793
3794 // It must be a scalar variable
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01003795 CKW_UNUSED(dst_w, dst_h);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003796 assert(dst_w == 1);
3797 assert(dst_h == 1);
3798
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003799 _data->code += "for(; ";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003800 _data->code += var->scalar(0, 0).str;
3801 _data->code += " ";
3802 _data->code += to_string(cond_op);
3803 _data->code += " " + cond_value->scalar(0, 0).str + "; ";
3804 _data->code += var->scalar(0, 0).str;
3805 _data->code += " ";
3806 _data->code += to_string(update_op);
3807 _data->code += " " + update_value->scalar(0, 0).str + ")";
3808 _data->code += "\n";
3809 }
3810
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003811 void op_load_immediate(const TensorOperand &o_tensor, const Operand &o_dst, const Operand &o_x,
3812 const Operand &o_y, const Operand &o_z, const Operand &o_batch_idx,
3813 const Operand &dilation_y) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003814 {
3815 OperandUnpacker operands(_data->tiles, _data->arguments);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003816
3817 // Not const as it requires changes to 'load_writer'.
3818 IVectorTile *dst = operands.unpack(o_dst);
3819 IVectorTile *x = operands.unpack(o_x);
3820 IVectorTile *y = operands.unpack(o_y);
3821 IVectorTile *z = operands.unpack(o_z);
3822 IVectorTile *dil_y = operands.unpack(dilation_y);
3823 IVectorTile *b = operands.unpack(o_batch_idx);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003824
3825 TensorOperandUnpacker tensor_operands(_data->arguments);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003826 IGpuTensorArgument *tensor = tensor_operands.unpack(o_tensor);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003827 auto gpu_sampler = o_tensor.sampler();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003828
3829 GpuTensor3dMapper mapper(tensor, gpu_sampler);
3830
3831 auto load_writer = ClLoadStoreHelperWriterFactory::create(this, mapper, GpuLoadStoreType::Load);
3832
3833 // Initialize the constant part
3834 load_writer->initialize(dst, x, z, b);
3835
3836 for(int i = 0; i < dst->format().h; ++i)
3837 {
3838 std::string coord_y = y->scalar(0, 0).str + " + " + std::to_string(i);
3839 if(dil_y->scalar(0, 0).str != "1")
3840 {
3841 coord_y += " * " + dil_y->scalar(0, 0).str;
3842 }
3843 load_writer->write(std::make_pair(i, coord_y));
3844 }
3845
3846 load_writer->finalize();
3847 }
3848
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003849 void op_load_indirect(const TensorOperand &o_tensor, const Operand &o_dst, const Operand &o_x,
3850 const Operand &o_indirect_h, const Operand &o_z,
3851 const Operand &o_batch_idx) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003852 {
3853 OperandUnpacker operands(_data->tiles, _data->arguments);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003854
3855 // Not const as it requires changes to 'load_writer'.
3856 IVectorTile *dst = operands.unpack(o_dst);
3857 IVectorTile *x = operands.unpack(o_x);
3858 IVectorTile *y_ind = operands.unpack(o_indirect_h);
3859 IVectorTile *z = operands.unpack(o_z);
3860 IVectorTile *b = operands.unpack(o_batch_idx);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003861
3862 TensorOperandUnpacker tensor_operands(_data->arguments);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003863 IGpuTensorArgument *tensor = tensor_operands.unpack(o_tensor);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003864 auto gpu_sampler = o_tensor.sampler();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003865
3866 GpuTensor3dMapper mapper(tensor, gpu_sampler);
3867
3868 auto load_writer = ClLoadStoreHelperWriterFactory::create(this, mapper, GpuLoadStoreType::Load);
3869
3870 // Initialize the constant part
3871 load_writer->initialize(dst, x, z, b);
3872
3873 for(int i = 0; i < dst->format().h; ++i)
3874 {
3875 load_writer->write(std::make_pair(i, y_ind->scalar(0, i).str));
3876 }
3877
3878 load_writer->finalize();
3879 }
3880
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003881 void op_store_immediate(const TensorOperand &tensor_name, const Operand &src_name, const Operand &x_name,
3882 const Operand &y_name, const Operand &z_name,
3883 const Operand &batch_index_name) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003884 {
3885 OperandUnpacker operands(_data->tiles, _data->arguments);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003886
3887 // Not const as it requires changes to 'load_writer'.
3888 IVectorTile *src = operands.unpack(src_name);
3889 IVectorTile *x = operands.unpack(x_name);
3890 IVectorTile *y = operands.unpack(y_name);
3891 IVectorTile *z = operands.unpack(z_name);
3892 IVectorTile *b = operands.unpack(batch_index_name);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003893
3894 TensorOperandUnpacker tensor_operands(_data->arguments);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003895 IGpuTensorArgument *tensor = tensor_operands.unpack(tensor_name);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003896 auto gpu_sampler = tensor_name.sampler();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003897
3898 GpuTensor3dMapper mapper(tensor, gpu_sampler);
3899
3900 auto store_writer = ClLoadStoreHelperWriterFactory::create(this, mapper, GpuLoadStoreType::Store);
3901
3902 // Initialize the constant part
3903 store_writer->initialize(src, x, z, b);
3904
3905 int32_t tile_h = src->format().h;
3906
3907 for(int m0 = tile_h - 1; m0 >= 0; m0--)
3908 {
3909 store_writer->write(std::make_pair(m0, y->scalar(0, 0).str + " + " + std::to_string(m0)));
3910 }
3911
3912 store_writer->finalize();
3913 }
3914
3915 void op_return() override
3916 {
3917 _data->code += "return;\n";
3918 }
3919
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003920 void util_get_indirect_buffer(const Operand &o_dst, const TensorOperand &o_tensor, const Operand &o_x,
3921 const Operand &o_y, const Operand &o_x_off, const Operand &o_y_off) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003922 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003923 OperandUnpacker operands(_data->tiles, _data->arguments);
3924 const IVectorTile *dst = operands.unpack(o_dst);
3925 const IVectorTile *x = operands.unpack(o_x);
3926 const IVectorTile *y = operands.unpack(o_y);
3927 const IVectorTile *x_off = operands.unpack(o_x_off);
3928 const IVectorTile *y_off = operands.unpack(o_y_off);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003929
3930 TensorOperandUnpacker tensor_operands(_data->arguments);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003931 IGpuTensorArgument *tensor = tensor_operands.unpack(o_tensor);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003932
3933 assert(dst->format().w == 1);
3934 assert(x->format().w == 1);
3935 assert(y->format().w == 1);
3936 assert(x_off->format().w == 1);
3937 assert(y_off->format().w == 1);
3938 assert(dst->format().dt == DataType::Int32);
3939 assert(x->format().dt == DataType::Int32);
3940 assert(y->format().dt == DataType::Int32);
3941 assert(x_off->format().dt == DataType::Int32);
3942 assert(y_off->format().dt == DataType::Int32);
3943
3944 const std::string width = tensor->component(TensorComponent::W);
3945 const std::string height = tensor->component(TensorComponent::H);
3946 const std::string wxh = tensor->component(TensorComponent::WxH);
3947 /*
3948 int x_s;
3949 int y_s;
3950 x_s = (xi_0 + x_k);
3951 y_s = (yi_0 + y_k);
3952 mi_0 = x_s + y_s * width + b * widthxheight;
3953 mi_0 = select(-1, mi_0, x_s >= 0);
3954 mi_0 = select(-1, mi_0, y_s >= 0);
3955 mi_0 = select(-1, mi_0, x_s < 128);
3956 mi_0 = select(-1, mi_0, y_s < 128);
3957 */
3958 compound_statement_begin();
3959 declare_tile("_x_s", TileInfo(DataType::Int32));
3960 declare_tile("_y_s", TileInfo(DataType::Int32));
3961 auto x_s = operands.unpack(Operand("_x_s"));
3962 auto y_s = operands.unpack(Operand("_y_s"));
3963 for(int i = 0; i < dst->format().h; ++i)
3964 {
3965 // x_s = (xi_0 + x_k);
3966 // y_s = (yi_0 + y_k);
3967 _data->code += x_s->scalar(0, i).str;
3968 _data->code += " = (";
3969 _data->code += x->scalar(0, i).str;
3970 _data->code += " + ";
3971 _data->code += x_off->scalar(0, i).str;
3972 _data->code += ");\n";
3973 _data->code += y_s->scalar(0, i).str;
3974 _data->code += " = (";
3975 _data->code += y->scalar(0, i).str;
3976 _data->code += " + ";
3977 _data->code += y_off->scalar(0, i).str;
3978 _data->code += ");\n";
3979 // mi_0 = x_s + y_s * width;
3980 _data->code += dst->scalar(0, i).str;
3981 _data->code += " = ";
3982 _data->code += x_s->scalar(0, i).str;
3983 _data->code += " + ";
3984 _data->code += y_s->scalar(0, i).str;
3985 _data->code += " * " + width + ";\n";
3986 // mi_0 = select(wxh, mi_0, x_s >= 0);
3987 _data->code += dst->scalar(0, i).str;
3988 _data->code += " = select(-1, ";
3989 _data->code += dst->scalar(0, i).str;
3990 _data->code += ", ";
3991 _data->code += x_s->scalar(0, i).str;
3992 _data->code += " >= 0);\n";
3993 // mi_0 = select(wxh, mi_0, y_s >= 0);
3994 _data->code += dst->scalar(0, i).str;
3995 _data->code += " = select(-1, ";
3996 _data->code += dst->scalar(0, i).str;
3997 _data->code += ", ";
3998 _data->code += y_s->scalar(0, i).str;
3999 _data->code += " >= 0);\n";
4000 // mi_0 = select(wxh, mi_0, x_s < width);
4001 _data->code += dst->scalar(0, i).str;
4002 _data->code += " = select(-1, ";
4003 _data->code += dst->scalar(0, i).str;
4004 _data->code += ", ";
4005 _data->code += x_s->scalar(0, i).str;
4006 _data->code += " < ";
4007 _data->code += width + ");\n";
4008 // mi_0 = select(wxh, mi_0, y_s < height);
4009 _data->code += dst->scalar(0, i).str;
4010 _data->code += " = select(-1, ";
4011 _data->code += dst->scalar(0, i).str;
4012 _data->code += ", ";
4013 _data->code += y_s->scalar(0, i).str;
4014 _data->code += " < ";
4015 _data->code += height + ");\n";
4016 }
4017 compound_statement_end();
4018 }
4019
4020private:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01004021 GpuKernelWriterDataHolder *_data{ nullptr };
4022 GpuKernelWriterAttribute *_attr{ nullptr };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004023};
4024
4025/** IGpuKernelWriter factory class */
4026class GpuKernelWriterFactory final
4027{
4028public:
4029 /** Static method to call the IGpuKernelWriter class accordingly with the Gpu programming language
4030 *
4031 * @param[in] gpu GPU target
4032 *
4033 * @return IGpuKernelWriter
4034 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01004035 static std::unique_ptr<IGpuKernelWriter>
4036 create(GpuKernelWriterAttribute *attr, GpuKernelWriterDataHolder *x)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004037 {
4038 switch(x->programming_language())
4039 {
4040 case GpuTargetLanguage::OpenCL:
4041 return std::make_unique<ClKernelWriter>(attr, x);
4042 default:
4043 std::cout << "Unsupported Gpu programming language" << std::endl;
4044 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01004045 return nullptr;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004046 }
4047 }
4048};
4049
Nikolaj Jensenacea4072023-07-03 09:44:42 +01004050inline int32_t
4051adjust_step(TensorSamplerFormat tensor_format, int32_t step, const TensorInfo *tensor_info_id, int32_t idx)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004052{
4053 auto tensor = tensor_info_id->shape;
4054
Nikolaj Jensenacea4072023-07-03 09:44:42 +01004055 int32_t dim[3] = { 0 };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004056
4057 switch(tensor_format)
4058 {
4059 case TensorSamplerFormat::C_W_H:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01004060 dim[0] = tensor[0];
4061 dim[1] = tensor[1];
4062 dim[2] = tensor[2];
4063 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004064 case TensorSamplerFormat::C_WH_1:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01004065 dim[0] = tensor[0];
4066 dim[1] = tensor[1] * tensor[2];
4067 dim[2] = 1;
4068 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004069 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01004070 std::cout << "Unsupported tensor format" << std::endl;
4071 assert(false);
4072 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004073 }
4074
4075 return std::min(step, dim[idx]);
4076}
4077
4078} // namespace prototype
4079} // namespace ckw
4080
Viet-Hoa Doce3c48c2023-07-03 13:44:43 +01004081#endif // CKW_PROTOTYPE_SRC_PROTOTYPE_H