blob: 88d6e898e4a8c101cd20db630ff0fa0b974edc35 [file] [log] [blame]
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001/*
2 * Copyright (c) 2023 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
Viet-Hoa Doce3c48c2023-07-03 13:44:43 +010025#ifndef CKW_PROTOTYPE_SRC_PROTOTYPE_H
26#define CKW_PROTOTYPE_SRC_PROTOTYPE_H
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010027
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010028#include <algorithm>
29#include <array>
Nikolaj Jensenacea4072023-07-03 09:44:42 +010030#include <cassert> // assert (to be removed)
31#include <chrono>
32#include <cmath>
33#include <cstdint> // int32_t
Nikolaj Jensen5ff48022023-06-27 14:13:24 +010034#include <functional>
Nikolaj Jensenacea4072023-07-03 09:44:42 +010035#include <iostream> // cout (to be removed)
36#include <map>
37#include <memory>
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010038#include <stdexcept>
Nikolaj Jensenacea4072023-07-03 09:44:42 +010039#include <string>
40#include <unordered_map>
41#include <vector>
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010042
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010043#include "ckw/Error.h"
Nikolaj Jensenacea4072023-07-03 09:44:42 +010044#include "ckw/TensorInfo.h"
Nikolaj Jensen5ff48022023-06-27 14:13:24 +010045#include "ckw/types/ConvertPolicy.h"
46#include "ckw/types/DataType.h"
47#include "ckw/types/Functions.h"
48#include "ckw/types/GpuTargetLanguage.h"
49#include "ckw/types/Operators.h"
50#include "ckw/types/TensorSamplerTypes.h"
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010051
52namespace ckw
53{
Nikolaj Jensenacea4072023-07-03 09:44:42 +010054namespace prototype
55{
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010056
57// Dummy data structure for Size2D
58using Size2D = std::vector<int32_t>;
59
60// Dummy Status
61using Status = void;
62
63enum class ComponentType : int32_t
64{
65 Complex = 0,
66 Simple = 1,
67 Unfusable = 2
68};
69
70enum class GpuCompilationSpeed
71{
Nikolaj Jensenacea4072023-07-03 09:44:42 +010072 Fast = 0x00, // fast compilation may increase the latency of the network
73 Slow = 0x01 // slow compilation may decrease the latency of the network
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010074};
75
76enum class GpuExtensions
77{
78 Fp16,
79 Dot8,
80 Mmul,
81 FastMath
82};
83
84struct TensorInfo
85{
Nikolaj Jensenacea4072023-07-03 09:44:42 +010086 TensorShape shape{ { 0 } };
87 DataType data_type{ DataType::Unknown };
88 TensorDataLayout data_layout{ TensorDataLayout::Nhwc };
89 int32_t id{ -1 };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010090};
91
92struct ComponentAttribute
93{
Nikolaj Jensenacea4072023-07-03 09:44:42 +010094 GpuCompilationSpeed compilation_speed{ GpuCompilationSpeed::Fast };
95 bool overwrite_tile{ true };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010096};
97
98inline std::string data_type_to_cl_type(DataType dt)
99{
100 switch(dt)
101 {
102 case DataType::Fp32:
103 return "float";
104 case DataType::Fp16:
105 return "half";
106 case DataType::Int8:
107 return "char";
108 case DataType::Uint8:
109 return "uchar";
110 case DataType::Uint16:
111 return "ushort";
112 case DataType::Int16:
113 return "short";
114 case DataType::Uint32:
115 return "uint";
116 case DataType::Int32:
117 return "int";
118 case DataType::Bool:
119 return "bool";
120 default:
121 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +0100122 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100123 }
124}
125
126inline int32_t width_to_cl_vector_size(int32_t width)
127{
128 switch(width)
129 {
130 case 1:
131 return 1;
132 case 2:
133 return 2;
134 case 3:
135 return 3;
136 case 4:
137 return 4;
138 case 5:
139 case 6:
140 case 7:
141 case 8:
142 return 8;
143 case 9:
144 case 10:
145 case 11:
146 case 12:
147 case 13:
148 case 14:
149 case 15:
150 case 16:
151 return 16;
152 default:
153 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +0100154 return 0;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100155 }
156}
157
158inline std::string get_cl_data_type(DataType dt, int32_t width)
159{
160 std::string data_type;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100161 int32_t w = width_to_cl_vector_size(width);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100162 data_type += data_type_to_cl_type(dt);
163 if(w != 1)
164 {
165 data_type += std::to_string(w);
166 }
167 return data_type;
168}
169
170inline std::string to_opencl_store(int32_t vector_length)
171{
172 if(vector_length != 1)
173 {
174 return "vstore" + std::to_string(vector_length) + "(";
175 }
176 else
177 {
178 return "*(";
179 }
180}
181
182struct TileInfo
183{
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100184 TileInfo()
185 {
186 }
187
188 TileInfo(DataType dt)
189 : dt(dt), w(1), h(1)
190 {
191 }
192
193 TileInfo(DataType dt, int32_t width)
194 : dt(dt), w(width), h(1)
195 {
196 }
197
198 TileInfo(DataType dt, int32_t width, int32_t height)
199 : dt(dt), w(width), h(height)
200 {
201 }
202
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100203 DataType dt{ DataType::Unknown }; // Data type of the tile
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100204 int32_t w{ 0 }; // Width (i.e. c0 - portion of the channels)
205 int32_t h{ 0 }; // Height (i.e. s0 - portion of the spatial dimensions)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100206};
207
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100208inline std::ostream &operator<<(std::ostream &o, const TileInfo &a)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100209{
210 o << a.w << " x " << a.h;
211 return o;
212}
213
214struct DataTypeAsString
215{
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100216 std::string str{ "" };
217 DataType dt{ DataType::Unknown };
218 int32_t size{ 1 };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100219};
220
221struct ValueAsString
222{
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100223 std::string str{ "" };
224 DataTypeAsString type{};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100225};
226
227// https://stackoverflow.com/questions/51515378/storing-and-accessing-tile-properties-in-c
228// A Tile is a collection of variables used to express a 2D data.
229class IScalarTile
230{
231public:
232 virtual ~IScalarTile() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100233
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100234 /** Method to get the scalar variable from a tile
235 * @param[in] x X coordinate on the width of the tile. If out-of-bound, the coordinate is clamped to the nearest valid edge
236 * @param[in] y Y coordinate on the height of the tile. If out-of-bound, the coordinate is clamped to the nearest valid edge
237 *
238 * @return the scalar variable as a string
239 */
240 virtual ValueAsString scalar(int32_t x, int32_t y) const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100241
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100242 /** Method to get the list of underlying variable names used by the tile
243 *
244 * @return the list of variable names
245 */
246 virtual std::vector<ValueAsString> underlying_source_variables() const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100247
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100248 /** Method to get the name of the tile.
249 *
250 * @return the name of the tile
251 */
252 std::string name() const
253 {
254 return _basename;
255 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100256
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100257 /** Method to get the tile format
258 *
259 * @return the format
260 */
261 TileInfo format() const
262 {
263 return _format;
264 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100265
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100266 /** Method to know whether the tile is assignable or not (constant)
267 *
268 * @return true if the tile is assignable
269 */
270 virtual bool is_assignable() const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100271
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100272 /** Method to know whether the tile needs to be declared
273 *
274 * @return true if the tile needs to be declared in the code before being used
275 */
276 virtual bool need_declaration() const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100277
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100278protected:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100279 TileInfo _format{}; // Tile format
280 std::string _basename{ "" }; // Tile name
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100281};
282
283// A tile is a collection of variables used to express a 2D data. The variables are vectors in the GPU context.
284// The vector size is given by the width of the tile. The number of vectors height by depth defines the number of vectors
285class IVectorTile : public IScalarTile
286{
287public:
288 virtual ~IVectorTile() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100289
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100290 /** Method to get the vector variable from a tile. A vector is an ordered homogeneous collection of two or more scalars.
291 * The user can query the list of supported width for the vectors through preferred_vector_sizes().
292 *
293 * @param[in] y Y coordinate on the height of the tile. If out-of-bound, the coordinate is clamped to the nearest valid edge
294 *
295 * @return the vector variable as a string
296 */
297 virtual ValueAsString vector(int32_t y) const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100298
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100299 /** Method to get a vector variable from a tile. A vector is an ordered homogeneous collection of two or more scalars.
300 *
301 * @return the vector variable as a string
302 */
303 virtual ValueAsString vector(int32_t x_start, int32_t width, int32_t y) const = 0;
304 /** Method to get the preferred vector sizes.
305 *
306 * @return a vector with the preferred vector sizes
307 */
308 //virtual std::vector<int32_t> preferred_vector_sizes() const = 0;
309};
310
311class ClTile : public IVectorTile
312{
313public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100314 ClTile(const std::string &name, TileInfo format)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100315 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100316 _format = format;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100317 _basename = name;
318 }
319
320 ValueAsString scalar(int32_t x, int32_t y) const override
321 {
322 x = std::max(std::min(x, _format.w - 1), static_cast<int32_t>(0));
323 y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
324
325 ValueAsString t;
326 t.str = build_variable_name(y);
327 t.type.str = get_cl_data_type(_format.dt, 1);
328 t.type.dt = _format.dt;
329 t.type.size = 1;
330
331 // Check required because if the width has only one element, we cannot use .s0
332 if(_format.w != 1)
333 {
334 // Automatic broadcasting
335 t.str += ".s" + std::to_string(x);
336 }
337
338 return t;
339 }
340
341 ValueAsString vector(int32_t y) const override
342 {
343 y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
344
345 ValueAsString t;
346 t.str = build_variable_name(y);
347 t.type.str = get_cl_data_type(_format.dt, _format.w);
348 t.type.dt = _format.dt;
349 t.type.size = _format.w;
350 return t;
351 }
352
353 ValueAsString vector(int32_t x_start, int32_t width, int32_t y) const override
354 {
355 y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
356
357 ValueAsString t;
358 t.str = build_variable_name(y);
359 t.type.str = get_cl_data_type(_format.dt, width);
360 t.type.dt = _format.dt;
361 t.type.size = width;
362
363 if(_format.w != 1)
364 {
365 t.str += ".s";
366 for(int i = 0; i < width; ++i)
367 {
368 t.str += to_scalar_hex(x_start + i);
369 }
370 }
371 return t;
372 }
373
374 std::vector<ValueAsString> underlying_source_variables() const override
375 {
376 std::vector<ValueAsString> vars;
377 for(int32_t y = 0; y < _format.h; ++y)
378 {
379 ValueAsString t;
380 t.str = build_variable_name(y);
381 t.type.str = get_cl_data_type(_format.dt, _format.w);
382 t.type.dt = _format.dt;
383 t.type.size = _format.w;
384 vars.push_back(t);
385 }
386 return vars;
387 }
388
389 bool is_assignable() const override
390 {
391 return true;
392 }
393
394 bool need_declaration() const override
395 {
396 return true;
397 }
398
399private:
400 std::string build_variable_name(int32_t y) const
401 {
402 std::string var_name = _basename;
403
404 if(_format.h == 1)
405 {
406 return var_name;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100407 }
408 else
409 {
410 var_name += "_";
411 var_name += std::to_string(y);
412 }
413
414 return var_name;
415 }
416
417 std::string to_scalar_hex(int32_t x) const
418 {
419 switch(x)
420 {
421 case 0:
422 case 1:
423 case 2:
424 case 3:
425 case 4:
426 case 5:
427 case 6:
428 case 7:
429 case 8:
430 case 9:
431 return std::to_string(x);
432 case 10:
433 return "A";
434 case 11:
435 return "B";
436 case 12:
437 return "C";
438 case 13:
439 return "D";
440 case 14:
441 return "E";
442 case 15:
443 return "F";
444 default:
Viet-Hoa Doe1880f02023-06-28 10:25:35 +0100445 std::cout << "Unsupported hexadecimal value" << std::endl;
446 assert(false);
447 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100448 }
449 }
450};
451
452// Unique features: It contains values in the form of string. The name used for this object is misleading since the variables can change the value over time.
453class ClConstantTile : public IVectorTile
454{
455public:
456 ClConstantTile(const std::vector<std::vector<std::string>> &in, DataType dt)
457 {
458 _format.w = in[0].size();
459 _format.h = in.size();
460 _format.dt = dt;
461
462 _data = std::vector<std::vector<std::string>>(_format.h, std::vector<std::string>(_format.w));
463
464 for(int32_t y = 0; y < _format.h; ++y)
465 {
466 for(int32_t x = 0; x < _format.w; ++x)
467 {
468 _data[y][x] = in[y][x];
469 }
470 }
471 }
472
473 ValueAsString scalar(int32_t x, int32_t y) const override
474 {
475 x = std::max(std::min(x, _format.w - 1), static_cast<int32_t>(0));
476 y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
477
478 ValueAsString t;
479 t.str = _data[y][x];
480 t.type.str = get_cl_data_type(_format.dt, 1);
481 t.type.dt = _format.dt;
482 t.type.size = 1;
483
484 return t;
485 }
486
487 ValueAsString vector(int32_t y) const override
488 {
489 y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
490
491 return vector(0, _format.w, y);
492 }
493
494 ValueAsString vector(int32_t x_start, int32_t width, int32_t y) const override
495 {
496 y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
497
498 ValueAsString t;
499 t.str = "";
500 t.type.str = get_cl_data_type(_format.dt, width);
501 t.type.dt = _format.dt;
502 t.type.size = width;
503
504 if(width > 1)
505 {
506 t.str += "((" + get_cl_data_type(_format.dt, width) + ")(";
507 }
508
509 int32_t x = x_start;
510 for(; x < width - 1; ++x)
511 {
512 t.str += scalar(x, y).str;
513 t.str += ", ";
514 }
515 t.str += scalar(x, y).str;
516
517 if(width > 1)
518 {
519 t.str += "))";
520 }
521
522 return t;
523 }
524
525 std::vector<ValueAsString> underlying_source_variables() const override
526 {
527 std::vector<ValueAsString> vars;
528
529 for(int32_t y = 0; y < _format.h; ++y)
530 {
531 for(int32_t x = 0; x < _format.w; ++x)
532 {
533 ValueAsString t;
534 t.str = _data[y][x];
535 t.type.str = get_cl_data_type(_format.dt, 1);
536 t.type.dt = _format.dt;
537 t.type.size = 1;
538 vars.push_back(t);
539 }
540 }
541
542 return vars;
543 }
544
545 bool is_assignable() const override
546 {
547 return false;
548 }
549
550 bool need_declaration() const override
551 {
552 return false;
553 }
554
555private:
556 std::vector<std::vector<std::string>> _data{};
557};
558
559enum class TensorComponentIndex : int32_t
560{
561 IndexMask = 0x0000000f,
562};
563
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100564enum class TensorComponentGroup : int32_t
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100565{
566 OffsetFirstElement = 0x00000100,
567 Stride = 0x00001000,
568 Dimension = 0x00010000,
569 FoldedDimension = 0x00100000,
570 Constant = 0x01000000
571};
572
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100573inline std::string to_string(TensorComponentType x)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100574{
575 switch(x)
576 {
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100577 case TensorComponentType::Unknown:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100578 return "Unknown";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100579 case TensorComponentType::OffsetFirstElement:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100580 return "OffsetFirstElement";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100581 case TensorComponentType::Stride1:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100582 return "Stride1";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100583 case TensorComponentType::Stride2:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100584 return "Stride2";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100585 case TensorComponentType::Stride3:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100586 return "Stride3";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100587 case TensorComponentType::Stride4:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100588 return "Stride4";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100589 case TensorComponentType::Dim0:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100590 return "Dim0";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100591 case TensorComponentType::Dim1:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100592 return "Dim1";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100593 case TensorComponentType::Dim2:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100594 return "Dim2";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100595 case TensorComponentType::Dim3:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100596 return "Dim3";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100597 case TensorComponentType::Dim4:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100598 return "Dim4";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100599 case TensorComponentType::Dim1xDim2:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100600 return "Dim1xDim2";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100601 case TensorComponentType::Dim1xDim2xDim3:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100602 return "Dim1xDim2xDim3";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100603 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100604 assert(false);
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100605 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100606 }
607}
608
609class ITensorArgument
610{
611public:
612 virtual ~ITensorArgument() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100613
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100614 /** Method to get the tensor component as a string
615 *
616 * @param[in] x tensor component to query
617 *
618 * @return the tensor component as a string
619 */
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100620 virtual std::string component(TensorComponentType x) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100621
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100622 /** Method to get the tensor component type declaration as a string
623 *
624 * @return the tensor component type declaration as a string
625 */
626 virtual std::string component_type_declaration() const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100627
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100628 /** Method to get the tensor component data type
629 *
630 * @return the tensor component data type
631 */
632 virtual DataType component_data_type() const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100633
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100634 /** Method to get the tensor component declarations
635 *
636 * @return a vector containing the tensor component declarations
637 */
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100638 virtual std::vector<TensorComponentType> component_declarations() const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100639
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100640 /** Method to get the name of the tensor argument.
641 *
642 * @return the name of the tensor argument
643 */
644 std::string name() const
645 {
646 return _basename;
647 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100648
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100649 /** Method to get the tensor format
650 *
651 * @return the format
652 */
653 TensorInfo format() const
654 {
655 return _format;
656 }
657
658protected:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100659 TensorInfo _format{};
660 std::string _basename{};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100661};
662
663enum class GpuTensorStorage : int32_t
664{
665 Unknown = 0x0000,
666 BufferUint8Ptr = 0x0012,
667 Image2dReadOnly = 0x0020,
668 Image2dWriteOnly = 0x0021,
669 Image3dReadOnly = 0x0030,
670 Image3dWriteOnly = 0x0031
671};
672
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100673inline GpuTensorStorage to_gpu_tensor_storage(TensorStorageType s)
674{
675 switch(s)
676 {
677 case TensorStorageType::Unknown:
678 return GpuTensorStorage::Unknown;
679
680 case TensorStorageType::BufferUint8Ptr:
681 return GpuTensorStorage::BufferUint8Ptr;
682
683 case TensorStorageType::Texture2dReadOnly:
684 return GpuTensorStorage::Image2dReadOnly;
685
686 case TensorStorageType::Texture2dWriteOnly:
687 return GpuTensorStorage::Image2dWriteOnly;
688
689 default:
690 assert(false);
691 return GpuTensorStorage::Unknown;
692 }
693}
694
695inline TensorStorageType to_tensor_storage(GpuTensorStorage s)
696{
697 switch(s)
698 {
699 case GpuTensorStorage::Unknown:
700 return TensorStorageType::Unknown;
701
702 case GpuTensorStorage::BufferUint8Ptr:
703 return TensorStorageType::BufferUint8Ptr;
704
705 case GpuTensorStorage::Image2dReadOnly:
706 return TensorStorageType::Texture2dReadOnly;
707
708 case GpuTensorStorage::Image2dWriteOnly:
709 return TensorStorageType::Texture2dWriteOnly;
710
711 default:
712 assert(false);
713 return TensorStorageType::Unknown;
714 }
715}
716
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100717class IGpuTensorArgument : public ITensorArgument
718{
719public:
720 virtual ~IGpuTensorArgument() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100721
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100722 /** Method to get the tensor storage, which is the underlying storage used to keep the data memory
723 *
724 * @param[in] x tensor storage to query
725 *
726 * @return the tensor storage as a string
727 */
728 virtual std::string storage(GpuTensorStorage x) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100729
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100730 /** Method to get the tensor storage type declaration as a string
731 *
732 * @param[in] x tensor component to query
733 *
734 * @return the tensor storage type declaration as a string
735 */
736 virtual std::string storage_type_declaration(GpuTensorStorage x) const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100737
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100738 /** Method to get the tensor storage declarations
739 *
740 * @return a vector containing the tensor storage declarations
741 */
742 virtual std::vector<GpuTensorStorage> storage_declarations() const = 0;
743};
744
745class ClTensorArgument : public IGpuTensorArgument
746{
747public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100748 ClTensorArgument(const std::string &name, const TensorInfo &x, bool return_by_value_when_possible)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100749 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100750 _basename = name;
751 _format = x;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100752 _return_by_value_when_possible = return_by_value_when_possible;
753 }
754
755 // Methods to override
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100756 std::string component(TensorComponentType x) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100757 {
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100758 if((static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentGroup::Constant)))
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100759 {
760 int32_t idx = static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentIndex::IndexMask);
761 return std::to_string(idx - 1);
762 }
763
764 if(_return_by_value_when_possible)
765 {
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100766 if((static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentGroup::Dimension)))
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100767 {
768 int32_t idx = static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentIndex::IndexMask);
769 return std::to_string(_format.shape[idx]);
770 }
771
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100772 if((static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentGroup::FoldedDimension)))
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100773 {
774 switch(x)
775 {
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100776 case TensorComponentType::Dim1xDim2:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100777 return std::to_string(_format.shape[1] * _format.shape[2]);
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100778 case TensorComponentType::Dim1xDim2xDim3:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100779 return std::to_string(_format.shape[1] * _format.shape[2] * _format.shape[2]);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100780 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100781 std::cout << "Unsupported folded dimension" << std::endl;
782 assert(false);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100783 }
784 }
785 }
786
787 if(std::find(_components_required.begin(), _components_required.end(), x) == _components_required.end())
788 {
789 _components_required.push_back(x);
790 }
791
792 return build_component_name(x);
793 }
794
795 std::string component_type_declaration() const override
796 {
797 return "int";
798 };
799
800 DataType component_data_type() const override
801 {
802 return DataType::Int32;
803 }
804
805 std::string storage(GpuTensorStorage x) override
806 {
807 if(std::find(_storage_required.begin(), _storage_required.end(), x) == _storage_required.end())
808 {
809 _storage_required.push_back(x);
810 }
811
812 return build_storage_name(x);
813 }
814
815 std::string storage_type_declaration(GpuTensorStorage x) const override
816 {
817 switch(x)
818 {
819 case GpuTensorStorage::BufferUint8Ptr:
820 return "__global uchar*";
821 case GpuTensorStorage::Image2dReadOnly:
822 return "__read_only image2d_t";
823 case GpuTensorStorage::Image2dWriteOnly:
824 return "__write_only image2d_t";
825 case GpuTensorStorage::Image3dReadOnly:
826 return "__read_only image3d_t ";
827 case GpuTensorStorage::Image3dWriteOnly:
828 return "__write_only image3d_t ";
829 default:
830 std::cout << "Unsupported storage" << std::endl;
831 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +0100832 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100833 }
834 };
835
836 std::vector<GpuTensorStorage> storage_declarations() const override
837 {
838 return _storage_required;
839 }
840
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100841 std::vector<TensorComponentType> component_declarations() const override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100842 {
843 return _components_required;
844 }
845
846private:
847 std::string build_storage_name(GpuTensorStorage x) const
848 {
849 std::string var_name = _basename;
850
851 switch(x)
852 {
853 case GpuTensorStorage::BufferUint8Ptr:
854 return var_name + "_ptr";
855 case GpuTensorStorage::Image2dReadOnly:
856 case GpuTensorStorage::Image2dWriteOnly:
857 return var_name + "_img2d";
858 case GpuTensorStorage::Image3dReadOnly:
859 case GpuTensorStorage::Image3dWriteOnly:
860 return var_name + "_img3d";
861 default:
862 std::cout << "Unsupported storage" << std::endl;
863 assert(false);
864 }
865
866 return var_name;
867 }
868
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100869 std::string build_component_name(TensorComponentType x) const
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100870 {
871 std::string var_name = _basename;
872
873 switch(x)
874 {
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100875 case TensorComponentType::OffsetFirstElement:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100876 return var_name + "_offset_first_element";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100877 case TensorComponentType::Stride1:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100878 return var_name + "_stride1";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100879 case TensorComponentType::Stride2:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100880 return var_name + "_stride2";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100881 case TensorComponentType::Stride3:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100882 return var_name + "_stride3";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100883 case TensorComponentType::Dim0:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100884 return var_name + "_dim0";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100885 case TensorComponentType::Dim1:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100886 return var_name + "_dim1";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100887 case TensorComponentType::Dim2:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100888 return var_name + "_dim2";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100889 case TensorComponentType::Dim3:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100890 return var_name + "_dim3";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100891 case TensorComponentType::Dim1xDim2:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100892 return var_name + "_dim1xdim2";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100893 case TensorComponentType::Dim1xDim2xDim3:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100894 return var_name + "_dim1xdim2xdim3";
895 default:
896 std::cout << "Unsupported component" << std::endl;
897 assert(false);
898 }
899
900 return var_name;
901 }
902
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100903 bool _return_by_value_when_possible{ false };
904 std::vector<GpuTensorStorage> _storage_required{};
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100905 std::vector<TensorComponentType> _components_required{};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100906};
907
908/**
909 * @brief Data structure that contains the declared tiles by the components.
910 * The registry is a linear data structure that follows the similar principle of the stack. The user can use the @p increment_registry_level() method to
911 * increase the level of the stack (0 when it starts). When the user uses the @p decrement_registry_level() method, the registry decreases the level of the stack
912 * and remove (pop) all the tiles from the level above.
913 * When a tile is declared on the level 0, it is a global tile. A global tile is visible in all parts of the code.
914 * Since different components may use the same name to define a tile, the registry adopts the IdSpace concept, an @p id to prevent name collisions
915 * when declaring tiles among different components.
916 *
917 */
918class GpuTileRegistry
919{
920public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100921 enum class RegistryTileType
922 {
923 Tile,
924 Link
925 };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100926
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100927 using RegistryIdSpace = int32_t;
928 using RegistryLevel = int32_t;
929 using RegistryTileName = std::string;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100930
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100931 struct RegistryTileTableEntry
932 {
933 RegistryLevel registry_level{ 0 };
934 std::unique_ptr<IVectorTile> tile_object{ nullptr };
935 };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100936
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100937 struct RegistryTileTypeTableEntry
938 {
939 RegistryTileType tile_type{ RegistryTileType::Tile };
940 RegistryTileName tile_name{};
941 RegistryIdSpace registry_idspace{ 0 };
942 RegistryLevel registry_level{ 0 };
943 };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100944
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100945 using RegistryTileTable = std::map<RegistryIdSpace, std::map<RegistryTileName, RegistryTileTableEntry>>;
946 using RegistryTileTypeTable = std::map<RegistryIdSpace, std::map<RegistryTileName, RegistryTileTypeTableEntry>>;
947
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100948 /**
949 * @brief Construct a new Gpu Tile Registry object
950 *
951 */
952 GpuTileRegistry()
953 {
954 _language = GpuTargetLanguage::Unknown;
955 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100956
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100957 /**
958 * @brief Construct a new Gpu Tile Registry object providing the Gpu programming language
959 *
960 * @param[in] language Gpu programming language to use
961 */
962 GpuTileRegistry(GpuTargetLanguage language)
963 {
964 _language = language;
965 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100966
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100967 /**
968 * @brief Default destructor. Destroy the Gpu Tile Registry object
969 *
970 */
971 ~GpuTileRegistry() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100972
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100973 /**
974 * @brief Set the working IdSpace for the tile registry. IdSpace is used to prevent name collisions when declaring tiles.
975 * Therefore, the IdSpace should be set before declaring any tiles.
976 *
977 * @param[in] id The IdSpace id
978 */
979 void set_IdSpace(int32_t id)
980 {
981 _IdSpace = id;
982 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100983
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100984 /**
985 * @brief Get the current working IdSpace for the tile registry. IdSpace is used to prevent name collisions when declaring tiles
986 *
987 * @return The IdSpace id
988 */
989 int32_t IdSpace() const
990 {
991 return _IdSpace;
992 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100993
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100994 /**
995 * @brief Gets all the IdSpace declarations defined in the tile registry.
996 *
997 * @return all the IdSpace declarations defined in the tile registry as std::vector<int32_t>. It returns an empty vector if there are no IdSpace declarations.
998 */
999 std::vector<int32_t> IdSpace_declarations() const
1000 {
1001 std::vector<int32_t> x;
1002
1003 auto it = _frags.begin();
1004
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001005 while(it != _frags.end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001006 {
1007 x.push_back(it->first);
1008
1009 it++;
1010 }
1011
1012 return x;
1013 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001014
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001015 /**
1016 * @brief Declare a tile from a previously created tile
1017 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001018 void insert(const std::string &name, const IVectorTile *frag)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001019 {
1020 assert(_language == GpuTargetLanguage::OpenCL);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001021 const int32_t key_IdSpace = _IdSpace;
1022 const std::string key_var_name = name;
1023 const std::string var_name = frag->name();
1024 TileInfo format = frag->format();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001025
1026 // First check whether a tile with the same name exists
1027 IVectorTile *result = (*this)[key_var_name];
1028 assert(result == nullptr);
1029 if(result == nullptr)
1030 {
1031 std::unique_ptr<ClTile> tile = std::make_unique<ClTile>(var_name, format);
1032
1033 _frags[key_IdSpace][key_var_name].tile_object = std::move(tile);
1034 _frags[key_IdSpace][key_var_name].registry_level = _registry_level;
1035
1036 _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Link;
1037 _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name;
1038 _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace;
1039 _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level;
1040 }
1041 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001042
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001043 /**
1044 * @brief Declare a tile with TileInfo. The tile will be stored in the IdSpace set with @p set_IdSpace()
1045 *
1046 * @note The reference name used for declaring the tile should not be previously used in the IdSpace
1047 *
1048 * @param[in] name Reference name for the tile. The reference name can be used to retrieve the tile stored in the registry.
1049 * @param[in] format Tile format use to use
1050 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001051 void insert(const std::string &name, const TileInfo &format)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001052 {
1053 assert(_language == GpuTargetLanguage::OpenCL);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001054 const int32_t key_IdSpace = _IdSpace;
1055 const std::string key_var_name = name;
1056 const std::string var_name = generate_tile_name(name);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001057
1058 // First check whether a tile with the same name exists
1059 IVectorTile *result = (*this)[key_var_name];
1060 assert(result == nullptr);
1061 if(result == nullptr)
1062 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001063 std::unique_ptr<ClTile> tile = std::make_unique<ClTile>(var_name, format);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001064 _frags[key_IdSpace][key_var_name].tile_object = std::move(tile);
1065 _frags[key_IdSpace][key_var_name].registry_level = _registry_level;
1066
1067 _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Tile;
1068 _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name;
1069 _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace;
1070 _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level;
1071 }
1072 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001073
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001074 /**
1075 * @brief Declare a constant tile. The content of the tile is passed as a vector of std::string
1076 *
1077 * @note The reference name used for declaring the tile should not be previously used in the IdSpace
1078 *
1079 * @param[in] name Reference name for the tile. The reference name can be used to retrieve the tile stored in the registry.
1080 * @param[in] in A 3D std::vector of std::string. From the 3D std::vector we can know the dimensions for the tile
1081 * @param[in] dt The data type for the elements stored in the 3D std::vector as std::string. It is user's responsibilty to ensure
1082 * that the data type is aligned with the content of the std::string.
1083 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001084 void insert(const std::string &name, const std::vector<std::vector<std::string>> &in, DataType dt)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001085 {
1086 assert(_language == GpuTargetLanguage::OpenCL);
1087 const int32_t key_IdSpace = _IdSpace;
1088 const std::string key_var_name = name;
1089
1090 // First check whether a tile with the same name exists
1091 IVectorTile *result = (*this)[key_var_name];
1092 assert(result == nullptr);
1093 if(result == nullptr)
1094 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001095 std::unique_ptr<ClConstantTile> tile = std::make_unique<ClConstantTile>(in, dt);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001096 _frags[key_IdSpace][key_var_name].tile_object = std::move(tile);
1097 _frags[key_IdSpace][key_var_name].registry_level = _registry_level;
1098
1099 _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Tile;
1100 _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name;
1101 _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace;
1102 _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level;
1103 }
1104 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001105
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001106 /**
1107 * @brief Declare an anonymous constant tile. The content of the tile is passed as a vector of std::string
1108 *
1109 * @note This method can be used to declare temporary tiles that need to be accessed only once.
1110 *
1111 * @param[in] in A 3D std::vector of std::string. From the 3D std::vector we can know the dimensions for the tile
1112 * @param[in] dt The data type for the elements stored in the 3D std::vector as std::string. It is user responsibilty to ensure
1113 * that the data type is aligned with what passed with the std::string.
1114 *
1115 * @return IVectorTile* the anonymous constant tile
1116 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001117 IVectorTile *insert(const std::vector<std::vector<std::string>> &in, DataType dt)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001118 {
1119 assert(_language == GpuTargetLanguage::OpenCL);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001120 const int32_t key_IdSpace = _IdSpace;
1121 const std::string key_var_name = "_" + std::to_string(_anonymous_frag_count++);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001122
1123 // First check whether a tile with the same name exists
1124 IVectorTile *result = (*this)[key_var_name];
1125 assert(result == nullptr);
1126 if(result == nullptr)
1127 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001128 std::unique_ptr<ClConstantTile> tile = std::make_unique<ClConstantTile>(in, dt);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001129 _frags[key_IdSpace][key_var_name].tile_object = std::move(tile);
1130 _frags[key_IdSpace][key_var_name].registry_level = _registry_level;
1131
1132 _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Tile;
1133 _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name;
1134 _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace;
1135 _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level;
1136 }
1137
1138 return (*this)[key_var_name];
1139 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001140
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001141 /**
1142 * @brief Get the tile from the registry. This method searches the tile in the IdSpace provided by the user
1143 *
1144 * @param[in] name The name of the tile to retrieve
1145 * @param[in] IdSpace The IdSpace id where to search the tile
1146 *
1147 * @return IVectorTile* The tile
1148 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001149 IVectorTile *get(const std::string &name, int32_t IdSpace)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001150 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001151 const int32_t key_IdSpace = IdSpace;
1152 const std::string key_var_name = name;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001153
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001154 IVectorTile *result = nullptr;
1155 auto search_IdSpace = _frags.find(key_IdSpace);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001156 if(search_IdSpace != _frags.end())
1157 {
1158 auto search_tile = _frags[key_IdSpace].find(key_var_name);
1159 if(search_tile != _frags[key_IdSpace].end())
1160 {
1161 result = search_tile->second.tile_object.get();
1162 assert(result != nullptr);
1163 }
1164 }
1165
1166 return result;
1167 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001168
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001169 /**
1170 * @brief Get the tile from the registry. This method searches the tile in the IdSpace set with @p set_IdSpace()
1171 *
1172 * @param[in] name The name of the tile to retrieve
1173 *
1174 * @return IVectorTile* The tile
1175 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001176 IVectorTile *operator[](const std::string &name)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001177 {
1178 return get(name, _IdSpace);
1179 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001180
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001181 /**
1182 * @brief Check whether the tile in the in the IdSpace provided by the user exists
1183 *
1184 * @param[in] name Name of the tile to search for
1185 * @param[in] IdSpace The IdSpace id where to search the tile
1186 *
1187 * @return true if the tile exists
1188 * @return false if the tile does not exist
1189 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001190 bool has_tile(const std::string &name, int32_t IdSpace) const
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001191 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001192 const int32_t key_IdSpace = IdSpace;
1193 const std::string key_var_name = name;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001194
1195 // IVectorTile* result = nullptr;
1196 auto search_IdSpace = _frags.find(key_IdSpace);
1197
1198 return search_IdSpace != _frags.end();
1199 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001200
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001201 /**
1202 * @brief Check whether the tile within the current IdSpace exists
1203 *
1204 * @param[in] name Name of the tile to search for
1205 *
1206 * @return true if the tile exists
1207 * @return false if the tile does not exist
1208 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001209 bool has_tile(const std::string &name) const
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001210 {
1211 return has_tile(name, _IdSpace);
1212 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001213
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001214 /**
1215 * @brief Get all the tiles declared within the IdSpace provided by the user
1216 *
1217 * @param[in] IdSpace IdSpace where to retrieve all the declared tiles
1218 *
1219 * @return std::vector<IVectorTile*> A vector with all the declared tiles in the IdSpace provided by the user
1220 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001221 std::vector<IVectorTile *> tile_declarations(int32_t IdSpace)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001222 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001223 std::vector<IVectorTile *> tiles;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001224
1225 std::map<RegistryTileName, RegistryTileTypeTableEntry>::iterator it = _frag_types[IdSpace].begin();
1226
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001227 while(it != _frag_types[IdSpace].end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001228 {
1229 // The following line should be enabled. However, we cannot at this stage
1230 // because it used to retrieve the output tile produced by each component.
1231 // However, this method should NOT be used to retrieve the output tile
1232 //if(it->second.tile_type == RegistryTileType::Tile)
1233 {
1234 tiles.push_back(get(it->second.tile_name, it->second.registry_idspace));
1235 }
1236 it++;
1237 }
1238
1239 return tiles;
1240 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001241
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001242 /**
1243 * @brief Increase the level of stack.
1244 *
1245 */
1246 void increment_registry_level()
1247 {
1248 _registry_level++;
1249 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001250
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001251 /**
1252 * @brief Remove all the tiles declared at the current stack level and decrease the level of the stack.
1253 *
1254 */
1255 void decrement_registry_level()
1256 {
1257 assert(_registry_level >= 0);
1258
1259 // Remove all variables in the local scope
1260 std::map<RegistryTileName, RegistryTileTableEntry>::iterator it = _frags[_IdSpace].begin();
1261
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001262 while(it != _frags[_IdSpace].end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001263 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001264 if(it->second.registry_level == _registry_level)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001265 {
1266 it = _frags[_IdSpace].erase(it);
1267 }
1268 else
1269 {
1270 it++;
1271 }
1272 }
1273
1274 std::map<RegistryTileName, RegistryTileTypeTableEntry>::iterator it_type = _frag_types[_IdSpace].begin();
1275
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001276 while(it_type != _frag_types[_IdSpace].end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001277 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001278 if(it_type->second.registry_level == _registry_level)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001279 {
1280 it_type = _frag_types[_IdSpace].erase(it_type);
1281 }
1282 else
1283 {
1284 it_type++;
1285 }
1286 }
1287
1288 _registry_level--;
1289 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001290
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001291 /**
1292 * @brief Get the level of the stack
1293 *
1294 */
1295 int32_t level() const
1296 {
1297 return _registry_level;
1298 }
1299
1300private:
1301 // This method ensures that the key is unique among different components
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001302 std::string generate_tile_name(const std::string &name)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001303 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001304 assert(_IdSpace >= 0);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001305 if(_registry_level == 0)
1306 {
1307 return "_G" + std::to_string(_IdSpace) + "_" + name;
1308 }
1309 else
1310 {
1311 return name;
1312 }
1313 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001314
1315 RegistryTileTable _frags{};
1316 RegistryTileTypeTable _frag_types{};
1317 RegistryLevel _registry_level{ 0 };
1318 RegistryIdSpace _IdSpace{ -1 };
1319 int32_t _anonymous_frag_count{ 0 }; // Counter used to create the anonymous tiles
1320 GpuTargetLanguage _language{ GpuTargetLanguage::Unknown }; // Gpu programming language
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001321};
1322
1323using TensorEntry = std::unique_ptr<IGpuTensorArgument>;
1324
1325/**
1326 * @brief Data structure that contains the tensors consumed by the components.
1327 * Since different components may use the same name as reference for a tensor, the registry adopts the IdSpace concept, an @p id to prevent name collisions
1328 * when declaring tensors among different components.
1329 *
1330 */
1331class GpuTensorArgumentRegistry
1332{
1333public:
1334 /**
1335 * @brief Construct a new Gpu Tensor Registry object
1336 *
1337 */
1338 GpuTensorArgumentRegistry()
1339 {
1340 _language = GpuTargetLanguage::Unknown;
1341 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001342
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001343 /**
1344 * @brief Construct a new Gpu Tensor Registry object
1345 *
1346 * @param[in] language Gpu programming language to use
1347 */
1348 GpuTensorArgumentRegistry(GpuTargetLanguage language)
1349 {
1350 _language = language;
1351 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001352
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001353 /**
1354 * @brief Default destructor. Destroy the Gpu Tensor Registry object
1355 *
1356 */
1357 ~GpuTensorArgumentRegistry() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001358
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001359 /**
1360 * @brief Set the working IdSpace for the tensor registry. IdSpace is used to prevent name collisions when declaring tensors.
1361 * Therefore, the IdSpace should be set before declaring any tensors.
1362 *
1363 * @param[in] id The IdSpace id
1364 */
1365 void set_IdSpace(int32_t id)
1366 {
1367 _IdSpace = id;
1368 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001369
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001370 /**
1371 * @brief Get the current working IdSpace for the tensor registry. IdSpace is used to prevent name collisions when declaring tensors
1372 *
1373 * @return The IdSpace id
1374 */
1375 int32_t IdSpace() const
1376 {
1377 return _IdSpace;
1378 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001379
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001380 /**
1381 * @brief Gets all the IdSpace declarations defined in the tensor registry.
1382 *
1383 * @return all the IdSpace declarations defined in the tensor registry as std::vector<int32_t>. It returns an empty vector if there are no IdSpace declarations.
1384 */
1385 std::vector<int32_t> IdSpace_declarations() const
1386 {
1387 std::vector<int32_t> x;
1388
1389 auto it = _refs.begin();
1390
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001391 while(it != _refs.end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001392 {
1393 x.push_back(it->first);
1394
1395 it++;
1396 }
1397
1398 return x;
1399 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001400
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001401 /**
1402 * @brief Declare a tensor with TensorInfo. The tensor will be stored in the IdSpace set with @p set_IdSpace()
1403 *
1404 * @note The reference name used for declaring the tensor should not be previously used in the IdSpace
1405 *
1406 * @param[in] name Reference name for the tensor. The reference name can be used to retrieve the tensor stored in the registry.
1407 * @param[in] x Pair of tensor info and tensor id
1408 * @param[in] return_by_value_when_possible True if we want the value stored in the tensor components
1409 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001410 void insert(const std::string &name, const TensorInfo &x, bool return_by_value_when_possible)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001411 {
1412 assert(_language == GpuTargetLanguage::OpenCL);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001413 const int32_t key_IdSpace = _IdSpace;
1414 const int32_t tensor_id = x.id;
1415 const std::string key_var_name = name;
1416 const std::string var_name = generate_tensor_name(name, tensor_id);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001417
1418 // First, check whether the tensor has already a reference. If so, trigger an assert
1419 assert(!has_tensor_argument(name));
1420
1421 // Check whether a tensor with that tensorID exists
1422 auto result = _tensor_arguments.find(tensor_id);
1423 if(result == _tensor_arguments.end())
1424 {
1425 // It means that we haven't added a tensor with that tensor_id yet. Create a IGpuTensorArgument before creating the reference
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001426 std::unique_ptr<ClTensorArgument> arg = std::make_unique<ClTensorArgument>(var_name, x,
1427 return_by_value_when_possible);
1428 _tensor_arguments[tensor_id] = std::move(arg);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001429 }
1430
1431 _refs[key_IdSpace][key_var_name] = tensor_id;
1432 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001433
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001434 /**
1435 * @brief Get the tensor from the registry. This method searches the tensor in the IdSpace set with @p set_IdSpace()
1436 *
1437 * @param[in] name The name of the tensor to retrieve
1438 *
1439 * @return IGpuTensor* The tensor
1440 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001441 IGpuTensorArgument *operator[](const std::string &name)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001442 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001443 const int32_t key_IdSpace = _IdSpace;
1444 const std::string key_var_name = name;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001445
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001446 IGpuTensorArgument *result = nullptr;
1447 auto search_IdSpace = _refs.find(key_IdSpace);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001448 if(search_IdSpace != _refs.end())
1449 {
1450 auto search_tensor_id = _refs[key_IdSpace].find(key_var_name);
1451
1452 if(search_tensor_id != _refs[key_IdSpace].end())
1453 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001454 const int32_t tensor_id = search_tensor_id->second;
1455 auto search_tensor_argument = _tensor_arguments.find(tensor_id);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001456 if(search_tensor_argument != _tensor_arguments.end())
1457 {
1458 result = search_tensor_argument->second.get();
1459 }
1460 assert(result != nullptr);
1461 }
1462 }
1463
1464 return result;
1465 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001466
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001467 /**
1468 * @brief Get all the tensors declared in the IdSpace provided by the user
1469 *
1470 * @return std::vector<IGpuTensorArgument*> A vector with all the declared tensors
1471 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001472 std::vector<IGpuTensorArgument *> tensor_argument_declarations()
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001473 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001474 std::vector<IGpuTensorArgument *> args;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001475
1476 auto it = _tensor_arguments.begin();
1477
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001478 while(it != _tensor_arguments.end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001479 {
1480 args.push_back(it->second.get());
1481 it++;
1482 }
1483
1484 return args;
1485 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001486
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001487 /**
1488 * @brief Check whether the tensor argument in the IdSpace set with @p set_IdSpace() exists
1489 *
1490 * @param[in] name Name of the tensor argument to search for
1491 *
1492 * @return true if the tensor argument exists
1493 * @return false if the tensor argument does not exist
1494 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001495 bool has_tensor_argument(const std::string &name)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001496 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001497 const int32_t key_IdSpace = _IdSpace;
1498 const std::string key_var_name = name;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001499
1500 auto search_IdSpace = _refs.find(key_IdSpace);
1501
1502 if(search_IdSpace != _refs.end())
1503 {
1504 auto search_tensor_id = _refs[key_IdSpace].find(key_var_name);
1505
1506 return search_tensor_id != _refs[key_IdSpace].end();
1507 }
1508 else
1509 {
1510 return false;
1511 }
1512 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001513
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001514 /**
1515 * @brief Check whether the tensor argument is in the the IdSpace provided by the user
1516 *
1517 * @param[in] name Name of the tensor argument to search for
1518 * @param[in] IdSpace The IdSpace id where to search the tensor argument
1519 *
1520 * @return true if the tile exists
1521 * @return false if the tile does not exist
1522 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001523 bool has_tensor_argument(const std::string &name, int32_t IdSpace)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001524 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001525 const int32_t key_IdSpace = IdSpace;
1526 const std::string key_var_name = name;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001527
1528 auto search_IdSpace = _refs.find(key_IdSpace);
1529
1530 if(search_IdSpace != _refs.end())
1531 {
1532 auto search_tensor_id = _refs[key_IdSpace].find(key_var_name);
1533
1534 return search_tensor_id != _refs[key_IdSpace].end();
1535 }
1536 else
1537 {
1538 return false;
1539 }
1540 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001541
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001542private:
1543 // This method ensures that the key is unique among different components
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001544 std::string generate_tensor_name(const std::string &name, int32_t tensor_id)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001545 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001546 assert(tensor_id >= 0);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001547
1548 return name + std::to_string(tensor_id);
1549 }
1550
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001551 std::map<int32_t, TensorEntry> _tensor_arguments{};
1552 std::map<int32_t, std::map<std::string, int32_t>> _refs{};
1553 int32_t _IdSpace{ -1 };
1554 GpuTargetLanguage _language{ GpuTargetLanguage::Unknown }; // Gpu programming language
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001555};
1556
1557enum class OpType : int32_t
1558{
1559 Elementwise = 0x0000,
1560 Relational = 0x1000,
1561 Algebra = 0x2000
1562};
1563
1564inline std::string to_string(AssignmentOp op)
1565{
1566 switch(op)
1567 {
1568 case AssignmentOp::Decrement:
1569 return "-=";
1570 case AssignmentOp::Increment:
1571 return "+=";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001572 default:
1573 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01001574 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001575 }
1576}
1577
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01001578inline std::string to_string(UnaryOp op)
1579{
1580 switch(op)
1581 {
1582 case UnaryOp::LogicalNot:
1583 return "!";
Adnan AlSinan66f3d382023-07-10 15:07:45 +01001584 case UnaryOp::BitwiseNot:
1585 return "~";
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01001586 default:
1587 assert(false);
1588 return "";
1589 }
1590}
1591
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001592inline std::string to_string(BinaryOp op)
1593{
1594 switch(op)
1595 {
1596 case BinaryOp::Add:
1597 return "+";
1598 case BinaryOp::Sub:
1599 return "-";
1600 case BinaryOp::Mul:
1601 return "*";
1602 case BinaryOp::Div:
1603 return "/";
1604 case BinaryOp::Mod:
1605 return "%";
1606 case BinaryOp::Equal:
1607 return "==";
1608 case BinaryOp::Less:
1609 return "<";
1610 case BinaryOp::LessEqual:
1611 return "<=";
1612 case BinaryOp::Greater:
1613 return ">";
1614 case BinaryOp::GreaterEqual:
1615 return ">=";
1616 case BinaryOp::LogicalAnd:
1617 return "&&";
1618 case BinaryOp::LogicalOr:
1619 return "||";
Adnan AlSinan66f3d382023-07-10 15:07:45 +01001620 case BinaryOp::BitwiseXOR:
1621 return "^";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001622 default:
1623 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01001624 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001625 }
1626}
1627
1628inline std::string binary_op_string(BinaryOp op)
1629{
1630 switch(op)
1631 {
1632 case BinaryOp::Add:
1633 return "add";
1634 case BinaryOp::Sub:
1635 return "sub";
1636 case BinaryOp::Mul:
1637 return "mul";
1638 case BinaryOp::Div:
1639 return "div";
1640 case BinaryOp::Mod:
1641 return "mod";
1642 case BinaryOp::Equal:
1643 return "eq";
1644 case BinaryOp::Less:
1645 return "gt";
1646 case BinaryOp::LessEqual:
1647 return "gteq";
1648 case BinaryOp::Greater:
1649 return "lt";
1650 case BinaryOp::GreaterEqual:
1651 return "lte";
1652 default:
1653 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01001654 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001655 }
1656}
1657
1658enum class OperandType : int32_t
1659{
1660 Unknown = 0x00000000,
1661 ScalarFp32 = 0x00001011, // Immediate scalar tile
1662 ScalarFp16 = 0x00001012, // Immediate scalar tile
1663 ScalarInt32 = 0x00001021, // Immediate scalar tile
1664 ScalarInt16 = 0x00001022, // Immediate scalar tile
1665 ScalarInt8 = 0x00001024, // Immediate scalar tile
1666 ScalarUInt32 = 0x00001031, // Immediate scalar tile
1667 ScalarUInt16 = 0x00001032, // Immediate scalar tile
1668 ScalarUInt8 = 0x00001034, // Immediate scalar tile
1669 ScalarBool = 0x00001041, // Immediate scalar tile
1670 ScalarTile = 0x00001050, // Scalar from a tile
1671 Tile = 0x00010000, // Tile
1672 TensorStride1 = 0x00100001, // Tensor component
1673 TensorStride2 = 0x00100002, // Tensor component
1674 TensorStride3 = 0x00100003, // Tensor component
1675 TensorStride4 = 0x00100004, // Tensor component
1676 TensorDim0 = 0x00100010, // Tensor component
1677 TensorDim1 = 0x00100020, // Tensor component
1678 TensorDim2 = 0x00100030, // Tensor component
1679 TensorDim3 = 0x00100040, // Tensor component
1680 TensorDim4 = 0x00100050, // Tensor component
1681 TensorC = 0x00100010, // Tensor component
1682 TensorW = 0x00100020, // Tensor component
1683 TensorH = 0x00100030, // Tensor component
1684 TensorD = 0x00100040, // Tensor component
1685 TensorN = 0x00100050, // Tensor component
1686 TensorDim1xDim2 = 0x00100100, // Tensor component
1687 TensorDim1xDim2xDim3 = 0x00100200, // Tensor component
1688 TensorWxH = 0x00100300, // Tensor component
1689 TensorWxHxD = 0x00100400, // Tensor component
1690 TensorDataOffset = 0x00100500, // Tensor component
1691};
1692
1693struct ScalarTileCoord
1694{
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001695 ScalarTileCoord()
1696 {
1697 }
1698
1699 ScalarTileCoord(int32_t x0, int32_t y0)
1700 : x(x0), y(y0)
1701 {
1702 }
1703
1704 int32_t x{ -1 };
1705 int32_t y{ -1 };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001706};
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001707
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001708/**
1709 * @brief Operand class. This object is used to pass the operands to the operations performed by the writer.
1710 * Operand can be of three types:
1711 * -# Scalar immediate: constant expression
1712 * -# Tile: A tile
1713 * -# Tensor component: A component (scalar) of a tensor
1714 *
1715 */
1716class Operand
1717{
1718public:
1719 Operand(const std::string &val)
1720 {
1721 _str = val;
1722 _type = OperandType::Tile;
1723 }
1724
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001725 Operand(const std::string &val, const ScalarTileCoord &coord)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001726 {
1727 _str = val;
1728 _type = OperandType::ScalarTile;
1729 _coord = coord;
1730 }
1731
1732 Operand(const std::string &val, OperandType type)
1733 {
1734 _str = val;
1735 _type = type;
1736 }
1737
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001738 Operand(const Operand &t)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001739 {
1740 _str = t.value();
1741 _type = t.type();
1742 }
1743
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001744 Operand &operator=(const Operand &t)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001745 {
1746 _str = t.value();
1747 _type = t.type();
1748 _coord = t.scalar_tile_coordinate();
1749 return *this;
1750 }
1751
1752 std::string value() const
1753 {
1754 return _str;
1755 }
1756
1757 OperandType type() const
1758 {
1759 return _type;
1760 }
1761
1762 ScalarTileCoord scalar_tile_coordinate() const
1763 {
1764 return _coord;
1765 }
1766
1767private:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001768 std::string _str{};
1769 OperandType _type{ OperandType::Unknown };
1770 ScalarTileCoord _coord{};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001771};
1772
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01001773using GpuSamplerTensorStorage = GpuTensorStorage;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001774
1775struct GpuSampler
1776{
1777 GpuSampler() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001778
1779 TensorSamplerFormat format{ TensorSamplerFormat::Unknown };
1780 GpuSamplerTensorStorage storage{ GpuSamplerTensorStorage::Unknown };
1781 TensorSamplerAddressModeX address_mode_x{ TensorSamplerAddressModeX::Unknown };
1782 TensorSamplerAddressModeY address_mode_y{ TensorSamplerAddressModeY::Unknown };
1783 TensorSamplerAddressModeZ address_mode_z{ TensorSamplerAddressModeZ::Unknown };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001784};
1785
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001786inline GpuSampler
1787create_simple_sampler(const TensorInfo *tensor_info_id, GpuSampler sampler, int32_t step_x, int32_t step_y,
1788 int32_t step_z)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001789{
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01001790 CKW_UNUSED(step_x, step_y, step_z);
1791
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001792 auto tensor = tensor_info_id->shape;
1793
1794 GpuSampler dst_sampler;
1795 dst_sampler.format = sampler.format;
1796 dst_sampler.storage = GpuSamplerTensorStorage::BufferUint8Ptr;
1797 dst_sampler.address_mode_x = sampler.address_mode_x;
1798 dst_sampler.address_mode_y = sampler.address_mode_y;
1799 dst_sampler.address_mode_z = sampler.address_mode_z;
1800
1801 int32_t dim_x = 0;
1802 int32_t dim_y = 0;
1803 int32_t dim_z = 0;
1804
1805 switch(sampler.format)
1806 {
1807 case TensorSamplerFormat::C_W_H:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001808 dim_x = tensor[0];
1809 dim_y = tensor[1];
1810 dim_z = tensor[2];
1811 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001812 case TensorSamplerFormat::C_WH_1:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001813 dim_x = tensor[0];
1814 dim_y = tensor[1] * tensor[2];
1815 dim_z = 1;
1816 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001817 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001818 std::cout << "Unsupported tensor format" << std::endl;
1819 assert(false);
1820 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001821 }
1822
1823 if(dim_x == 1)
1824 {
1825 assert(step_x == 1);
1826 dst_sampler.address_mode_x = TensorSamplerAddressModeX::None;
1827 }
1828
1829 if(dim_y == 1)
1830 {
1831 assert(step_y == 1);
1832 dst_sampler.address_mode_y = TensorSamplerAddressModeY::None;
1833 }
1834
1835 if(dim_z == 1)
1836 {
1837 assert(step_z == 1);
1838 dst_sampler.address_mode_z = TensorSamplerAddressModeZ::None;
1839 }
1840
1841 return dst_sampler;
1842}
1843
1844class GpuOutputSampler
1845{
1846public:
1847 GpuOutputSampler() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001848
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001849 /**
1850 * @brief Method used to initialize the GpuOutputSampler. The GpuOutputSampler can be initialized only once
1851 * by the root component. Once initialized, all simpler components will need to used this sampler
1852 * or a broadcasted version of it
1853 *
1854 * @param[in] sampler GpuSampler
1855 * @param[in] step_x Increment step in the X direction. Not necessarily it is the same of n0 of tile!
1856 * @param[in] step_y Increment step in the Y direction. Not necessarily it is the same of m0 of tile!
1857 * @param[in] step_z Increment step in the Z direction. Not necessarily it is the same of d0 of tile!
1858 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001859 void initialize(const TensorInfo *tensor_info_id, GpuSamplerTensorStorage tensor_storage,
1860 TensorSamplerFormat tensor_format, int32_t step_x, int32_t step_y, int32_t step_z)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001861 {
1862 assert(_is_initialized == false);
1863
1864 _step_x = step_x;
1865 _step_y = step_y;
1866 _step_z = step_z;
1867 _tensor_info_id = tensor_info_id;
1868 _sampler = create_sampler(tensor_storage, tensor_format);
1869 _is_initialized = true;
1870 };
1871
1872 GpuSampler sampler() const
1873 {
1874 return _sampler;
1875 };
1876
1877 int32_t step_x() const
1878 {
1879 return _step_x;
1880 };
1881
1882 int32_t step_y() const
1883 {
1884 return _step_y;
1885 };
1886
1887 int32_t step_z() const
1888 {
1889 return _step_z;
1890 };
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001891
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001892private:
1893 GpuSampler create_sampler(GpuSamplerTensorStorage tensor_storage, TensorSamplerFormat tensor_format)
1894 {
1895 // Output can only be in output mode
1896 assert(tensor_storage != GpuSamplerTensorStorage::Image2dReadOnly);
1897 assert(tensor_storage != GpuSamplerTensorStorage::Image3dReadOnly);
1898
1899 auto tensor = _tensor_info_id->shape;
1900
1901 GpuSampler sampler;
1902 sampler.format = tensor_format;
1903 sampler.storage = tensor_storage;
1904 sampler.address_mode_x = TensorSamplerAddressModeX::None;
1905 sampler.address_mode_y = TensorSamplerAddressModeY::None;
1906 sampler.address_mode_z = TensorSamplerAddressModeZ::None;
1907
1908 // In the case of texture, we do not need any special checks at the border
1909 if(tensor_storage == GpuSamplerTensorStorage::BufferUint8Ptr)
1910 {
1911 int32_t dim_x = 0;
1912 int32_t dim_y = 0;
1913 int32_t dim_z = 0;
1914
1915 switch(tensor_format)
1916 {
1917 case TensorSamplerFormat::C_W_H:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001918 dim_x = tensor[0];
1919 dim_y = tensor[1];
1920 dim_z = tensor[2];
1921 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001922 case TensorSamplerFormat::C_WH_1:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001923 dim_x = tensor[0];
1924 dim_y = tensor[1] * tensor[2];
1925 dim_z = 1;
1926 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001927 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001928 std::cout << "Unsupported tensor format" << std::endl;
1929 assert(false);
1930 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001931 }
1932
1933 if((dim_x % _step_x) != 0 && dim_x != 1)
1934 {
1935 sampler.address_mode_x = TensorSamplerAddressModeX::OverlappingMin;
1936 }
1937
1938 if((dim_y % _step_y) != 0 && dim_y != 1)
1939 {
1940 sampler.address_mode_y = TensorSamplerAddressModeY::ClampToMaxEdgeOnly;
1941 }
1942
1943 if((dim_z % _step_z) != 0 && dim_z != 1)
1944 {
1945 sampler.address_mode_z = TensorSamplerAddressModeZ::ClampToMaxEdgeOnly;
1946 }
1947 }
1948
1949 return sampler;
1950 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001951
1952 GpuSampler _sampler{}; // GpuSampler
1953 int32_t _step_x{ 1 };
1954 int32_t _step_y{ 1 };
1955 int32_t _step_z{ 1 };
1956 const TensorInfo *_tensor_info_id{ nullptr };
1957 bool _is_initialized{ false };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001958};
1959
1960/**
1961 * @brief Tensor operand class. This object is used to pass the operands as tensor to the operations performed by the writer.
1962 */
1963class TensorOperand
1964{
1965public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001966 TensorOperand(const std::string &val, GpuSampler sampler)
1967 : _str(val), _sampler(sampler)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001968 {
1969 }
1970
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001971 TensorOperand &operator=(const TensorOperand &t)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001972 {
1973 _str = t.value();
1974 _sampler = t.sampler();
1975 return *this;
1976 }
1977
1978 std::string value() const
1979 {
1980 return _str;
1981 }
1982
1983 GpuSampler sampler() const
1984 {
1985 return _sampler;
1986 }
1987
1988private:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001989 std::string _str{};
1990 GpuSampler _sampler{};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001991};
1992
1993/**
1994 * @brief Data structure that contains all the necessary information to write the Gpu kernel with the Gpu kernel Writer
1995 * This data structure must be initialized before being passed to the Gpu Kernel Writer
1996 *
1997 */
1998class GpuKernelWriterDataHolder
1999{
2000public:
2001 /**
2002 * @brief Construct a new Gpu Kernel Data object. In this phase, we should also store
2003 * the GPU target and target specific capabilities (extensions). For now, we just initialize the
2004 * programming language
2005 *
2006 * @param[in] language Gpu programming language to use
2007 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002008 GpuKernelWriterDataHolder(GpuTargetLanguage language)
2009 : tiles(language), arguments(language), code(""), _language(language)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002010 {
2011 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002012
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002013 /**
2014 * @brief Get the Gpu programming language used
2015 *
2016 * @return GpuTargetLanguage the Gpu programming language
2017 */
2018 GpuTargetLanguage programming_language() const
2019 {
2020 return _language;
2021 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002022
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002023 /**
2024 * @brief @ref GpuTileRegistry
2025 *
2026 */
2027 GpuTileRegistry tiles{};
2028 /**
2029 * @brief @ref GpuTensorArgumentRegistry
2030 *
2031 */
2032 GpuTensorArgumentRegistry arguments{};
2033 /**
2034 * @brief @ref GpuOutputSampler.
2035 *
2036 */
2037 GpuOutputSampler output_sampler{};
2038 /**
2039 * @brief Source code
2040 *
2041 */
2042 std::string code{};
2043
2044 // GpuExtensionRegistry extensions{};
2045private:
2046 GpuTargetLanguage _language;
2047};
2048
2049struct LWS
2050{
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002051 int32_t x{ 1 };
2052 int32_t y{ 1 };
2053 int32_t z{ 1 };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002054};
2055
2056/**
2057 * @brief Utility class used to get the tile from the operand. If the operand is not a tile, @ref OperandUnpacker
2058 * declare an anonymous tile in the tile registry.
2059 */
2060class OperandUnpacker
2061{
2062public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002063 OperandUnpacker(GpuTileRegistry &tiles, GpuTensorArgumentRegistry &arguments)
2064 : _tiles(tiles), _arguments(arguments)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002065 {
2066 // Increase the level of the stack to allocate possible temporary tiles
2067 _tiles.increment_registry_level();
2068 };
2069
2070 ~OperandUnpacker()
2071 {
2072 // Decrease the level of the stack to deallocate any temporary tiles
2073 _tiles.decrement_registry_level();
2074 }
2075
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002076 IVectorTile *unpack(const Operand &src)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002077 {
2078 // Get the tile
2079 if(src.type() == OperandType::Tile)
2080 {
2081 assert(_tiles.has_tile(src.value()));
2082 return _tiles[src.value()];
2083 }
2084 // Create an anonymous tile with a constant
2085 else if(static_cast<int32_t>(src.type()) & 0x00001000)
2086 {
2087 if(src.type() == OperandType::ScalarTile)
2088 {
2089 ScalarTileCoord coord = src.scalar_tile_coordinate();
2090 assert(_tiles.has_tile(src.value()));
2091 assert(coord.x >= 0);
2092 assert(coord.y >= 0);
2093 auto val = _tiles[src.value()]->scalar(coord.x, coord.y);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002094 return _tiles.insert({ { { val.str } } }, val.type.dt);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002095 }
2096 else
2097 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002098 return _tiles.insert({ { { src.value() } } }, to_tile_data_type(src.type()));
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002099 }
2100 }
2101 // Create an anonymous tile with the tensor component
2102 else
2103 {
2104 assert(_arguments.has_tensor_argument(src.value()));
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002105 auto x = _arguments[src.value()];
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002106 const std::string val = x->component(to_tensor_component(src.type()));
2107 const DataType dt = x->component_data_type();
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002108 return _tiles.insert({ { { val } } }, dt);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002109 }
2110 }
2111
2112private:
2113 DataType to_tile_data_type(OperandType x)
2114 {
2115 return static_cast<DataType>(static_cast<int32_t>(x) & 0x00ff);
2116 }
2117
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002118 TensorComponentType to_tensor_component(OperandType x)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002119 {
2120 switch(x)
2121 {
2122 case OperandType::TensorDim0:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002123 return TensorComponentType::Dim0;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002124 case OperandType::TensorDim1:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002125 return TensorComponentType::Dim1;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002126 case OperandType::TensorDim2:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002127 return TensorComponentType::Dim2;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002128 case OperandType::TensorDim3:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002129 return TensorComponentType::Dim3;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002130 case OperandType::TensorDim4:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002131 return TensorComponentType::Dim4;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002132 case OperandType::TensorStride1:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002133 return TensorComponentType::Stride1;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002134 case OperandType::TensorStride2:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002135 return TensorComponentType::Stride2;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002136 case OperandType::TensorStride3:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002137 return TensorComponentType::Stride3;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002138 case OperandType::TensorStride4:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002139 return TensorComponentType::Stride4;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002140 case OperandType::TensorDim1xDim2:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002141 return TensorComponentType::Dim1xDim2;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002142 case OperandType::TensorDim1xDim2xDim3:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002143 return TensorComponentType::Dim1xDim2xDim3;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002144 case OperandType::TensorDataOffset:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002145 return TensorComponentType::OffsetFirstElement;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002146 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002147 assert(false);
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002148 return TensorComponentType::Unknown;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002149 }
2150 }
2151
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002152 GpuTileRegistry &_tiles;
2153 GpuTensorArgumentRegistry &_arguments;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002154};
2155
2156/**
2157 * @brief Utility class used to get the tensor argument from the operand. If the operand is not a tile, @ref OperandUnpacker
2158 * declare an anonymous tile in the tile registry.
2159 * Tensor dimension reduction aims for reducing the tensor data dimension while keeping data's tensor structure.
2160 */
2161class TensorOperandUnpacker
2162{
2163public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002164 TensorOperandUnpacker(GpuTensorArgumentRegistry &arguments)
2165 : _arguments(arguments){};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002166
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002167 IGpuTensorArgument *unpack(const TensorOperand &src)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002168 {
2169 assert(_arguments.has_tensor_argument(src.value()));
2170 return _arguments[src.value()];
2171 }
2172
2173private:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002174 GpuTensorArgumentRegistry &_arguments;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002175};
2176
2177/**
2178 * @brief The GpuKernel will be used in three occasions (stages):
2179 * #- Compilation stage
2180 * #- Tuning stage
2181 * #- Dispatch stage
2182 */
2183struct GpuKernel
2184{
2185 // Compilation stage
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002186 std::string code{}; // Source code, required for the compilation stage
2187 std::vector<GpuExtensions> list_extensions{}; // Extensions, required for the compilation stage
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002188 // Tuning stage
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002189 std::string config_id{}; // Unique id, required for the tuning stage
2190 std::vector<LWS> list_lws{}; // LWS to test, required for the tuning stage
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002191 // Dispatch stage
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002192 GpuOutputSampler output_sampler{}; // GpuOutputSampler, required for the dispatch stage
2193 std::vector<std::pair<int32_t, GpuTensorStorage>> list_tensor_storages; // List of tensor storages, required for the dispatch stage
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002194 std::vector<std::pair<int32_t, TensorComponentType>> list_tensor_components; // List of tensor components (width, stride,..), required for the dispatch stage)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002195};
2196
SiCong Li16b37522023-07-18 17:56:49 +01002197// Generate all extension pragmas (hardcoded for now)
2198inline std::string generate_extensions()
2199{
2200 std::string ext = R"(
2201#if defined(cl_khr_fp16)
2202#pragma OPENCL EXTENSION cl_khr_fp16 : enable
2203#endif // defined(cl_khr_fp16)
2204
2205#if defined(cl_arm_integer_dot_product_int8)
2206#pragma OPENCL EXTENSION cl_arm_integer_dot_product_int8 : enable
2207#endif // defined(cl_arm_integer_dot_product_int8)
2208
2209#if defined(cl_arm_integer_dot_product_accumulate_int8)
2210#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int8 : enable
2211#endif // defined(cl_arm_integer_dot_product_accumulate_int8)
2212
2213#if defined(cl_arm_printf)
2214#pragma OPENCL EXTENSION cl_arm_printf : enable
2215#endif // defined(cl_arm_printf);
2216)";
2217 return ext;
2218}
2219
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002220// This function should produce an object with the source
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002221inline std::string generate_code(GpuKernelWriterDataHolder &in, const std::string &name)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002222{
2223 std::string code;
SiCong Li16b37522023-07-18 17:56:49 +01002224 code += generate_extensions();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002225 code += "__kernel void ";
2226 code += name;
2227 code += "(\n";
2228
2229 auto IdSpaces = in.arguments.IdSpace_declarations();
2230
2231 std::vector<std::string> arg_str;
2232
2233 auto tensor_args = in.arguments.tensor_argument_declarations();
2234
2235 for(auto &i : tensor_args)
2236 {
2237 // For each tensor used, get the storage and tensor components
2238 auto storages = i->storage_declarations();
2239 auto components = i->component_declarations();
2240
2241 for(auto &y : storages)
2242 {
2243 std::string str;
2244 str += i->storage_type_declaration(y);
2245 str += " ";
2246 str += i->storage(y);
2247 arg_str.push_back(str);
2248 }
2249
2250 for(auto &y : components)
2251 {
2252 std::string str;
2253 str += i->component_type_declaration();
2254 str += " ";
2255 str += i->component(y);
2256 arg_str.push_back(str);
2257 }
2258 }
2259
2260 for(size_t i = 0; i < arg_str.size(); ++i)
2261 {
2262 code += arg_str[i];
2263 if(i + 1 < arg_str.size())
2264 {
2265 code += ",\n";
2266 }
2267 }
2268
2269 code += ")\n";
2270 code += "{\n";
2271 code += in.code;
2272 code += "}\n";
2273
2274 return code;
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002275}
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002276
2277/**
2278 * @brief This class is responsible to map a N-Tensor to a 3d tensor. The mapper needs the GpuSampler to know
2279 * how to reduce the dimensionality of a tensor
2280 *
2281 */
2282class GpuTensor3dMapper
2283{
2284public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002285 GpuTensor3dMapper(IGpuTensorArgument *tensor, GpuSampler sampler)
2286 : _sampler(sampler), _tensor(tensor){};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002287
2288 std::string tensor_component_x() const
2289 {
2290 const auto format = _sampler.format;
2291 switch(format)
2292 {
2293 case TensorSamplerFormat::C_WH_1:
2294 case TensorSamplerFormat::C_W_H:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002295 return _tensor->component(TensorComponentType::Dim0);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002296 default:
2297 std::cout << "Unsupported tensor format" << std::endl;
2298 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002299 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002300 }
2301 }
2302
2303 std::string tensor_component_y() const
2304 {
2305 const auto format = _sampler.format;
2306 switch(format)
2307 {
2308 case TensorSamplerFormat::C_WH_1:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002309 return _tensor->component(TensorComponentType::Dim1xDim2);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002310 case TensorSamplerFormat::C_W_H:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002311 return _tensor->component(TensorComponentType::Dim1);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002312 default:
2313 std::cout << "Unsupported tensor format" << std::endl;
2314 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002315 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002316 }
2317 }
2318
2319 std::string tensor_component_z() const
2320 {
2321 const auto format = _sampler.format;
2322 switch(format)
2323 {
2324 case TensorSamplerFormat::C_WH_1:
2325 return "1";
2326 case TensorSamplerFormat::C_W_H:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002327 return _tensor->component(TensorComponentType::Dim2);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002328 default:
2329 std::cout << "Unsupported tensor format" << std::endl;
2330 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002331 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002332 }
2333 }
2334
2335 std::string tensor_component_stride_y() const
2336 {
2337 const auto format = _sampler.format;
2338 switch(format)
2339 {
2340 case TensorSamplerFormat::C_WH_1:
2341 case TensorSamplerFormat::C_W_H:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002342 return _tensor->component(TensorComponentType::Stride1);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002343 default:
2344 std::cout << "Unsupported tensor format" << std::endl;
2345 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002346 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002347 }
2348 }
2349
2350 std::string tensor_component_stride_z() const
2351 {
2352 const auto format = _sampler.format;
2353 switch(format)
2354 {
2355 case TensorSamplerFormat::C_WH_1:
2356 return "0";
2357 case TensorSamplerFormat::C_W_H:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002358 return _tensor->component(TensorComponentType::Stride2);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002359 default:
2360 std::cout << "Unsupported tensor format" << std::endl;
2361 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002362 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002363 }
2364 }
2365
2366 std::string tensor_component_stride_batch() const
2367 {
2368 const auto format = _sampler.format;
2369 switch(format)
2370 {
2371 case TensorSamplerFormat::C_WH_1:
2372 case TensorSamplerFormat::C_W_H:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002373 return _tensor->component(TensorComponentType::Stride3);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002374 default:
2375 std::cout << "Unsupported tensor format" << std::endl;
2376 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002377 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002378 }
2379 }
2380
2381 bool is_one_component_x() const
2382 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002383 auto t = _tensor->format();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002384 const auto format = _sampler.format;
2385 switch(format)
2386 {
2387 case TensorSamplerFormat::C_WH_1:
2388 case TensorSamplerFormat::C_W_H:
2389 return t.shape[0] == 1;
2390 default:
2391 std::cout << "Unsupported tensor format" << std::endl;
2392 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002393 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002394 }
2395 }
2396
2397 bool is_one_component_y() const
2398 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002399 auto t = _tensor->format();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002400 const auto format = _sampler.format;
2401 switch(format)
2402 {
2403 case TensorSamplerFormat::C_WH_1:
2404 return (t.shape[1] * t.shape[2]) == 1;
2405 case TensorSamplerFormat::C_W_H:
2406 return t.shape[1] == 1;
2407 default:
2408 std::cout << "Unsupported tensor format" << std::endl;
2409 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002410 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002411 }
2412 }
2413
2414 bool is_one_component_z() const
2415 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002416 auto t = _tensor->format();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002417 const auto format = _sampler.format;
2418 switch(format)
2419 {
2420 case TensorSamplerFormat::C_WH_1:
2421 return true;
2422 case TensorSamplerFormat::C_W_H:
2423 return t.shape[2] == 1;
2424 default:
2425 std::cout << "Unsupported tensor format" << std::endl;
2426 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002427 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002428 }
2429 }
2430
2431 bool is_one_component_batch() const
2432 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002433 auto t = _tensor->format();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002434 const auto format = _sampler.format;
2435 switch(format)
2436 {
2437 case TensorSamplerFormat::C_WH_1:
2438 case TensorSamplerFormat::C_W_H:
2439 return t.shape[3] == 1;
2440 default:
2441 std::cout << "Unsupported tensor format" << std::endl;
2442 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002443 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002444 }
2445 }
2446
2447 GpuSampler gpu_sampler() const
2448 {
2449 return _sampler;
2450 }
2451
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002452 IGpuTensorArgument *tensor_argument() const
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002453 {
2454 return _tensor;
2455 }
2456
2457private:
2458 GpuSampler _sampler;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002459 IGpuTensorArgument *_tensor;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002460};
2461
2462struct GpuKernelWriterAttribute
2463{
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002464 bool return_tensor_component_by_value{ false };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002465};
2466
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002467enum class RoundingMode
2468{
2469 None,
2470 Rte,
2471 Rtz,
2472 Rtp,
2473 Rtn
2474};
2475
2476// https://llvm.org/docs/tutorial/MyFirstLanguageFrontend/LangImpl05.html
2477class IGpuKernelWriter
2478{
2479public:
2480 virtual ~IGpuKernelWriter() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002481
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002482 virtual void set_IdSpace(int32_t id) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002483
2484 virtual void import_tile(const std::string &dst, const IVectorTile *src) = 0;
2485
2486 virtual void declare_argument(const std::string &name, const TensorInfo &tensor) = 0;
2487
2488 virtual void declare_tile(const std::string &name, const TileInfo &info) = 0;
2489
2490 virtual void declare_const_tile(const std::string &name, const std::vector<std::vector<std::string>> &in, DataType dt) = 0;
2491
2492 virtual void write_text(const std::string &x) = 0;
2493
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002494 virtual void compound_statement_begin() = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002495
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002496 virtual void compound_statement_end() = 0;
2497
2498 // Operations
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002499 virtual void op_get_global_id(const Operand &dst_var, int32_t dim) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002500
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002501 virtual void op_get_global_coord(const Operand &dst, const Operand &step, const TensorOperand &tensor, int32_t dim) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002502
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002503 virtual void op_get_global_batch(const Operand &dst, const TensorOperand &tensor) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002504
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002505 virtual void op_get_global_size(const Operand &dst_var, int32_t dim) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002506
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002507 virtual void op_unary_expression(const Operand &dst, UnaryOp op, const Operand &src) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002508
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002509 virtual void op_binary_expression(const Operand &dst, const Operand &lhs, BinaryOp op, const Operand &rhs) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002510
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002511 virtual void op_assign(const Operand &dst_name, const Operand &src_name) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002512
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002513 virtual void op_unary_elementwise_function(const Operand &dst_name, UnaryFunction func, const Operand &src_name) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002514
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002515 virtual void op_binary_elementwise_function(const Operand &dst_name, BinaryFunction func, const Operand &first_name, const Operand &second_name) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002516
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002517 virtual void op_ternary_elementwise_function(const Operand &dst_name, TernaryFunction func, const Operand &first_name, const Operand &second_name, const Operand &third_name) = 0;
2518
2519 virtual void op_if_header(const Operand &lhs, BinaryOp op, const Operand &rhs) = 0;
2520
2521 virtual void op_else_if_header(const Operand &lhs, BinaryOp op, const Operand &rhs) = 0;
2522
2523 virtual void op_else_header() = 0;
2524
Nikolaj Jensenfab6c212023-06-27 14:13:24 +01002525 virtual void op_for_loop_header(const Operand &var_name, BinaryOp cond_op, const Operand &cond_value, const Operand &update_var, AssignmentOp update_op, const Operand &update_value) = 0;
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002526
2527 virtual void op_load_indirect(const TensorOperand &tensor, const Operand &dst, const Operand &x, const Operand &y_indirect, const Operand &z, const Operand &b = Operand("0", OperandType::ScalarInt32)) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002528
2529 virtual void op_load_immediate(const TensorOperand &tensor, const Operand &dst, const Operand &x, const Operand &y, const Operand &z, const Operand &b = Operand("0", OperandType::ScalarInt32), const Operand &dilation_y = Operand("1", OperandType::ScalarInt32)) = 0;
2530
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002531 virtual void op_store_immediate(const TensorOperand &tensor, const Operand &src, const Operand &x, const Operand &y, const Operand &z, const Operand &b = Operand("0", OperandType::ScalarInt32)) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002532
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002533 virtual void op_cast_expression(const Operand &dst, const Operand &src, ConvertPolicy policy) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002534
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002535 virtual void op_return() = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002536
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002537 // Utils
2538 // It is the process of converting
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002539 virtual void util_get_indirect_buffer(const Operand &dst, const TensorOperand &tensor, const Operand &x,
2540 const Operand &y, const Operand &x_off, const Operand &y_off) = 0;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002541};
2542
2543enum class GpuLoadStoreType
2544{
2545 Load = 1,
2546 Store = 2
2547};
2548
2549class IGpuLoadStoreHelperWriter
2550{
2551public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002552 IGpuLoadStoreHelperWriter(IGpuKernelWriter *x, GpuTensor3dMapper mapper, GpuLoadStoreType type)
2553 : _writer(x), _mapper(mapper), _type(type)
2554 {
2555 }
2556
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002557 IGpuLoadStoreHelperWriter(const IGpuLoadStoreHelperWriter &) = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002558
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002559 IGpuLoadStoreHelperWriter &operator=(const IGpuLoadStoreHelperWriter &) = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002560
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002561 virtual ~IGpuLoadStoreHelperWriter() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002562
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002563 virtual void initialize(IVectorTile *dst, IVectorTile *x, IVectorTile *z, IVectorTile *b) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002564
2565 virtual void write(const std::pair<int32_t, std::string> &y) = 0;
2566
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002567 virtual void finalize() = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002568
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002569protected:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002570 IGpuKernelWriter *_writer;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002571 GpuTensor3dMapper _mapper;
2572 GpuLoadStoreType _type;
2573};
2574
2575class ClLoadStoreBufferHelperWriter : public IGpuLoadStoreHelperWriter
2576{
2577public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002578 ClLoadStoreBufferHelperWriter(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type)
2579 : IGpuLoadStoreHelperWriter(x, mapper, type)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002580 {
2581 }
2582
2583 ClLoadStoreBufferHelperWriter(const ClLoadStoreBufferHelperWriter &) = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002584
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002585 ClLoadStoreBufferHelperWriter &operator=(const ClLoadStoreBufferHelperWriter &) = default;
2586
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002587 static bool
2588 validate(IGpuKernelWriter *x, GpuTensor3dMapper mapper, GpuLoadStoreType type, IVectorTile *dst)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002589 {
2590 CKW_UNUSED(x, type, dst);
2591
2592 if(mapper.gpu_sampler().storage != GpuSamplerTensorStorage::BufferUint8Ptr)
2593 {
2594 return false;
2595 }
2596 return true;
2597 }
2598
2599 void initialize(IVectorTile *dst, IVectorTile *x, IVectorTile *z, IVectorTile *b) override
2600 {
2601 assert(validate(_writer, _mapper, _type, dst));
2602
2603 _dst = dst;
2604 _ls_width_full = dst->format().w;
2605
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002606 _coord_x = x->scalar(0, 0).str;
2607 _coord_z = z->scalar(0, 0).str;
2608 _coord_b = b->scalar(0, 0).str;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002609 _coord_orig_z = _coord_z;
2610
2611 out_of_bound_initialize_x(_coord_x);
2612 out_of_bound_initialize_z(_coord_z);
2613
2614 /*
2615 meaning of else:
2616 - x: partial load/store
2617 - y: no load/store operation
2618 - z: no load/store operation
2619 if(x)
2620 {
2621 if(z)
2622 {
2623 if(y)
2624 {
2625 // full load/store width
2626 }
2627 else
2628 {
2629 // no load/store
2630 }
2631 }
2632 else
2633 {
2634 // no load/store
2635 }
2636 }
2637 else
2638 {
2639 if(z)
2640 {
2641 if(y)
2642 {
2643 // partial load/store width
2644 }
2645 else
2646 {
2647 // no load/store
2648 }
2649 }
2650 else
2651 {
2652 // no load/store
2653 }
2654 }
2655 */
2656 }
2657
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002658 void write(const std::pair<int32_t, std::string> &y) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002659 {
2660 int32_t idx_y = y.first;
2661 std::string coord_y = y.second;
2662
2663 // The only check required is on Y.
2664 out_of_bound_initialize_y(coord_y);
2665
2666 const std::string dst = _dst->vector(idx_y).str;
2667 const std::string address = to_ls_buffer_address(_coord_x, coord_y, _coord_z, _coord_b);
2668 const std::string ls_buf = to_ls_buffer(_type, _ls_width_full, dst, address);
2669
2670 _writer->write_text(ls_buf);
2671 _writer->write_text(";\n");
2672
2673 out_of_bound_finalize_y(dst);
2674
2675 // The left over load/store will be written in the finalize stage
2676 if(_ls_width_part.size() != 0)
2677 {
2678 int32_t w = 0;
2679 for(auto &p : _ls_width_part)
2680 {
2681 const std::string dst0 = _dst->vector(w, p, idx_y).str;
2682 const std::string coord_x = _coord_x + " + " + std::to_string(w);
2683 const std::string address = to_ls_buffer_address(coord_x, coord_y, _coord_z, _coord_b);
2684 const std::string ls_buf0 = to_ls_buffer(_type, p, dst0, address);
2685 _leftovers_x.push_back(std::make_pair(std::make_pair(dst0, coord_y), ls_buf0));
2686
2687 w += p;
2688 }
2689 }
2690 }
2691
2692 void finalize() override
2693 {
2694 out_of_bound_finalize_z();
2695 out_of_bound_finalize_x();
2696 }
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002697
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002698private:
2699 IVectorTile *_dst{ nullptr };
2700 int32_t _ls_width_full{ 0 };
2701 std::vector<int32_t> _ls_width_part{};
2702 std::vector<std::pair<std::pair<std::string, std::string>, std::string>> _leftovers_x{};
2703 std::string _coord_x{};
2704 std::string _coord_z{};
2705 std::string _coord_orig_z{};
2706 std::string _coord_b{};
2707
2708 void out_of_bound_initialize_x(std::string &coord)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002709 {
2710 if(_mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin)
2711 {
2712 auto tensor_format = _mapper.tensor_argument()->format();
2713 auto shape = tensor_format.shape;
2714
2715 _ls_width_part = decompose_leftover_ls_vector_width(shape[0] % _ls_width_full);
2716 if(_ls_width_part.size() != 0)
2717 {
2718 _writer->write_text("if(" + coord + " > 0)\n");
2719 _writer->compound_statement_begin();
2720 }
2721 }
2722 };
2723
2724 void out_of_bound_finalize_x()
2725 {
2726 if(_mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin)
2727 {
2728 if(_ls_width_part.size() != 0)
2729 {
2730 _writer->compound_statement_end();
2731 _writer->write_text("else\n");
2732 _writer->compound_statement_begin();
2733
2734 out_of_bound_initialize_z(_coord_orig_z);
2735 for(auto &i : _leftovers_x)
2736 {
2737 out_of_bound_initialize_y(i.first.second);
2738 _writer->write_text(i.second);
2739 _writer->write_text(";\n");
2740 out_of_bound_finalize_y(i.first.first);
2741 }
2742 out_of_bound_finalize_z();
2743 _writer->compound_statement_end();
2744 }
2745 }
2746 };
2747
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002748 void out_of_bound_initialize_y(std::string &coord)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002749 {
2750 std::string max = "";
2751
2752 const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
2753
2754 switch(address_mode_y)
2755 {
2756 case TensorSamplerAddressModeY::Skip:
2757 case TensorSamplerAddressModeY::ClampToBorder:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002758 // NOTE: This line should not be moved outside of the switch statement.
2759 // The reason for that is because when we query the component, the component is marked as used
2760 // and added to the list of arguments of the kernel. Since, not in all cases this component is required,
2761 // we should request the component only when used
2762 max = _mapper.tensor_component_y();
2763 _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n");
2764 _writer->compound_statement_begin();
2765 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002766 case TensorSamplerAddressModeY::SkipMinEdgeOnly:
2767 case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002768 _writer->write_text("if(" + coord + " >= 0)\n");
2769 _writer->compound_statement_begin();
2770 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002771 case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
2772 case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002773 max = _mapper.tensor_component_y();
2774 _writer->write_text("if(" + coord + " < " + max + ")\n");
2775 _writer->compound_statement_begin();
2776 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002777 case TensorSamplerAddressModeY::ClampToNearest:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002778 max = _mapper.tensor_component_y();
2779 coord = "clamp(" + coord + ", 0, " + max + " - 1)";
2780 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002781 case TensorSamplerAddressModeY::ClampToMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002782 max = _mapper.tensor_component_y();
2783 coord = "min(" + coord + ", " + max + " - 1)";
2784 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002785 case TensorSamplerAddressModeY::ClampToMinEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002786 coord = "max(" + coord + ", 0)";
2787 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002788 case TensorSamplerAddressModeY::None:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002789 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002790 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002791 std::cout << "Unsupported address mode for write_out_of_bound_check_yz" << std::endl;
2792 assert(false);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002793 }
2794 };
2795
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002796 void out_of_bound_finalize_y(const std::string &dst)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002797 {
2798 const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
2799
2800 switch(address_mode_y)
2801 {
2802 case TensorSamplerAddressModeY::ClampToBorder:
2803 case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
2804 case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
2805 case TensorSamplerAddressModeY::Skip:
2806 case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
2807 case TensorSamplerAddressModeY::SkipMinEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002808 _writer->compound_statement_end();
2809 break;
SiCong Li16b37522023-07-18 17:56:49 +01002810 case TensorSamplerAddressModeY::None:
2811 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002812
2813 default:
2814 assert(false);
2815 }
2816
2817 switch(address_mode_y)
2818 {
2819 case TensorSamplerAddressModeY::ClampToBorder:
2820 case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
2821 case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002822 _writer->write_text("else\n");
2823 _writer->compound_statement_begin();
2824 _writer->write_text(dst);
2825 _writer->write_text(" = 0.0f;\n");
2826 _writer->compound_statement_end();
2827 break;
SiCong Li16b37522023-07-18 17:56:49 +01002828 case TensorSamplerAddressModeY::None:
2829 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002830
2831 default:
2832 assert(false);
2833 }
2834 };
2835
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002836 void out_of_bound_initialize_z(std::string &coord)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002837 {
2838 std::string max = "";
2839
2840 const auto address_mode_z = _mapper.gpu_sampler().address_mode_z;
2841
2842 switch(address_mode_z)
2843 {
2844 case TensorSamplerAddressModeZ::Skip:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002845 max = _mapper.tensor_component_z();
2846 _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n");
2847 _writer->compound_statement_begin();
2848 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002849 case TensorSamplerAddressModeZ::SkipMinEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002850 _writer->write_text("if(" + coord + " >= 0)\n");
2851 _writer->compound_statement_begin();
2852 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002853 case TensorSamplerAddressModeZ::SkipMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002854 max = _mapper.tensor_component_z();
2855 _writer->write_text("if(" + coord + " < " + max + ")\n");
2856 _writer->compound_statement_begin();
2857 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002858 case TensorSamplerAddressModeZ::ClampToNearest:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002859 max = _mapper.tensor_component_z();
2860 coord = "clamp(" + coord + ", 0, " + max + " - 1)";
2861 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002862 case TensorSamplerAddressModeZ::ClampToMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002863 max = _mapper.tensor_component_z();
2864 coord = "min(" + coord + ", " + max + " - 1)";
2865 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002866 case TensorSamplerAddressModeZ::ClampToMinEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002867 coord = "max(" + coord + ", 0)";
2868 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002869 case TensorSamplerAddressModeZ::None:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002870 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002871 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002872 std::cout << "Unsupported address mode for write_out_of_bound_check_yz" << std::endl;
2873 assert(false);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002874 }
2875 };
2876
2877 void out_of_bound_finalize_z()
2878 {
2879 const auto address_mode_z = _mapper.gpu_sampler().address_mode_z;
2880
2881 switch(address_mode_z)
2882 {
2883 case TensorSamplerAddressModeZ::Skip:
2884 case TensorSamplerAddressModeZ::SkipMinEdgeOnly:
2885 case TensorSamplerAddressModeZ::SkipMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002886 _writer->compound_statement_end();
2887 break;
SiCong Li16b37522023-07-18 17:56:49 +01002888 case TensorSamplerAddressModeZ::None:
2889 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002890
2891 default:
2892 assert(false);
2893 }
2894 };
2895
2896 std::vector<int32_t> decompose_leftover_ls_vector_width(int32_t ls_leftover_vector_width) const
2897 {
2898 std::vector<int32_t> x;
2899
2900 switch(ls_leftover_vector_width)
2901 {
2902 case 0:
2903 break;
2904 case 1:
2905 case 2:
2906 case 3:
2907 case 4:
2908 case 8:
2909 case 16:
2910 x.push_back(ls_leftover_vector_width);
2911 break;
2912 case 5:
2913 x.push_back(4);
2914 x.push_back(1);
2915 break;
2916 case 6:
2917 x.push_back(4);
2918 x.push_back(2);
2919 break;
2920 case 7:
2921 x.push_back(4);
2922 x.push_back(3);
2923 break;
2924 case 9:
2925 x.push_back(8);
2926 x.push_back(1);
2927 break;
2928 case 10:
2929 x.push_back(8);
2930 x.push_back(2);
2931 break;
2932 case 11:
2933 x.push_back(8);
2934 x.push_back(3);
2935 break;
2936 case 12:
2937 x.push_back(8);
2938 x.push_back(4);
2939 break;
2940 case 13:
2941 x.push_back(8);
2942 x.push_back(4);
2943 x.push_back(1);
2944 break;
2945 case 14:
2946 x.push_back(8);
2947 x.push_back(4);
2948 x.push_back(2);
2949 break;
2950 case 15:
2951 x.push_back(8);
2952 x.push_back(4);
2953 x.push_back(3);
2954 break;
2955
2956 default:
2957 assert(false);
2958 }
2959 return x;
2960 }
2961
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002962 std::string to_ls_buffer(GpuLoadStoreType type, int32_t vector_width, const std::string &data,
2963 const std::string &address)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002964 {
2965 switch(type)
2966 {
2967 case GpuLoadStoreType::Load:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002968 if(vector_width != 1)
2969 {
2970 return data + " = vload" + std::to_string(vector_width) + "(0, " + address + ")";
2971 }
2972 else
2973 {
2974 return data + " = *(" + address + ")";
2975 }
2976 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002977 case GpuLoadStoreType::Store:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002978 if(vector_width != 1)
2979 {
2980 return "vstore" + std::to_string(vector_width) + "(" + data + ", 0, " + address + ")";
2981 }
2982 else
2983 {
2984 return "*(" + address + ") = " + data;
2985 }
2986 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002987 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002988 std::cout << "Unsupported GpuLoadStoreType" << std::endl;
2989 assert(false);
2990 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002991 }
2992 }
2993
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002994 std::string to_ls_buffer_address(const std::string &x, const std::string &y, const std::string &z,
2995 const std::string &b) const
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002996 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002997 auto tensor_storage = static_cast<GpuTensorStorage>(_mapper.gpu_sampler().storage);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002998 assert(tensor_storage == GpuTensorStorage::BufferUint8Ptr);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002999 const std::string ptr_buf = _mapper.tensor_argument()->storage(tensor_storage);
3000 const std::string dst_type = get_cl_data_type(_dst->format().dt, 1);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003001
3002 std::string address;
3003 address += "(__global ";
3004 address += dst_type;
3005 address += "*)(";
3006 address += ptr_buf;
3007 if(x != "0" && (_mapper.is_one_component_x() != true))
3008 {
3009 address += " + (";
3010 address += x + ") * sizeof(" + dst_type + ")";
3011 }
Jakub Sujake1c96e72023-07-31 13:36:58 +01003012 if(y != "0")
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003013 {
3014 const std::string stride_y = _mapper.tensor_component_stride_y();
3015 address += " + (";
3016 address += y + ")";
3017 address += " * ";
3018 address += stride_y;
3019 }
3020 if(z != "0" && (_mapper.is_one_component_z() != true))
3021 {
3022 const std::string stride_z = _mapper.tensor_component_stride_z();
3023 address += " + (";
3024 address += z + ")";
3025 address += " * ";
3026 address += stride_z;
3027 }
3028 if(b != "0" && (_mapper.is_one_component_batch() != true))
3029 {
3030 const std::string stride_b = _mapper.tensor_component_stride_batch();
3031 address += " + (";
3032 address += b + ")";
3033 address += " * ";
3034 address += stride_b;
3035 }
3036 address += ")";
3037 return address;
3038 }
3039};
3040
3041class ClLoadStoreImage2dHelperWriter : public IGpuLoadStoreHelperWriter
3042{
3043public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003044 static bool
3045 validate(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type, IVectorTile *dst)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003046 {
3047 CKW_UNUSED(x);
3048
3049 if(dst->format().w != 4)
3050 {
3051 return false;
3052 }
3053 if(mapper.gpu_sampler().address_mode_x != TensorSamplerAddressModeX::None)
3054 {
3055 return false;
3056 }
3057 if(mapper.gpu_sampler().address_mode_z != TensorSamplerAddressModeZ::None)
3058 {
3059 return false;
3060 }
3061 if(mapper.gpu_sampler().storage != GpuSamplerTensorStorage::Image2dReadOnly && type == GpuLoadStoreType::Load)
3062 {
3063 return false;
3064 }
3065 if(mapper.gpu_sampler().storage != GpuSamplerTensorStorage::Image2dWriteOnly && type == GpuLoadStoreType::Store)
3066 {
3067 return false;
3068 }
3069 if((dst->format().dt != DataType::Fp32) && (dst->format().dt != DataType::Fp16))
3070 {
3071 return false;
3072 }
3073 return true;
3074 /*
3075 - x: Only GpuSamplerAddressModeX::None is supported and vector length = 4
3076 - z: Only GpuSamplerAddressModeZ::None is supported
3077 */
3078 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003079
3080 ClLoadStoreImage2dHelperWriter(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type)
3081 : IGpuLoadStoreHelperWriter(x, mapper, type)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003082 {
3083 }
3084
3085 ClLoadStoreImage2dHelperWriter(const ClLoadStoreImage2dHelperWriter &) = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003086
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003087 ClLoadStoreImage2dHelperWriter &operator=(const ClLoadStoreImage2dHelperWriter &) = default;
3088
3089 void initialize(IVectorTile *dst, IVectorTile *x, IVectorTile *z, IVectorTile *b) override
3090 {
3091 assert(validate(_writer, _mapper, _type, dst));
3092
3093 _dst = dst;
3094 _ls_width_full = dst->format().w;
3095 _coord_x = x->scalar(0, 0).str;
3096 _coord_z = z->scalar(0, 0).str;
3097 _coord_b = b->scalar(0, 0).str;
3098
3099 /*
3100 if(y)
3101 {
3102 // full load/store width
3103 }
3104 else
3105 {
3106 // no load/store
3107 }
3108 */
3109 }
3110
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003111 void write(const std::pair<int32_t, std::string> &y) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003112 {
3113 int32_t idx_y = y.first;
3114 std::string coord_y = y.second;
3115
3116 // The only check required is on Y.
3117 out_of_bound_initialize_y(coord_y);
3118
3119 const std::string dst = _dst->vector(idx_y).str;
3120 const std::string sampler = to_ls_image2d_sampler();
3121 const std::string coord = to_ls_image2d_coord(_coord_x, coord_y, _coord_z, _coord_b);
3122 const std::string ls_buf = to_ls_image2d(_type, _ls_width_full, dst, sampler, coord);
3123
3124 _writer->write_text(ls_buf);
3125 _writer->write_text(";\n");
3126
3127 out_of_bound_finalize_y(dst);
3128 }
3129
3130 void finalize() override
3131 {
3132 }
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003133
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003134private:
3135 IVectorTile *_dst{ nullptr };
3136 int32_t _ls_width_full{ 0 };
3137 std::string _coord_x{};
3138 std::string _coord_z{};
3139 std::string _coord_b{};
3140
3141 void out_of_bound_initialize_y(std::string &coord)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003142 {
3143 std::string max = "";
3144
3145 const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
3146
3147 switch(address_mode_y)
3148 {
3149 case TensorSamplerAddressModeY::Skip:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003150 max = _mapper.tensor_component_y();
3151 _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n");
3152 _writer->compound_statement_begin();
3153 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003154 case TensorSamplerAddressModeY::SkipMinEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003155 _writer->write_text("if(" + coord + " >= 0)\n");
3156 _writer->compound_statement_begin();
3157 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003158 case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003159 max = _mapper.tensor_component_y();
3160 _writer->write_text("if(" + coord + " < " + max + ")\n");
3161 _writer->compound_statement_begin();
3162 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003163 case TensorSamplerAddressModeY::ClampToBorder:
3164 case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
3165 case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
3166 case TensorSamplerAddressModeY::ClampToNearest:
3167 case TensorSamplerAddressModeY::ClampToMaxEdgeOnly:
3168 case TensorSamplerAddressModeY::ClampToMinEdgeOnly:
3169 case TensorSamplerAddressModeY::None:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003170 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003171 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003172 std::cout << "Unsupported address mode for write_out_of_bound_check_y" << std::endl;
3173 assert(false);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003174 }
3175 };
3176
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003177 void out_of_bound_finalize_y(const std::string &dst)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003178 {
3179 CKW_UNUSED(dst);
3180
3181 const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
3182
3183 switch(address_mode_y)
3184 {
3185 case TensorSamplerAddressModeY::Skip:
3186 case TensorSamplerAddressModeY::SkipMinEdgeOnly:
3187 case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003188 _writer->compound_statement_end();
3189 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003190
3191 default:
3192 assert(false);
3193 }
3194 };
3195
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003196 std::string to_ls_image2d(GpuLoadStoreType type, int32_t vector_width, const std::string &data,
3197 const std::string &sampler, const std::string &coord)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003198 {
3199 CKW_UNUSED(vector_width);
3200
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003201 auto tensor_storage = static_cast<GpuTensorStorage>(_mapper.gpu_sampler().storage);
3202 const std::string image2d_obj = _mapper.tensor_argument()->storage(tensor_storage);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003203 const std::string post_fix = _dst->format().dt == DataType::Fp32 ? "f" : "h";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003204
3205 switch(type)
3206 {
3207 case GpuLoadStoreType::Load:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003208 return data + " = read_image" + post_fix + "(" + image2d_obj + ", " + sampler + ", " + coord + ")";
3209 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003210 case GpuLoadStoreType::Store:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003211 return "write_image" + post_fix + "(" + image2d_obj + ", " + coord + ", " + data + ")";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003212 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003213 assert(false);
3214 std::cout << "Unsupported GpuLoadStoreType" << std::endl;
3215 assert(false);
3216 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003217 }
3218 }
3219
3220 std::string to_ls_image2d_sampler() const
3221 {
3222 const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
3223
3224 switch(address_mode_y)
3225 {
3226 case TensorSamplerAddressModeY::None:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003227 return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003228 case TensorSamplerAddressModeY::Skip:
3229 case TensorSamplerAddressModeY::SkipMinEdgeOnly:
3230 case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
3231 case TensorSamplerAddressModeY::ClampToBorder:
3232 case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
3233 case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003234 return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003235 case TensorSamplerAddressModeY::ClampToNearest:
3236 case TensorSamplerAddressModeY::ClampToMaxEdgeOnly:
3237 case TensorSamplerAddressModeY::ClampToMinEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003238 return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003239 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003240 std::cout << "Unsupported address_mode_coord" << std::endl;
3241 assert(false);
3242 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003243 }
3244 }
3245
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003246 std::string to_ls_image2d_coord(const std::string &x, const std::string &y, const std::string &z,
3247 const std::string &b) const
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003248 {
3249 std::string coord_x = "(" + x + ") >> 2";
3250 std::string coord_y = "(";
3251
Jakub Sujake1c96e72023-07-31 13:36:58 +01003252 if(y != "0")
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003253 {
3254 coord_y += y;
3255 }
3256 if(z != "0" && (_mapper.is_one_component_z() != true))
3257 {
3258 const std::string dim = _mapper.tensor_component_y();
3259 coord_y += " + (";
3260 coord_y += z + ")";
3261 coord_y += " * ";
3262 coord_y += dim;
3263 }
3264 if(b != "0" && (_mapper.is_one_component_batch() != true))
3265 {
3266 const std::string dim0 = _mapper.tensor_component_y();
3267 const std::string dim1 = _mapper.tensor_component_z();
3268 coord_y += " + (";
3269 coord_y += b + ")";
3270 coord_y += " * ";
3271 coord_y += dim0;
3272 coord_y += " * ";
3273 coord_y += dim1;
3274 }
3275 coord_y += ")";
3276 return "(int2)(" + coord_x + ", " + coord_y + ")";
3277 }
3278};
3279
3280/** IGpuLoadStoreHelperWriter factory class */
3281class ClLoadStoreHelperWriterFactory final
3282{
3283public:
3284 /** Static method to call the IGpuLoadStoreHelperWriter class accordingly with the tensor storage set in the mapper
3285 *
3286 *
3287 * @return IGpuLoadStoreHelperWriter
3288 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003289 static std::unique_ptr<IGpuLoadStoreHelperWriter>
3290 create(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003291 {
3292 const auto tensor_storage = mapper.gpu_sampler().storage;
3293 switch(tensor_storage)
3294 {
3295 case GpuSamplerTensorStorage::BufferUint8Ptr:
3296 return std::make_unique<ClLoadStoreBufferHelperWriter>(x, mapper, type);
3297 case GpuSamplerTensorStorage::Image2dReadOnly:
3298 case GpuSamplerTensorStorage::Image2dWriteOnly:
3299 return std::make_unique<ClLoadStoreImage2dHelperWriter>(x, mapper, type);
3300 default:
3301 std::cout << "Unsupported Gpu tensor storage" << std::endl;
3302 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01003303 return nullptr;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003304 }
3305 }
3306};
3307
3308// This utility method needs to go in utils.h
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003309inline bool is_tile_scalar(const IVectorTile *x)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003310{
3311 return x->format().w == 1 && x->format().h == 1;
3312}
3313
3314class ClKernelWriter : public IGpuKernelWriter
3315{
3316public:
3317 ClKernelWriter(GpuKernelWriterAttribute *attr, GpuKernelWriterDataHolder *x)
3318 {
3319 _data = x;
3320 _attr = attr;
3321 }
3322
3323 ClKernelWriter(const ClKernelWriter &) = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003324
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003325 ClKernelWriter &operator=(const ClKernelWriter &) = default;
3326
3327 // A IdSpaced ID is a term used to describe a fragment that is registered in ICode to ensure
3328 // there are no conflicts or ambiguity in the code
3329 void set_IdSpace(int32_t id) override
3330 {
3331 _data->tiles.set_IdSpace(id);
3332 _data->arguments.set_IdSpace(id);
3333 }
3334
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003335 void import_tile(const std::string &dst_name, const IVectorTile *src) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003336 {
3337 _data->tiles.insert(dst_name, src);
3338 }
3339
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003340 void declare_argument(const std::string &name, const TensorInfo &tensor) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003341 {
3342 assert(_data->arguments[name] == nullptr);
3343 _data->arguments.insert(name, tensor, _attr->return_tensor_component_by_value);
3344 }
3345
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003346 void declare_tile(const std::string &name, const TileInfo &format) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003347 {
3348 assert(_data->tiles[name] == nullptr);
3349 _data->tiles.insert(name, format);
3350
3351 IVectorTile *x = _data->tiles[name];
3352
3353 for(auto &t : x->underlying_source_variables())
3354 {
3355 _data->code += t.type.str + " " + t.str + ";\n";
3356 }
3357 }
3358
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003359 void declare_const_tile(const std::string &name, const std::vector<std::vector<std::string>> &in,
3360 DataType dt) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003361 {
3362 assert(_data->tiles[name] == nullptr);
3363 _data->tiles.insert(name, in, dt);
3364 // Note: A constant does not need to be declared in the code
3365 }
3366
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003367 void write_text(const std::string &x) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003368 {
3369 _data->code += x;
3370 }
3371
3372 void compound_statement_begin() override
3373 {
3374 _data->tiles.increment_registry_level();
3375 _data->code += "{\n";
3376 }
3377
3378 void compound_statement_end() override
3379 {
3380 _data->tiles.decrement_registry_level();
3381 _data->code += "}\n";
3382 }
3383
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003384 void op_get_global_id(const Operand &dst_var, int32_t dim) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003385 {
3386 assert(dst_var.type() == OperandType::Tile);
3387 assert(_data->tiles.has_tile(dst_var.value()));
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003388 assert(_data->tiles[dst_var.value()]->format().w == 1 && _data->tiles[dst_var.value()]->format().h == 1); // It must be a scalar variable
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003389
3390 auto var = _data->tiles[dst_var.value()];
3391
3392 _data->code += var->scalar(0, 0).str;
3393 _data->code += " = get_global_id(";
3394 _data->code += std::to_string(dim);
3395 _data->code += ");\n";
3396 };
3397
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003398 void op_get_global_coord(const Operand &o_dst, const Operand &o_step, const TensorOperand &o_tensor,
3399 int32_t dim) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003400 {
3401 OperandUnpacker operands(_data->tiles, _data->arguments);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003402 auto dst = operands.unpack(o_dst);
3403 auto step = operands.unpack(o_step);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003404
3405 // Validation: Check that x, y and z are scalar
3406
3407 TensorOperandUnpacker tensor_operands(_data->arguments);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003408 auto tensor = tensor_operands.unpack(o_tensor);
3409 auto gpu_sampler = o_tensor.sampler();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003410
3411 GpuTensor3dMapper mapper(tensor, gpu_sampler);
3412
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003413 switch(dim)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003414 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003415 case 0:
3416 if(mapper.is_one_component_x())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003417 {
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003418 _data->code += dst->scalar(0, 0).str;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003419 _data->code += " = 0;\n";
3420 }
3421 else
3422 {
3423 if(mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin)
3424 {
3425 // Validation: Check: fixed tensor shape
3426 // TO BE CHANGED
3427 _data->code += dst->scalar(0, 0).str;
3428 _data->code += " = get_global_id(0) * ";
3429 _data->code += step->scalar(0, 0).str;
3430 _data->code += ";\n";
3431 }
3432 else
3433 {
3434 _data->code += dst->scalar(0, 0).str;
3435 _data->code += " = get_global_id(0) * ";
3436 _data->code += step->scalar(0, 0).str;
3437 _data->code += ";\n";
3438 }
3439 }
3440 break;
3441 case 1:
3442 if(mapper.is_one_component_y())
3443 {
3444 _data->code += dst->scalar(0, 0).str;
3445 _data->code += " = 0;\n";
3446 }
3447 else
3448 {
3449 if(mapper.gpu_sampler().address_mode_y == TensorSamplerAddressModeY::OverlappingMin)
3450 {
3451 }
3452 else
3453 {
3454 _data->code += dst->scalar(0, 0).str;
3455 _data->code += " = get_global_id(1) * ";
3456 _data->code += step->scalar(0, 0).str;
3457 _data->code += ";\n";
3458 }
3459 }
3460 break;
3461 case 2:
3462 if(mapper.is_one_component_z())
3463 {
3464 _data->code += dst->scalar(0, 0).str;
3465 _data->code += " = 0;\n";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003466 }
3467 else
3468 {
3469 _data->code += dst->scalar(0, 0).str;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003470 _data->code += " = get_global_id(2) * ";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003471 _data->code += step->scalar(0, 0).str;
3472 _data->code += ";\n";
3473 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003474 break;
3475 default:
3476 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003477 }
3478 };
3479
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003480 void op_get_global_batch(const Operand &o_dst, const TensorOperand &o_tensor) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003481 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003482 OperandUnpacker operands(_data->tiles, _data->arguments);
3483 const IVectorTile *dst = operands.unpack(o_dst);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003484
3485 TensorOperandUnpacker tensor_operands(_data->arguments);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003486 IGpuTensorArgument *tensor = tensor_operands.unpack(o_tensor);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003487 auto gpu_sampler = o_tensor.sampler();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003488
3489 GpuTensor3dMapper mapper(tensor, gpu_sampler);
3490
3491 if(mapper.is_one_component_batch())
3492 {
3493 _data->code += dst->scalar(0, 0).str;
3494 _data->code += " = 0;\n";
3495 }
3496 else
3497 {
3498 std::cout << "Unsupported batched computation" << std::endl;
3499 assert(false);
3500 }
3501 };
3502
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003503 void op_get_global_size(const Operand &dst_var, int32_t dim) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003504 {
3505 assert(dst_var.type() == OperandType::Tile);
3506 assert(_data->tiles.has_tile(dst_var.value()));
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003507 assert(_data->tiles[dst_var.value()]->format().w == 1 && _data->tiles[dst_var.value()]->format().h == 1); // It must be a scalar variable
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003508
3509 auto var = _data->tiles[dst_var.value()];
3510
3511 _data->code += var->scalar(0, 0).str;
3512 _data->code += " = get_global_size(";
3513 _data->code += std::to_string(dim);
3514 _data->code += ");\n";
3515 }
3516
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003517 void op_unary_expression(const Operand &dst_name, UnaryOp op, const Operand &src_name) override
3518 {
3519 OperandUnpacker operands(_data->tiles, _data->arguments);
3520 const IVectorTile *src = operands.unpack(src_name);
3521 const IVectorTile *dst = operands.unpack(dst_name);
3522
3523 const int32_t dst_w = dst->format().w;
3524 const int32_t dst_h = dst->format().h;
3525 const int32_t src_w = src->format().w;
3526 const std::string dt = dst->underlying_source_variables()[0].type.str;
3527
3528 const bool broadcast_src_x = dst_w != 1 && src_w == 1;
3529
3530 const std::string src_prefix = broadcast_src_x ? "(" + dt + ")" : "";
3531
3532 // Broadcasting on Y is automatic
3533 for(int32_t y = 0; y < dst_h; ++y)
3534 {
3535 _data->code += dst->vector(y).str;
3536 _data->code += " = ";
3537 _data->code += to_string(op);
3538 _data->code += src_prefix + src->vector(y).str;
3539 _data->code += ";\n";
3540 }
3541 }
3542
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003543 void op_binary_expression(const Operand &dst_name, const Operand &lhs_name, BinaryOp op,
3544 const Operand &rhs_name) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003545 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003546 OperandUnpacker operands(_data->tiles, _data->arguments);
3547 const IVectorTile *lhs = operands.unpack(lhs_name);
3548 const IVectorTile *rhs = operands.unpack(rhs_name);
3549 const IVectorTile *dst = operands.unpack(dst_name);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003550
3551 const int32_t dst_w = dst->format().w;
3552 const int32_t dst_h = dst->format().h;
3553 assert(lhs != nullptr);
3554 const int32_t lhs_w = lhs->format().w;
3555 const int32_t rhs_w = rhs->format().w;
3556
3557 if(op == BinaryOp::MatMul_Nt_T)
3558 {
3559 assert((dst->format().dt == DataType::Fp32) || (dst->format().dt == DataType::Fp16));
3560 for(int32_t y = 0; y < dst_h; ++y)
3561 {
3562 for(int32_t x = 0; x < dst_w; ++x)
3563 {
3564 for(int32_t k = 0; k < lhs_w; ++k)
3565 {
3566 _data->code += dst->scalar(x, y).str;
3567 _data->code += " = fma(";
3568 _data->code += lhs->scalar(k, y).str;
3569 _data->code += ", ";
3570 _data->code += rhs->scalar(k, x).str;
3571 _data->code += ", ";
3572 _data->code += dst->scalar(x, y).str;
3573 _data->code += ");\n";
3574 }
3575 }
3576 }
3577
3578 return;
3579 }
3580
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003581 const bool broadcast_lhs_x = dst_w != 1 && lhs_w == 1;
3582 const bool broadcast_rhs_x = dst_w != 1 && rhs_w == 1;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003583
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003584 const std::string lhs_prefix = broadcast_lhs_x ? "(" + dst->underlying_source_variables()[0].type.str + ")" : "";
3585 const std::string rhs_prefix = broadcast_rhs_x ? "(" + dst->underlying_source_variables()[0].type.str + ")" : "";
3586 const std::string op_str = to_string(op);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003587
3588 // Broadcasting on Y is automatic
3589 for(int32_t y = 0; y < dst_h; ++y)
3590 {
3591 _data->code += dst->vector(y).str;
3592 _data->code += " = ";
3593 _data->code += lhs_prefix + lhs->vector(y).str;
3594 _data->code += " ";
3595 _data->code += op_str;
3596 _data->code += " ";
3597 _data->code += rhs_prefix + rhs->vector(y).str;
3598 _data->code += ";\n";
3599 }
3600 };
3601
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003602 void op_cast_expression(const Operand &o_dst, const Operand &o_src, ConvertPolicy policy) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003603 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003604 OperandUnpacker operands(_data->tiles, _data->arguments);
3605 const IVectorTile *src = operands.unpack(o_src);
3606 const IVectorTile *dst = operands.unpack(o_dst);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003607 // const int32_t dst_w = dst->format().w;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003608 const int32_t dst_h = dst->format().h;
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003609 const std::string dt = dst->underlying_source_variables()[0].type.str;
Adnan AlSinan66f3d382023-07-10 15:07:45 +01003610 const bool is_float = (dst->format().dt == DataType::Fp32) || (dst->format().dt == DataType::Fp16);
3611 const std::string sat = ((policy == ConvertPolicy::Saturate && !is_float) ? "_sat" : "");
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003612
3613 // Broadcasting on Y is automatic
3614 for(int32_t y = 0; y < dst_h; ++y)
3615 {
3616 _data->code += dst->vector(y).str;
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003617 _data->code += " = convert_" + dt + sat + "(";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003618 _data->code += src->vector(y).str;
3619 _data->code += ");\n";
3620 }
3621 };
3622
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003623 void op_assign(const Operand &dst_name, const Operand &src_name) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003624 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003625 OperandUnpacker operands(_data->tiles, _data->arguments);
3626 const IVectorTile *src = operands.unpack(src_name);
3627 const IVectorTile *dst = operands.unpack(dst_name);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003628
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003629 const int32_t dst_w = dst->format().w;
3630 const int32_t dst_h = dst->format().h;
3631 const int32_t src_w = src->format().w;
3632 const std::string dt = dst->underlying_source_variables()[0].type.str;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003633
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003634 const bool broadcast_src_x = dst_w != 1 && src_w == 1;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003635
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003636 const std::string src_prefix = broadcast_src_x ? "(" + dt + ")" : "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003637
3638 // Broadcasting on Y is automatic
3639 for(int32_t y = 0; y < dst_h; ++y)
3640 {
3641 _data->code += dst->vector(y).str;
3642 _data->code += " = ";
3643 _data->code += src_prefix + src->vector(y).str;
3644 _data->code += ";\n";
3645 }
3646 }
3647
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003648 void
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003649 op_unary_elementwise_function(const Operand &dst_name, UnaryFunction func, const Operand &src_name) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003650 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003651 OperandUnpacker operands(_data->tiles, _data->arguments);
3652 const IVectorTile *src = operands.unpack(src_name);
3653 const IVectorTile *dst = operands.unpack(dst_name);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003654
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003655 const int32_t dst_h = dst->format().h;
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003656 const std::string dt = dst->underlying_source_variables()[0].type.str;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003657
SiCong Li16b37522023-07-18 17:56:49 +01003658 // Always perform an explicit cast. This automatically covers at least the 2 scenarios:
3659 // 1. Widen a scalar into a vector type. This enables scalar-vector broadcasting
3660 // 2. Ensure non-ambiguity over function overloads.
3661 // E.g. a constant tile may be accidentally initialized with a double literal. By casting it to single float,
3662 // it avoids ambiguous function calls
3663 const std::string src_prefix = "(" + dt + ")";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003664
3665 // Broadcasting on Y is automatic
3666 for(int32_t y = 0; y < dst_h; ++y)
3667 {
3668 _data->code += dst->vector(y).str;
3669 _data->code += " = ";
3670
3671 switch(func)
3672 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003673 case UnaryFunction::Exp:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003674 _data->code += "exp(";
3675 break;
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003676 case UnaryFunction::Tanh:
3677 _data->code += "tanh(";
3678 break;
3679 case UnaryFunction::Sqrt:
3680 _data->code += "sqrt(";
3681 break;
3682 case UnaryFunction::Erf:
3683 _data->code += "erf(";
3684 break;
3685 case UnaryFunction::Fabs:
3686 _data->code += "fabs(";
3687 break;
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003688 case UnaryFunction::Log:
3689 _data->code += "log(";
3690 break;
3691 case UnaryFunction::SizeOf:
3692 _data->code += "sizeof(";
3693 break;
3694 case UnaryFunction::Round:
3695 _data->code += "round(";
3696 break;
Gunes Bayir91cb7332023-07-25 17:00:33 +01003697 case UnaryFunction::Floor:
3698 _data->code += "floor(";
3699 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003700 default:
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003701 CKW_ASSERT_MSG(false, "Unexpected UnaryFunction used.");
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003702 }
3703
3704 _data->code += src_prefix + src->vector(y).str;
3705 _data->code += ");\n";
3706 }
3707 }
3708
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003709 void op_binary_elementwise_function(const Operand &dst_name, BinaryFunction func, const Operand &first_name, const Operand &second_name) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003710 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003711 OperandUnpacker operands(_data->tiles, _data->arguments);
3712 const IVectorTile *first = operands.unpack(first_name);
3713 const IVectorTile *second = operands.unpack(second_name);
3714 const IVectorTile *dst = operands.unpack(dst_name);
3715
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003716 const int32_t dst_h = dst->format().h;
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003717 const auto datatype = dst->underlying_source_variables()[0].type;
3718 const std::string datatype_str = datatype.str;
3719
SiCong Li16b37522023-07-18 17:56:49 +01003720 // Always perform an explicit cast. See similar comments in op_unary_elementwise_function
3721 const std::string first_prefix = "(" + datatype_str + ")";
3722 const std::string second_prefix = "(" + datatype_str + ")";
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003723
3724 const bool is_float = (datatype.dt == DataType::Fp32 || datatype.dt == DataType::Fp16);
3725
3726 // Broadcasting on Y is automatic
3727 for(int32_t y = 0; y < dst_h; ++y)
3728 {
3729 _data->code += dst->vector(y).str;
3730 _data->code += " = ";
3731
3732 switch(func)
3733 {
3734 case BinaryFunction::Min:
3735 _data->code += is_float ? "fmin(" : "min(";
3736 break;
3737 case BinaryFunction::Max:
3738 _data->code += is_float ? "fmax(" : "max(";
3739 break;
3740 default:
3741 CKW_ASSERT_MSG(false, "Unexpected BinaryFunction used.");
3742 }
3743
3744 _data->code += first_prefix + first->vector(y).str;
3745 _data->code += ", ";
3746 _data->code += second_prefix + second->vector(y).str;
3747 _data->code += ");\n";
3748 }
3749 }
3750
3751 void op_ternary_elementwise_function(const Operand &dst_name, TernaryFunction func, const Operand &first_name, const Operand &second_name, const Operand &third_name) override
3752 {
3753 OperandUnpacker operands(_data->tiles, _data->arguments);
3754 const IVectorTile *first = operands.unpack(first_name);
3755 const IVectorTile *second = operands.unpack(second_name);
3756 const IVectorTile *third = operands.unpack(third_name);
3757 const IVectorTile *dst = operands.unpack(dst_name);
3758
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003759 const int32_t dst_h = dst->format().h;
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003760 const std::string dt = dst->underlying_source_variables()[0].type.str;
3761
SiCong Li16b37522023-07-18 17:56:49 +01003762 // Always perform an explicit cast. See similar comments in op_unary_elementwise_function
3763 const std::string first_prefix = "(" + dt + ")";
3764 const std::string second_prefix = "(" + dt + ")";
3765 const std::string third_prefix = "(" + dt + ")";
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003766
3767 // Broadcasting on Y is automatic
3768 for(int32_t y = 0; y < dst_h; ++y)
3769 {
3770 _data->code += dst->vector(y).str;
3771 _data->code += " = ";
3772
3773 switch(func)
3774 {
3775 case TernaryFunction::Select:
3776 _data->code += "select(";
3777 break;
Gunes Bayir91cb7332023-07-25 17:00:33 +01003778 case TernaryFunction::Clamp:
3779 _data->code += "clamp(";
3780 break;
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003781 default:
3782 CKW_ASSERT_MSG(false, "Unexpected TernaryFunction used.");
3783 }
3784
3785 _data->code += first_prefix + first->vector(y).str;
3786 _data->code += ", ";
3787 _data->code += second_prefix + second->vector(y).str;
3788 _data->code += ", ";
3789 _data->code += third_prefix + third->vector(y).str;
3790 _data->code += ");\n";
3791 }
3792 }
3793
3794 void op_if_header(const Operand &o_lhs, BinaryOp op, const Operand &o_rhs) override
3795 {
3796 OperandUnpacker operands(_data->tiles, _data->arguments);
3797 const IVectorTile *lhs = operands.unpack(o_lhs);
3798 const IVectorTile *rhs = operands.unpack(o_rhs);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003799
3800 assert(is_tile_scalar(lhs));
3801 assert(is_tile_scalar(rhs));
3802
3803 _data->code += "if(";
3804 _data->code += lhs->scalar(0, 0).str;
3805 _data->code += " ";
3806 _data->code += to_string(op);
3807 _data->code += " ";
3808 _data->code += rhs->scalar(0, 0).str;
3809 _data->code += ")\n";
3810 }
3811
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003812 void op_else_if_header(const Operand &o_lhs, BinaryOp op, const Operand &o_rhs) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003813 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003814 _data->code += "else ";
3815 op_if_header(o_lhs, op, o_rhs);
3816 }
3817
3818 void op_else_header() override
3819 {
3820 _data->code += "else\n";
3821 }
3822
Nikolaj Jensenfab6c212023-06-27 14:13:24 +01003823 void op_for_loop_header(const Operand& var_name, BinaryOp cond_op, const Operand& cond_value_name, const Operand &update_var_name, AssignmentOp update_op, const Operand& update_value_name) override
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003824 {
3825 OperandUnpacker operands(_data->tiles, _data->arguments);
3826 const IVectorTile *var = operands.unpack(var_name);
3827 const IVectorTile *cond_value = operands.unpack(cond_value_name);
Nikolaj Jensenfab6c212023-06-27 14:13:24 +01003828 const IVectorTile *update_var = operands.unpack(update_var_name);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003829 const IVectorTile *update_value = operands.unpack(update_value_name);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003830
3831 const int32_t dst_w = var->format().w;
3832 const int32_t dst_h = var->format().h;
3833
3834 // It must be a scalar variable
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01003835 CKW_UNUSED(dst_w, dst_h);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003836 assert(dst_w == 1);
3837 assert(dst_h == 1);
3838
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003839 _data->code += "for(; ";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003840 _data->code += var->scalar(0, 0).str;
3841 _data->code += " ";
3842 _data->code += to_string(cond_op);
3843 _data->code += " " + cond_value->scalar(0, 0).str + "; ";
Nikolaj Jensenfab6c212023-06-27 14:13:24 +01003844 _data->code += update_var->scalar(0, 0).str;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003845 _data->code += " ";
3846 _data->code += to_string(update_op);
3847 _data->code += " " + update_value->scalar(0, 0).str + ")";
3848 _data->code += "\n";
3849 }
3850
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003851 void op_load_immediate(const TensorOperand &o_tensor, const Operand &o_dst, const Operand &o_x,
3852 const Operand &o_y, const Operand &o_z, const Operand &o_batch_idx,
3853 const Operand &dilation_y) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003854 {
3855 OperandUnpacker operands(_data->tiles, _data->arguments);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003856
3857 // Not const as it requires changes to 'load_writer'.
3858 IVectorTile *dst = operands.unpack(o_dst);
3859 IVectorTile *x = operands.unpack(o_x);
3860 IVectorTile *y = operands.unpack(o_y);
3861 IVectorTile *z = operands.unpack(o_z);
3862 IVectorTile *dil_y = operands.unpack(dilation_y);
3863 IVectorTile *b = operands.unpack(o_batch_idx);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003864
3865 TensorOperandUnpacker tensor_operands(_data->arguments);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003866 IGpuTensorArgument *tensor = tensor_operands.unpack(o_tensor);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003867 auto gpu_sampler = o_tensor.sampler();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003868
3869 GpuTensor3dMapper mapper(tensor, gpu_sampler);
3870
3871 auto load_writer = ClLoadStoreHelperWriterFactory::create(this, mapper, GpuLoadStoreType::Load);
3872
3873 // Initialize the constant part
3874 load_writer->initialize(dst, x, z, b);
3875
3876 for(int i = 0; i < dst->format().h; ++i)
3877 {
3878 std::string coord_y = y->scalar(0, 0).str + " + " + std::to_string(i);
3879 if(dil_y->scalar(0, 0).str != "1")
3880 {
3881 coord_y += " * " + dil_y->scalar(0, 0).str;
3882 }
3883 load_writer->write(std::make_pair(i, coord_y));
3884 }
3885
3886 load_writer->finalize();
3887 }
3888
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003889 void op_load_indirect(const TensorOperand &o_tensor, const Operand &o_dst, const Operand &o_x,
3890 const Operand &o_indirect_h, const Operand &o_z,
3891 const Operand &o_batch_idx) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003892 {
3893 OperandUnpacker operands(_data->tiles, _data->arguments);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003894
3895 // Not const as it requires changes to 'load_writer'.
3896 IVectorTile *dst = operands.unpack(o_dst);
3897 IVectorTile *x = operands.unpack(o_x);
3898 IVectorTile *y_ind = operands.unpack(o_indirect_h);
3899 IVectorTile *z = operands.unpack(o_z);
3900 IVectorTile *b = operands.unpack(o_batch_idx);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003901
3902 TensorOperandUnpacker tensor_operands(_data->arguments);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003903 IGpuTensorArgument *tensor = tensor_operands.unpack(o_tensor);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003904 auto gpu_sampler = o_tensor.sampler();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003905
3906 GpuTensor3dMapper mapper(tensor, gpu_sampler);
3907
3908 auto load_writer = ClLoadStoreHelperWriterFactory::create(this, mapper, GpuLoadStoreType::Load);
3909
3910 // Initialize the constant part
3911 load_writer->initialize(dst, x, z, b);
3912
3913 for(int i = 0; i < dst->format().h; ++i)
3914 {
3915 load_writer->write(std::make_pair(i, y_ind->scalar(0, i).str));
3916 }
3917
3918 load_writer->finalize();
3919 }
3920
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003921 void op_store_immediate(const TensorOperand &tensor_name, const Operand &src_name, const Operand &x_name,
3922 const Operand &y_name, const Operand &z_name,
3923 const Operand &batch_index_name) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003924 {
3925 OperandUnpacker operands(_data->tiles, _data->arguments);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003926
3927 // Not const as it requires changes to 'load_writer'.
3928 IVectorTile *src = operands.unpack(src_name);
3929 IVectorTile *x = operands.unpack(x_name);
3930 IVectorTile *y = operands.unpack(y_name);
3931 IVectorTile *z = operands.unpack(z_name);
3932 IVectorTile *b = operands.unpack(batch_index_name);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003933
3934 TensorOperandUnpacker tensor_operands(_data->arguments);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003935 IGpuTensorArgument *tensor = tensor_operands.unpack(tensor_name);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003936 auto gpu_sampler = tensor_name.sampler();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003937
3938 GpuTensor3dMapper mapper(tensor, gpu_sampler);
3939
3940 auto store_writer = ClLoadStoreHelperWriterFactory::create(this, mapper, GpuLoadStoreType::Store);
3941
3942 // Initialize the constant part
3943 store_writer->initialize(src, x, z, b);
3944
3945 int32_t tile_h = src->format().h;
3946
3947 for(int m0 = tile_h - 1; m0 >= 0; m0--)
3948 {
3949 store_writer->write(std::make_pair(m0, y->scalar(0, 0).str + " + " + std::to_string(m0)));
3950 }
3951
3952 store_writer->finalize();
3953 }
3954
3955 void op_return() override
3956 {
3957 _data->code += "return;\n";
3958 }
3959
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003960 void util_get_indirect_buffer(const Operand &o_dst, const TensorOperand &o_tensor, const Operand &o_x,
3961 const Operand &o_y, const Operand &o_x_off, const Operand &o_y_off) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003962 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003963 OperandUnpacker operands(_data->tiles, _data->arguments);
3964 const IVectorTile *dst = operands.unpack(o_dst);
3965 const IVectorTile *x = operands.unpack(o_x);
3966 const IVectorTile *y = operands.unpack(o_y);
3967 const IVectorTile *x_off = operands.unpack(o_x_off);
3968 const IVectorTile *y_off = operands.unpack(o_y_off);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003969
3970 TensorOperandUnpacker tensor_operands(_data->arguments);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003971 IGpuTensorArgument *tensor = tensor_operands.unpack(o_tensor);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003972
3973 assert(dst->format().w == 1);
3974 assert(x->format().w == 1);
3975 assert(y->format().w == 1);
3976 assert(x_off->format().w == 1);
3977 assert(y_off->format().w == 1);
3978 assert(dst->format().dt == DataType::Int32);
3979 assert(x->format().dt == DataType::Int32);
3980 assert(y->format().dt == DataType::Int32);
3981 assert(x_off->format().dt == DataType::Int32);
3982 assert(y_off->format().dt == DataType::Int32);
3983
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01003984 const std::string width = tensor->component(TensorComponentType::Dim1);
3985 const std::string height = tensor->component(TensorComponentType::Dim2);
3986 const std::string wxh = tensor->component(TensorComponentType::Dim1xDim2);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003987 /*
3988 int x_s;
3989 int y_s;
3990 x_s = (xi_0 + x_k);
3991 y_s = (yi_0 + y_k);
3992 mi_0 = x_s + y_s * width + b * widthxheight;
3993 mi_0 = select(-1, mi_0, x_s >= 0);
3994 mi_0 = select(-1, mi_0, y_s >= 0);
3995 mi_0 = select(-1, mi_0, x_s < 128);
3996 mi_0 = select(-1, mi_0, y_s < 128);
3997 */
3998 compound_statement_begin();
3999 declare_tile("_x_s", TileInfo(DataType::Int32));
4000 declare_tile("_y_s", TileInfo(DataType::Int32));
4001 auto x_s = operands.unpack(Operand("_x_s"));
4002 auto y_s = operands.unpack(Operand("_y_s"));
4003 for(int i = 0; i < dst->format().h; ++i)
4004 {
4005 // x_s = (xi_0 + x_k);
4006 // y_s = (yi_0 + y_k);
4007 _data->code += x_s->scalar(0, i).str;
4008 _data->code += " = (";
4009 _data->code += x->scalar(0, i).str;
4010 _data->code += " + ";
4011 _data->code += x_off->scalar(0, i).str;
4012 _data->code += ");\n";
4013 _data->code += y_s->scalar(0, i).str;
4014 _data->code += " = (";
4015 _data->code += y->scalar(0, i).str;
4016 _data->code += " + ";
4017 _data->code += y_off->scalar(0, i).str;
4018 _data->code += ");\n";
4019 // mi_0 = x_s + y_s * width;
4020 _data->code += dst->scalar(0, i).str;
4021 _data->code += " = ";
4022 _data->code += x_s->scalar(0, i).str;
4023 _data->code += " + ";
4024 _data->code += y_s->scalar(0, i).str;
4025 _data->code += " * " + width + ";\n";
4026 // mi_0 = select(wxh, mi_0, x_s >= 0);
4027 _data->code += dst->scalar(0, i).str;
4028 _data->code += " = select(-1, ";
4029 _data->code += dst->scalar(0, i).str;
4030 _data->code += ", ";
4031 _data->code += x_s->scalar(0, i).str;
4032 _data->code += " >= 0);\n";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004033 // mi_0 = select(wxh, mi_0, x_s < width);
4034 _data->code += dst->scalar(0, i).str;
4035 _data->code += " = select(-1, ";
4036 _data->code += dst->scalar(0, i).str;
4037 _data->code += ", ";
4038 _data->code += x_s->scalar(0, i).str;
4039 _data->code += " < ";
4040 _data->code += width + ");\n";
Jakub Sujake1c96e72023-07-31 13:36:58 +01004041 // mi_0 = select(wxh, mi_0, y_s >= 0);
4042 _data->code += dst->scalar(0, i).str;
4043 _data->code += " = select(-1, ";
4044 _data->code += dst->scalar(0, i).str;
4045 _data->code += ", ";
4046 _data->code += y_s->scalar(0, i).str;
4047 _data->code += " >= 0);\n";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004048 // mi_0 = select(wxh, mi_0, y_s < height);
4049 _data->code += dst->scalar(0, i).str;
4050 _data->code += " = select(-1, ";
4051 _data->code += dst->scalar(0, i).str;
4052 _data->code += ", ";
4053 _data->code += y_s->scalar(0, i).str;
4054 _data->code += " < ";
4055 _data->code += height + ");\n";
4056 }
4057 compound_statement_end();
4058 }
4059
4060private:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01004061 GpuKernelWriterDataHolder *_data{ nullptr };
4062 GpuKernelWriterAttribute *_attr{ nullptr };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004063};
4064
4065/** IGpuKernelWriter factory class */
4066class GpuKernelWriterFactory final
4067{
4068public:
4069 /** Static method to call the IGpuKernelWriter class accordingly with the Gpu programming language
4070 *
4071 * @param[in] gpu GPU target
4072 *
4073 * @return IGpuKernelWriter
4074 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01004075 static std::unique_ptr<IGpuKernelWriter>
4076 create(GpuKernelWriterAttribute *attr, GpuKernelWriterDataHolder *x)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004077 {
4078 switch(x->programming_language())
4079 {
4080 case GpuTargetLanguage::OpenCL:
4081 return std::make_unique<ClKernelWriter>(attr, x);
4082 default:
4083 std::cout << "Unsupported Gpu programming language" << std::endl;
4084 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01004085 return nullptr;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004086 }
4087 }
4088};
4089
Nikolaj Jensenacea4072023-07-03 09:44:42 +01004090inline int32_t
4091adjust_step(TensorSamplerFormat tensor_format, int32_t step, const TensorInfo *tensor_info_id, int32_t idx)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004092{
4093 auto tensor = tensor_info_id->shape;
4094
Nikolaj Jensenacea4072023-07-03 09:44:42 +01004095 int32_t dim[3] = { 0 };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004096
4097 switch(tensor_format)
4098 {
4099 case TensorSamplerFormat::C_W_H:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01004100 dim[0] = tensor[0];
4101 dim[1] = tensor[1];
4102 dim[2] = tensor[2];
4103 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004104 case TensorSamplerFormat::C_WH_1:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01004105 dim[0] = tensor[0];
4106 dim[1] = tensor[1] * tensor[2];
4107 dim[2] = 1;
4108 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004109 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01004110 std::cout << "Unsupported tensor format" << std::endl;
4111 assert(false);
4112 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004113 }
4114
4115 return std::min(step, dim[idx]);
4116}
4117
4118} // namespace prototype
4119} // namespace ckw
4120
Viet-Hoa Doce3c48c2023-07-03 13:44:43 +01004121#endif // CKW_PROTOTYPE_SRC_PROTOTYPE_H