blob: b392fe265128bd6d09fb4966cc64e8a6611b2178 [file] [log] [blame]
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001/*
2 * Copyright (c) 2023 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
Viet-Hoa Doce3c48c2023-07-03 13:44:43 +010025#ifndef CKW_PROTOTYPE_SRC_PROTOTYPE_H
26#define CKW_PROTOTYPE_SRC_PROTOTYPE_H
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010027
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010028#include "ckw/Error.h"
Nikolaj Jensenacea4072023-07-03 09:44:42 +010029#include "ckw/TensorInfo.h"
Nikolaj Jensen5ff48022023-06-27 14:13:24 +010030#include "ckw/types/ConvertPolicy.h"
31#include "ckw/types/DataType.h"
32#include "ckw/types/Functions.h"
33#include "ckw/types/GpuTargetLanguage.h"
34#include "ckw/types/Operators.h"
35#include "ckw/types/TensorSamplerTypes.h"
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010036
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010037#include <algorithm>
38#include <array>
39#include <cassert> // assert (to be removed)
40#include <chrono>
41#include <cmath>
42#include <cstdint> // int32_t
43#include <functional>
44#include <iostream> // cout (to be removed)
45#include <map>
46#include <memory>
47#include <stdexcept>
48#include <string>
49#include <unordered_map>
50#include <vector>
51
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010052namespace ckw
53{
Nikolaj Jensenacea4072023-07-03 09:44:42 +010054namespace prototype
55{
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010056
57// Dummy data structure for Size2D
58using Size2D = std::vector<int32_t>;
59
60// Dummy Status
61using Status = void;
62
63enum class ComponentType : int32_t
64{
65 Complex = 0,
66 Simple = 1,
67 Unfusable = 2
68};
69
70enum class GpuCompilationSpeed
71{
Nikolaj Jensenacea4072023-07-03 09:44:42 +010072 Fast = 0x00, // fast compilation may increase the latency of the network
73 Slow = 0x01 // slow compilation may decrease the latency of the network
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010074};
75
76enum class GpuExtensions
77{
78 Fp16,
79 Dot8,
80 Mmul,
81 FastMath
82};
83
84struct TensorInfo
85{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010086 TensorShape shape{{0}};
87 DataType data_type{DataType::Unknown};
88 TensorDataLayout data_layout{TensorDataLayout::Nhwc};
89 int32_t id{-1};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010090};
91
92struct ComponentAttribute
93{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010094 GpuCompilationSpeed compilation_speed{GpuCompilationSpeed::Fast};
95 bool overwrite_tile{true};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +010096};
97
98inline std::string data_type_to_cl_type(DataType dt)
99{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100100 switch (dt)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100101 {
102 case DataType::Fp32:
103 return "float";
104 case DataType::Fp16:
105 return "half";
106 case DataType::Int8:
107 return "char";
108 case DataType::Uint8:
109 return "uchar";
110 case DataType::Uint16:
111 return "ushort";
112 case DataType::Int16:
113 return "short";
114 case DataType::Uint32:
115 return "uint";
116 case DataType::Int32:
117 return "int";
118 case DataType::Bool:
119 return "bool";
120 default:
121 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +0100122 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100123 }
124}
125
126inline int32_t width_to_cl_vector_size(int32_t width)
127{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100128 switch (width)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100129 {
130 case 1:
131 return 1;
132 case 2:
133 return 2;
134 case 3:
135 return 3;
136 case 4:
137 return 4;
138 case 5:
139 case 6:
140 case 7:
141 case 8:
142 return 8;
143 case 9:
144 case 10:
145 case 11:
146 case 12:
147 case 13:
148 case 14:
149 case 15:
150 case 16:
151 return 16;
152 default:
153 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +0100154 return 0;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100155 }
156}
157
158inline std::string get_cl_data_type(DataType dt, int32_t width)
159{
160 std::string data_type;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100161 int32_t w = width_to_cl_vector_size(width);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100162 data_type += data_type_to_cl_type(dt);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100163 if (w != 1)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100164 {
165 data_type += std::to_string(w);
166 }
167 return data_type;
168}
169
170inline std::string to_opencl_store(int32_t vector_length)
171{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100172 if (vector_length != 1)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100173 {
174 return "vstore" + std::to_string(vector_length) + "(";
175 }
176 else
177 {
178 return "*(";
179 }
180}
181
182struct TileInfo
183{
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100184 TileInfo()
185 {
186 }
187
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100188 TileInfo(DataType dt) : dt(dt), w(1), h(1)
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100189 {
190 }
191
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100192 TileInfo(DataType dt, int32_t width) : dt(dt), w(width), h(1)
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100193 {
194 }
195
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100196 TileInfo(DataType dt, int32_t width, int32_t height) : dt(dt), w(width), h(height)
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100197 {
198 }
199
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100200 DataType dt{DataType::Unknown}; // Data type of the tile
201 int32_t w{0}; // Width (i.e. c0 - portion of the channels)
202 int32_t h{0}; // Height (i.e. s0 - portion of the spatial dimensions)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100203};
204
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100205inline std::ostream &operator<<(std::ostream &o, const TileInfo &a)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100206{
207 o << a.w << " x " << a.h;
208 return o;
209}
210
211struct DataTypeAsString
212{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100213 std::string str{""};
214 DataType dt{DataType::Unknown};
215 int32_t size{1};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100216};
217
218struct ValueAsString
219{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100220 std::string str{""};
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100221 DataTypeAsString type{};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100222};
223
224// https://stackoverflow.com/questions/51515378/storing-and-accessing-tile-properties-in-c
225// A Tile is a collection of variables used to express a 2D data.
226class IScalarTile
227{
228public:
229 virtual ~IScalarTile() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100230
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100231 /** Method to get the scalar variable from a tile
232 * @param[in] x X coordinate on the width of the tile. If out-of-bound, the coordinate is clamped to the nearest valid edge
233 * @param[in] y Y coordinate on the height of the tile. If out-of-bound, the coordinate is clamped to the nearest valid edge
234 *
235 * @return the scalar variable as a string
236 */
237 virtual ValueAsString scalar(int32_t x, int32_t y) const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100238
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100239 /** Method to get the list of underlying variable names used by the tile
240 *
241 * @return the list of variable names
242 */
243 virtual std::vector<ValueAsString> underlying_source_variables() const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100244
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100245 /** Method to get the name of the tile.
246 *
247 * @return the name of the tile
248 */
249 std::string name() const
250 {
251 return _basename;
252 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100253
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100254 /** Method to get the tile format
255 *
256 * @return the format
257 */
258 TileInfo format() const
259 {
260 return _format;
261 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100262
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100263 /** Method to know whether the tile is assignable or not (constant)
264 *
265 * @return true if the tile is assignable
266 */
267 virtual bool is_assignable() const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100268
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100269 /** Method to know whether the tile needs to be declared
270 *
271 * @return true if the tile needs to be declared in the code before being used
272 */
273 virtual bool need_declaration() const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100274
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100275protected:
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100276 TileInfo _format{}; // Tile format
277 std::string _basename{""}; // Tile name
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100278};
279
280// A tile is a collection of variables used to express a 2D data. The variables are vectors in the GPU context.
281// The vector size is given by the width of the tile. The number of vectors height by depth defines the number of vectors
282class IVectorTile : public IScalarTile
283{
284public:
285 virtual ~IVectorTile() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100286
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100287 /** Method to get the vector variable from a tile. A vector is an ordered homogeneous collection of two or more scalars.
288 * The user can query the list of supported width for the vectors through preferred_vector_sizes().
289 *
290 * @param[in] y Y coordinate on the height of the tile. If out-of-bound, the coordinate is clamped to the nearest valid edge
291 *
292 * @return the vector variable as a string
293 */
294 virtual ValueAsString vector(int32_t y) const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100295
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100296 /** Method to get a vector variable from a tile. A vector is an ordered homogeneous collection of two or more scalars.
297 *
298 * @return the vector variable as a string
299 */
300 virtual ValueAsString vector(int32_t x_start, int32_t width, int32_t y) const = 0;
301 /** Method to get the preferred vector sizes.
302 *
303 * @return a vector with the preferred vector sizes
304 */
305 //virtual std::vector<int32_t> preferred_vector_sizes() const = 0;
306};
307
308class ClTile : public IVectorTile
309{
310public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100311 ClTile(const std::string &name, TileInfo format)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100312 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100313 _format = format;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100314 _basename = name;
315 }
316
317 ValueAsString scalar(int32_t x, int32_t y) const override
318 {
319 x = std::max(std::min(x, _format.w - 1), static_cast<int32_t>(0));
320 y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
321
322 ValueAsString t;
323 t.str = build_variable_name(y);
324 t.type.str = get_cl_data_type(_format.dt, 1);
325 t.type.dt = _format.dt;
326 t.type.size = 1;
327
328 // Check required because if the width has only one element, we cannot use .s0
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100329 if (_format.w != 1)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100330 {
331 // Automatic broadcasting
332 t.str += ".s" + std::to_string(x);
333 }
334
335 return t;
336 }
337
338 ValueAsString vector(int32_t y) const override
339 {
340 y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
341
342 ValueAsString t;
343 t.str = build_variable_name(y);
344 t.type.str = get_cl_data_type(_format.dt, _format.w);
345 t.type.dt = _format.dt;
346 t.type.size = _format.w;
347 return t;
348 }
349
350 ValueAsString vector(int32_t x_start, int32_t width, int32_t y) const override
351 {
352 y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
353
354 ValueAsString t;
355 t.str = build_variable_name(y);
356 t.type.str = get_cl_data_type(_format.dt, width);
357 t.type.dt = _format.dt;
358 t.type.size = width;
359
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100360 if (_format.w != 1)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100361 {
362 t.str += ".s";
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100363 for (int i = 0; i < width; ++i)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100364 {
365 t.str += to_scalar_hex(x_start + i);
366 }
367 }
368 return t;
369 }
370
371 std::vector<ValueAsString> underlying_source_variables() const override
372 {
373 std::vector<ValueAsString> vars;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100374 for (int32_t y = 0; y < _format.h; ++y)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100375 {
376 ValueAsString t;
377 t.str = build_variable_name(y);
378 t.type.str = get_cl_data_type(_format.dt, _format.w);
379 t.type.dt = _format.dt;
380 t.type.size = _format.w;
381 vars.push_back(t);
382 }
383 return vars;
384 }
385
386 bool is_assignable() const override
387 {
388 return true;
389 }
390
391 bool need_declaration() const override
392 {
393 return true;
394 }
395
396private:
397 std::string build_variable_name(int32_t y) const
398 {
399 std::string var_name = _basename;
400
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100401 if (_format.h == 1)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100402 {
403 return var_name;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100404 }
405 else
406 {
407 var_name += "_";
408 var_name += std::to_string(y);
409 }
410
411 return var_name;
412 }
413
414 std::string to_scalar_hex(int32_t x) const
415 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100416 switch (x)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100417 {
418 case 0:
419 case 1:
420 case 2:
421 case 3:
422 case 4:
423 case 5:
424 case 6:
425 case 7:
426 case 8:
427 case 9:
428 return std::to_string(x);
429 case 10:
430 return "A";
431 case 11:
432 return "B";
433 case 12:
434 return "C";
435 case 13:
436 return "D";
437 case 14:
438 return "E";
439 case 15:
440 return "F";
441 default:
Viet-Hoa Doe1880f02023-06-28 10:25:35 +0100442 std::cout << "Unsupported hexadecimal value" << std::endl;
443 assert(false);
444 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100445 }
446 }
447};
448
449// Unique features: It contains values in the form of string. The name used for this object is misleading since the variables can change the value over time.
450class ClConstantTile : public IVectorTile
451{
452public:
453 ClConstantTile(const std::vector<std::vector<std::string>> &in, DataType dt)
454 {
455 _format.w = in[0].size();
456 _format.h = in.size();
457 _format.dt = dt;
458
459 _data = std::vector<std::vector<std::string>>(_format.h, std::vector<std::string>(_format.w));
460
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100461 for (int32_t y = 0; y < _format.h; ++y)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100462 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100463 for (int32_t x = 0; x < _format.w; ++x)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100464 {
465 _data[y][x] = in[y][x];
466 }
467 }
468 }
469
470 ValueAsString scalar(int32_t x, int32_t y) const override
471 {
472 x = std::max(std::min(x, _format.w - 1), static_cast<int32_t>(0));
473 y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
474
475 ValueAsString t;
476 t.str = _data[y][x];
477 t.type.str = get_cl_data_type(_format.dt, 1);
478 t.type.dt = _format.dt;
479 t.type.size = 1;
480
481 return t;
482 }
483
484 ValueAsString vector(int32_t y) const override
485 {
486 y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
487
488 return vector(0, _format.w, y);
489 }
490
491 ValueAsString vector(int32_t x_start, int32_t width, int32_t y) const override
492 {
493 y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
494
495 ValueAsString t;
496 t.str = "";
497 t.type.str = get_cl_data_type(_format.dt, width);
498 t.type.dt = _format.dt;
499 t.type.size = width;
500
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100501 if (width > 1)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100502 {
503 t.str += "((" + get_cl_data_type(_format.dt, width) + ")(";
504 }
505
506 int32_t x = x_start;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100507 for (; x < width - 1; ++x)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100508 {
509 t.str += scalar(x, y).str;
510 t.str += ", ";
511 }
512 t.str += scalar(x, y).str;
513
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100514 if (width > 1)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100515 {
516 t.str += "))";
517 }
518
519 return t;
520 }
521
522 std::vector<ValueAsString> underlying_source_variables() const override
523 {
524 std::vector<ValueAsString> vars;
525
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100526 for (int32_t y = 0; y < _format.h; ++y)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100527 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100528 for (int32_t x = 0; x < _format.w; ++x)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100529 {
530 ValueAsString t;
531 t.str = _data[y][x];
532 t.type.str = get_cl_data_type(_format.dt, 1);
533 t.type.dt = _format.dt;
534 t.type.size = 1;
535 vars.push_back(t);
536 }
537 }
538
539 return vars;
540 }
541
542 bool is_assignable() const override
543 {
544 return false;
545 }
546
547 bool need_declaration() const override
548 {
549 return false;
550 }
551
552private:
553 std::vector<std::vector<std::string>> _data{};
554};
555
556enum class TensorComponentIndex : int32_t
557{
558 IndexMask = 0x0000000f,
559};
560
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100561enum class TensorComponentGroup : int32_t
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100562{
563 OffsetFirstElement = 0x00000100,
564 Stride = 0x00001000,
565 Dimension = 0x00010000,
566 FoldedDimension = 0x00100000,
567 Constant = 0x01000000
568};
569
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100570inline std::string to_string(TensorComponentType x)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100571{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100572 switch (x)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100573 {
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100574 case TensorComponentType::Unknown:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100575 return "Unknown";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100576 case TensorComponentType::OffsetFirstElement:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100577 return "OffsetFirstElement";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100578 case TensorComponentType::Stride1:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100579 return "Stride1";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100580 case TensorComponentType::Stride2:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100581 return "Stride2";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100582 case TensorComponentType::Stride3:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100583 return "Stride3";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100584 case TensorComponentType::Stride4:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100585 return "Stride4";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100586 case TensorComponentType::Dim0:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100587 return "Dim0";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100588 case TensorComponentType::Dim1:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100589 return "Dim1";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100590 case TensorComponentType::Dim2:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100591 return "Dim2";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100592 case TensorComponentType::Dim3:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100593 return "Dim3";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100594 case TensorComponentType::Dim4:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100595 return "Dim4";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100596 case TensorComponentType::Dim1xDim2:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100597 return "Dim1xDim2";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100598 case TensorComponentType::Dim1xDim2xDim3:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100599 return "Dim1xDim2xDim3";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100600 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100601 assert(false);
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100602 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100603 }
604}
605
606class ITensorArgument
607{
608public:
609 virtual ~ITensorArgument() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100610
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100611 /** Method to get the tensor component as a string
612 *
613 * @param[in] x tensor component to query
614 *
615 * @return the tensor component as a string
616 */
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100617 virtual std::string component(TensorComponentType x) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100618
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100619 /** Method to get the tensor component type declaration as a string
620 *
621 * @return the tensor component type declaration as a string
622 */
623 virtual std::string component_type_declaration() const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100624
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100625 /** Method to get the tensor component data type
626 *
627 * @return the tensor component data type
628 */
629 virtual DataType component_data_type() const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100630
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100631 /** Method to get the tensor component declarations
632 *
633 * @return a vector containing the tensor component declarations
634 */
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100635 virtual std::vector<TensorComponentType> component_declarations() const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100636
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100637 /** Method to get the name of the tensor argument.
638 *
639 * @return the name of the tensor argument
640 */
641 std::string name() const
642 {
643 return _basename;
644 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100645
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100646 /** Method to get the tensor format
647 *
648 * @return the format
649 */
650 TensorInfo format() const
651 {
652 return _format;
653 }
654
655protected:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100656 TensorInfo _format{};
657 std::string _basename{};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100658};
659
660enum class GpuTensorStorage : int32_t
661{
662 Unknown = 0x0000,
663 BufferUint8Ptr = 0x0012,
664 Image2dReadOnly = 0x0020,
665 Image2dWriteOnly = 0x0021,
666 Image3dReadOnly = 0x0030,
667 Image3dWriteOnly = 0x0031
668};
669
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100670inline GpuTensorStorage to_gpu_tensor_storage(TensorStorageType s)
671{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100672 switch (s)
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100673 {
674 case TensorStorageType::Unknown:
675 return GpuTensorStorage::Unknown;
676
677 case TensorStorageType::BufferUint8Ptr:
678 return GpuTensorStorage::BufferUint8Ptr;
679
680 case TensorStorageType::Texture2dReadOnly:
681 return GpuTensorStorage::Image2dReadOnly;
682
683 case TensorStorageType::Texture2dWriteOnly:
684 return GpuTensorStorage::Image2dWriteOnly;
685
686 default:
687 assert(false);
688 return GpuTensorStorage::Unknown;
689 }
690}
691
692inline TensorStorageType to_tensor_storage(GpuTensorStorage s)
693{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100694 switch (s)
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100695 {
696 case GpuTensorStorage::Unknown:
697 return TensorStorageType::Unknown;
698
699 case GpuTensorStorage::BufferUint8Ptr:
700 return TensorStorageType::BufferUint8Ptr;
701
702 case GpuTensorStorage::Image2dReadOnly:
703 return TensorStorageType::Texture2dReadOnly;
704
705 case GpuTensorStorage::Image2dWriteOnly:
706 return TensorStorageType::Texture2dWriteOnly;
707
708 default:
709 assert(false);
710 return TensorStorageType::Unknown;
711 }
712}
713
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100714class IGpuTensorArgument : public ITensorArgument
715{
716public:
717 virtual ~IGpuTensorArgument() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100718
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100719 /** Method to get the tensor storage, which is the underlying storage used to keep the data memory
720 *
721 * @param[in] x tensor storage to query
722 *
723 * @return the tensor storage as a string
724 */
725 virtual std::string storage(GpuTensorStorage x) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100726
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100727 /** Method to get the tensor storage type declaration as a string
728 *
729 * @param[in] x tensor component to query
730 *
731 * @return the tensor storage type declaration as a string
732 */
733 virtual std::string storage_type_declaration(GpuTensorStorage x) const = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100734
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100735 /** Method to get the tensor storage declarations
736 *
737 * @return a vector containing the tensor storage declarations
738 */
739 virtual std::vector<GpuTensorStorage> storage_declarations() const = 0;
740};
741
742class ClTensorArgument : public IGpuTensorArgument
743{
744public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100745 ClTensorArgument(const std::string &name, const TensorInfo &x, bool return_by_value_when_possible)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100746 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100747 _basename = name;
748 _format = x;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100749 _return_by_value_when_possible = return_by_value_when_possible;
750 }
751
752 // Methods to override
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100753 std::string component(TensorComponentType x) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100754 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100755 if ((static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentGroup::Constant)))
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100756 {
757 int32_t idx = static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentIndex::IndexMask);
758 return std::to_string(idx - 1);
759 }
760
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100761 if (_return_by_value_when_possible)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100762 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100763 if ((static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentGroup::Dimension)))
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100764 {
765 int32_t idx = static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentIndex::IndexMask);
766 return std::to_string(_format.shape[idx]);
767 }
768
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100769 if ((static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentGroup::FoldedDimension)))
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100770 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100771 switch (x)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100772 {
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100773 case TensorComponentType::Dim1xDim2:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100774 return std::to_string(_format.shape[1] * _format.shape[2]);
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100775 case TensorComponentType::Dim1xDim2xDim3:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100776 return std::to_string(_format.shape[1] * _format.shape[2] * _format.shape[2]);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100777 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100778 std::cout << "Unsupported folded dimension" << std::endl;
779 assert(false);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100780 }
781 }
782 }
783
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100784 if (std::find(_components_required.begin(), _components_required.end(), x) == _components_required.end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100785 {
786 _components_required.push_back(x);
787 }
788
789 return build_component_name(x);
790 }
791
792 std::string component_type_declaration() const override
793 {
794 return "int";
795 };
796
797 DataType component_data_type() const override
798 {
799 return DataType::Int32;
800 }
801
802 std::string storage(GpuTensorStorage x) override
803 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100804 if (std::find(_storage_required.begin(), _storage_required.end(), x) == _storage_required.end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100805 {
806 _storage_required.push_back(x);
807 }
808
809 return build_storage_name(x);
810 }
811
812 std::string storage_type_declaration(GpuTensorStorage x) const override
813 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100814 switch (x)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100815 {
816 case GpuTensorStorage::BufferUint8Ptr:
817 return "__global uchar*";
818 case GpuTensorStorage::Image2dReadOnly:
819 return "__read_only image2d_t";
820 case GpuTensorStorage::Image2dWriteOnly:
821 return "__write_only image2d_t";
822 case GpuTensorStorage::Image3dReadOnly:
823 return "__read_only image3d_t ";
824 case GpuTensorStorage::Image3dWriteOnly:
825 return "__write_only image3d_t ";
826 default:
827 std::cout << "Unsupported storage" << std::endl;
828 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +0100829 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100830 }
831 };
832
833 std::vector<GpuTensorStorage> storage_declarations() const override
834 {
835 return _storage_required;
836 }
837
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100838 std::vector<TensorComponentType> component_declarations() const override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100839 {
840 return _components_required;
841 }
842
843private:
844 std::string build_storage_name(GpuTensorStorage x) const
845 {
846 std::string var_name = _basename;
847
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100848 switch (x)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100849 {
850 case GpuTensorStorage::BufferUint8Ptr:
851 return var_name + "_ptr";
852 case GpuTensorStorage::Image2dReadOnly:
853 case GpuTensorStorage::Image2dWriteOnly:
854 return var_name + "_img2d";
855 case GpuTensorStorage::Image3dReadOnly:
856 case GpuTensorStorage::Image3dWriteOnly:
857 return var_name + "_img3d";
858 default:
859 std::cout << "Unsupported storage" << std::endl;
860 assert(false);
861 }
862
863 return var_name;
864 }
865
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100866 std::string build_component_name(TensorComponentType x) const
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100867 {
868 std::string var_name = _basename;
869
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100870 switch (x)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100871 {
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100872 case TensorComponentType::OffsetFirstElement:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100873 return var_name + "_offset_first_element";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100874 case TensorComponentType::Stride1:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100875 return var_name + "_stride1";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100876 case TensorComponentType::Stride2:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100877 return var_name + "_stride2";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100878 case TensorComponentType::Stride3:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100879 return var_name + "_stride3";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100880 case TensorComponentType::Dim0:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100881 return var_name + "_dim0";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100882 case TensorComponentType::Dim1:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100883 return var_name + "_dim1";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100884 case TensorComponentType::Dim2:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100885 return var_name + "_dim2";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100886 case TensorComponentType::Dim3:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100887 return var_name + "_dim3";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100888 case TensorComponentType::Dim1xDim2:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100889 return var_name + "_dim1xdim2";
Viet-Hoa Doc8e16172023-06-27 14:09:46 +0100890 case TensorComponentType::Dim1xDim2xDim3:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100891 return var_name + "_dim1xdim2xdim3";
892 default:
893 std::cout << "Unsupported component" << std::endl;
894 assert(false);
895 }
896
897 return var_name;
898 }
899
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100900 bool _return_by_value_when_possible{false};
901 std::vector<GpuTensorStorage> _storage_required{};
902 std::vector<TensorComponentType> _components_required{};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100903};
904
905/**
906 * @brief Data structure that contains the declared tiles by the components.
907 * The registry is a linear data structure that follows the similar principle of the stack. The user can use the @p increment_registry_level() method to
908 * increase the level of the stack (0 when it starts). When the user uses the @p decrement_registry_level() method, the registry decreases the level of the stack
909 * and remove (pop) all the tiles from the level above.
910 * When a tile is declared on the level 0, it is a global tile. A global tile is visible in all parts of the code.
911 * Since different components may use the same name to define a tile, the registry adopts the IdSpace concept, an @p id to prevent name collisions
912 * when declaring tiles among different components.
913 *
914 */
915class GpuTileRegistry
916{
917public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100918 enum class RegistryTileType
919 {
920 Tile,
921 Link
922 };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100923
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100924 using RegistryIdSpace = int32_t;
925 using RegistryLevel = int32_t;
926 using RegistryTileName = std::string;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100927
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100928 struct RegistryTileTableEntry
929 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100930 RegistryLevel registry_level{0};
931 std::unique_ptr<IVectorTile> tile_object{nullptr};
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100932 };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100933
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100934 struct RegistryTileTypeTableEntry
935 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100936 RegistryTileType tile_type{RegistryTileType::Tile};
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100937 RegistryTileName tile_name{};
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100938 RegistryIdSpace registry_idspace{0};
939 RegistryLevel registry_level{0};
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100940 };
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100941
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100942 using RegistryTileTable = std::map<RegistryIdSpace, std::map<RegistryTileName, RegistryTileTableEntry>>;
943 using RegistryTileTypeTable = std::map<RegistryIdSpace, std::map<RegistryTileName, RegistryTileTypeTableEntry>>;
944
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100945 /**
946 * @brief Construct a new Gpu Tile Registry object
947 *
948 */
949 GpuTileRegistry()
950 {
951 _language = GpuTargetLanguage::Unknown;
952 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100953
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100954 /**
955 * @brief Construct a new Gpu Tile Registry object providing the Gpu programming language
956 *
957 * @param[in] language Gpu programming language to use
958 */
959 GpuTileRegistry(GpuTargetLanguage language)
960 {
961 _language = language;
962 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100963
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100964 /**
965 * @brief Default destructor. Destroy the Gpu Tile Registry object
966 *
967 */
968 ~GpuTileRegistry() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100969
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100970 /**
971 * @brief Set the working IdSpace for the tile registry. IdSpace is used to prevent name collisions when declaring tiles.
972 * Therefore, the IdSpace should be set before declaring any tiles.
973 *
974 * @param[in] id The IdSpace id
975 */
976 void set_IdSpace(int32_t id)
977 {
978 _IdSpace = id;
979 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100980
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100981 /**
982 * @brief Get the current working IdSpace for the tile registry. IdSpace is used to prevent name collisions when declaring tiles
983 *
984 * @return The IdSpace id
985 */
986 int32_t IdSpace() const
987 {
988 return _IdSpace;
989 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +0100990
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +0100991 /**
992 * @brief Gets all the IdSpace declarations defined in the tile registry.
993 *
994 * @return all the IdSpace declarations defined in the tile registry as std::vector<int32_t>. It returns an empty vector if there are no IdSpace declarations.
995 */
996 std::vector<int32_t> IdSpace_declarations() const
997 {
998 std::vector<int32_t> x;
999
1000 auto it = _frags.begin();
1001
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001002 while (it != _frags.end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001003 {
1004 x.push_back(it->first);
1005
1006 it++;
1007 }
1008
1009 return x;
1010 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001011
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001012 /**
1013 * @brief Declare a tile from a previously created tile
1014 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001015 void insert(const std::string &name, const IVectorTile *frag)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001016 {
1017 assert(_language == GpuTargetLanguage::OpenCL);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001018 const int32_t key_IdSpace = _IdSpace;
1019 const std::string key_var_name = name;
1020 const std::string var_name = frag->name();
1021 TileInfo format = frag->format();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001022
1023 // First check whether a tile with the same name exists
1024 IVectorTile *result = (*this)[key_var_name];
1025 assert(result == nullptr);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001026 if (result == nullptr)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001027 {
1028 std::unique_ptr<ClTile> tile = std::make_unique<ClTile>(var_name, format);
1029
1030 _frags[key_IdSpace][key_var_name].tile_object = std::move(tile);
1031 _frags[key_IdSpace][key_var_name].registry_level = _registry_level;
1032
1033 _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Link;
1034 _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name;
1035 _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace;
1036 _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level;
1037 }
1038 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001039
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001040 /**
1041 * @brief Declare a tile with TileInfo. The tile will be stored in the IdSpace set with @p set_IdSpace()
1042 *
1043 * @note The reference name used for declaring the tile should not be previously used in the IdSpace
1044 *
1045 * @param[in] name Reference name for the tile. The reference name can be used to retrieve the tile stored in the registry.
1046 * @param[in] format Tile format use to use
1047 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001048 void insert(const std::string &name, const TileInfo &format)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001049 {
1050 assert(_language == GpuTargetLanguage::OpenCL);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001051 const int32_t key_IdSpace = _IdSpace;
1052 const std::string key_var_name = name;
1053 const std::string var_name = generate_tile_name(name);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001054
1055 // First check whether a tile with the same name exists
1056 IVectorTile *result = (*this)[key_var_name];
1057 assert(result == nullptr);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001058 if (result == nullptr)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001059 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001060 std::unique_ptr<ClTile> tile = std::make_unique<ClTile>(var_name, format);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001061 _frags[key_IdSpace][key_var_name].tile_object = std::move(tile);
1062 _frags[key_IdSpace][key_var_name].registry_level = _registry_level;
1063
1064 _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Tile;
1065 _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name;
1066 _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace;
1067 _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level;
1068 }
1069 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001070
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001071 /**
1072 * @brief Declare a constant tile. The content of the tile is passed as a vector of std::string
1073 *
1074 * @note The reference name used for declaring the tile should not be previously used in the IdSpace
1075 *
1076 * @param[in] name Reference name for the tile. The reference name can be used to retrieve the tile stored in the registry.
1077 * @param[in] in A 3D std::vector of std::string. From the 3D std::vector we can know the dimensions for the tile
1078 * @param[in] dt The data type for the elements stored in the 3D std::vector as std::string. It is user's responsibilty to ensure
1079 * that the data type is aligned with the content of the std::string.
1080 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001081 void insert(const std::string &name, const std::vector<std::vector<std::string>> &in, DataType dt)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001082 {
1083 assert(_language == GpuTargetLanguage::OpenCL);
1084 const int32_t key_IdSpace = _IdSpace;
1085 const std::string key_var_name = name;
1086
1087 // First check whether a tile with the same name exists
1088 IVectorTile *result = (*this)[key_var_name];
1089 assert(result == nullptr);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001090 if (result == nullptr)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001091 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001092 std::unique_ptr<ClConstantTile> tile = std::make_unique<ClConstantTile>(in, dt);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001093 _frags[key_IdSpace][key_var_name].tile_object = std::move(tile);
1094 _frags[key_IdSpace][key_var_name].registry_level = _registry_level;
1095
1096 _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Tile;
1097 _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name;
1098 _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace;
1099 _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level;
1100 }
1101 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001102
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001103 /**
1104 * @brief Declare an anonymous constant tile. The content of the tile is passed as a vector of std::string
1105 *
1106 * @note This method can be used to declare temporary tiles that need to be accessed only once.
1107 *
1108 * @param[in] in A 3D std::vector of std::string. From the 3D std::vector we can know the dimensions for the tile
1109 * @param[in] dt The data type for the elements stored in the 3D std::vector as std::string. It is user responsibilty to ensure
1110 * that the data type is aligned with what passed with the std::string.
1111 *
1112 * @return IVectorTile* the anonymous constant tile
1113 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001114 IVectorTile *insert(const std::vector<std::vector<std::string>> &in, DataType dt)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001115 {
1116 assert(_language == GpuTargetLanguage::OpenCL);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001117 const int32_t key_IdSpace = _IdSpace;
1118 const std::string key_var_name = "_" + std::to_string(_anonymous_frag_count++);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001119
1120 // First check whether a tile with the same name exists
1121 IVectorTile *result = (*this)[key_var_name];
1122 assert(result == nullptr);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001123 if (result == nullptr)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001124 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001125 std::unique_ptr<ClConstantTile> tile = std::make_unique<ClConstantTile>(in, dt);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001126 _frags[key_IdSpace][key_var_name].tile_object = std::move(tile);
1127 _frags[key_IdSpace][key_var_name].registry_level = _registry_level;
1128
1129 _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Tile;
1130 _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name;
1131 _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace;
1132 _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level;
1133 }
1134
1135 return (*this)[key_var_name];
1136 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001137
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001138 /**
1139 * @brief Get the tile from the registry. This method searches the tile in the IdSpace provided by the user
1140 *
1141 * @param[in] name The name of the tile to retrieve
1142 * @param[in] IdSpace The IdSpace id where to search the tile
1143 *
1144 * @return IVectorTile* The tile
1145 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001146 IVectorTile *get(const std::string &name, int32_t IdSpace)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001147 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001148 const int32_t key_IdSpace = IdSpace;
1149 const std::string key_var_name = name;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001150
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001151 IVectorTile *result = nullptr;
1152 auto search_IdSpace = _frags.find(key_IdSpace);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001153 if (search_IdSpace != _frags.end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001154 {
1155 auto search_tile = _frags[key_IdSpace].find(key_var_name);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001156 if (search_tile != _frags[key_IdSpace].end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001157 {
1158 result = search_tile->second.tile_object.get();
1159 assert(result != nullptr);
1160 }
1161 }
1162
1163 return result;
1164 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001165
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001166 /**
1167 * @brief Get the tile from the registry. This method searches the tile in the IdSpace set with @p set_IdSpace()
1168 *
1169 * @param[in] name The name of the tile to retrieve
1170 *
1171 * @return IVectorTile* The tile
1172 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001173 IVectorTile *operator[](const std::string &name)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001174 {
1175 return get(name, _IdSpace);
1176 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001177
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001178 /**
1179 * @brief Check whether the tile in the in the IdSpace provided by the user exists
1180 *
1181 * @param[in] name Name of the tile to search for
1182 * @param[in] IdSpace The IdSpace id where to search the tile
1183 *
1184 * @return true if the tile exists
1185 * @return false if the tile does not exist
1186 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001187 bool has_tile(const std::string &name, int32_t IdSpace) const
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001188 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001189 const int32_t key_IdSpace = IdSpace;
1190 const std::string key_var_name = name;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001191
1192 // IVectorTile* result = nullptr;
1193 auto search_IdSpace = _frags.find(key_IdSpace);
1194
1195 return search_IdSpace != _frags.end();
1196 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001197
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001198 /**
1199 * @brief Check whether the tile within the current IdSpace exists
1200 *
1201 * @param[in] name Name of the tile to search for
1202 *
1203 * @return true if the tile exists
1204 * @return false if the tile does not exist
1205 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001206 bool has_tile(const std::string &name) const
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001207 {
1208 return has_tile(name, _IdSpace);
1209 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001210
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001211 /**
1212 * @brief Get all the tiles declared within the IdSpace provided by the user
1213 *
1214 * @param[in] IdSpace IdSpace where to retrieve all the declared tiles
1215 *
1216 * @return std::vector<IVectorTile*> A vector with all the declared tiles in the IdSpace provided by the user
1217 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001218 std::vector<IVectorTile *> tile_declarations(int32_t IdSpace)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001219 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001220 std::vector<IVectorTile *> tiles;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001221
1222 std::map<RegistryTileName, RegistryTileTypeTableEntry>::iterator it = _frag_types[IdSpace].begin();
1223
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001224 while (it != _frag_types[IdSpace].end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001225 {
1226 // The following line should be enabled. However, we cannot at this stage
1227 // because it used to retrieve the output tile produced by each component.
1228 // However, this method should NOT be used to retrieve the output tile
1229 //if(it->second.tile_type == RegistryTileType::Tile)
1230 {
1231 tiles.push_back(get(it->second.tile_name, it->second.registry_idspace));
1232 }
1233 it++;
1234 }
1235
1236 return tiles;
1237 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001238
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001239 /**
1240 * @brief Increase the level of stack.
1241 *
1242 */
1243 void increment_registry_level()
1244 {
1245 _registry_level++;
1246 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001247
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001248 /**
1249 * @brief Remove all the tiles declared at the current stack level and decrease the level of the stack.
1250 *
1251 */
1252 void decrement_registry_level()
1253 {
1254 assert(_registry_level >= 0);
1255
1256 // Remove all variables in the local scope
1257 std::map<RegistryTileName, RegistryTileTableEntry>::iterator it = _frags[_IdSpace].begin();
1258
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001259 while (it != _frags[_IdSpace].end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001260 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001261 if (it->second.registry_level == _registry_level)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001262 {
1263 it = _frags[_IdSpace].erase(it);
1264 }
1265 else
1266 {
1267 it++;
1268 }
1269 }
1270
1271 std::map<RegistryTileName, RegistryTileTypeTableEntry>::iterator it_type = _frag_types[_IdSpace].begin();
1272
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001273 while (it_type != _frag_types[_IdSpace].end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001274 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001275 if (it_type->second.registry_level == _registry_level)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001276 {
1277 it_type = _frag_types[_IdSpace].erase(it_type);
1278 }
1279 else
1280 {
1281 it_type++;
1282 }
1283 }
1284
1285 _registry_level--;
1286 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001287
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001288 /**
1289 * @brief Get the level of the stack
1290 *
1291 */
1292 int32_t level() const
1293 {
1294 return _registry_level;
1295 }
1296
1297private:
1298 // This method ensures that the key is unique among different components
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001299 std::string generate_tile_name(const std::string &name)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001300 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001301 assert(_IdSpace >= 0);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001302 if (_registry_level == 0)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001303 {
1304 return "_G" + std::to_string(_IdSpace) + "_" + name;
1305 }
1306 else
1307 {
1308 return name;
1309 }
1310 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001311
1312 RegistryTileTable _frags{};
1313 RegistryTileTypeTable _frag_types{};
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001314 RegistryLevel _registry_level{0};
1315 RegistryIdSpace _IdSpace{-1};
1316 int32_t _anonymous_frag_count{0}; // Counter used to create the anonymous tiles
1317 GpuTargetLanguage _language{GpuTargetLanguage::Unknown}; // Gpu programming language
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001318};
1319
1320using TensorEntry = std::unique_ptr<IGpuTensorArgument>;
1321
1322/**
1323 * @brief Data structure that contains the tensors consumed by the components.
1324 * Since different components may use the same name as reference for a tensor, the registry adopts the IdSpace concept, an @p id to prevent name collisions
1325 * when declaring tensors among different components.
1326 *
1327 */
1328class GpuTensorArgumentRegistry
1329{
1330public:
1331 /**
1332 * @brief Construct a new Gpu Tensor Registry object
1333 *
1334 */
1335 GpuTensorArgumentRegistry()
1336 {
1337 _language = GpuTargetLanguage::Unknown;
1338 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001339
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001340 /**
1341 * @brief Construct a new Gpu Tensor Registry object
1342 *
1343 * @param[in] language Gpu programming language to use
1344 */
1345 GpuTensorArgumentRegistry(GpuTargetLanguage language)
1346 {
1347 _language = language;
1348 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001349
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001350 /**
1351 * @brief Default destructor. Destroy the Gpu Tensor Registry object
1352 *
1353 */
1354 ~GpuTensorArgumentRegistry() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001355
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001356 /**
1357 * @brief Set the working IdSpace for the tensor registry. IdSpace is used to prevent name collisions when declaring tensors.
1358 * Therefore, the IdSpace should be set before declaring any tensors.
1359 *
1360 * @param[in] id The IdSpace id
1361 */
1362 void set_IdSpace(int32_t id)
1363 {
1364 _IdSpace = id;
1365 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001366
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001367 /**
1368 * @brief Get the current working IdSpace for the tensor registry. IdSpace is used to prevent name collisions when declaring tensors
1369 *
1370 * @return The IdSpace id
1371 */
1372 int32_t IdSpace() const
1373 {
1374 return _IdSpace;
1375 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001376
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001377 /**
1378 * @brief Gets all the IdSpace declarations defined in the tensor registry.
1379 *
1380 * @return all the IdSpace declarations defined in the tensor registry as std::vector<int32_t>. It returns an empty vector if there are no IdSpace declarations.
1381 */
1382 std::vector<int32_t> IdSpace_declarations() const
1383 {
1384 std::vector<int32_t> x;
1385
1386 auto it = _refs.begin();
1387
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001388 while (it != _refs.end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001389 {
1390 x.push_back(it->first);
1391
1392 it++;
1393 }
1394
1395 return x;
1396 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001397
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001398 /**
1399 * @brief Declare a tensor with TensorInfo. The tensor will be stored in the IdSpace set with @p set_IdSpace()
1400 *
1401 * @note The reference name used for declaring the tensor should not be previously used in the IdSpace
1402 *
1403 * @param[in] name Reference name for the tensor. The reference name can be used to retrieve the tensor stored in the registry.
1404 * @param[in] x Pair of tensor info and tensor id
1405 * @param[in] return_by_value_when_possible True if we want the value stored in the tensor components
1406 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001407 void insert(const std::string &name, const TensorInfo &x, bool return_by_value_when_possible)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001408 {
1409 assert(_language == GpuTargetLanguage::OpenCL);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001410 const int32_t key_IdSpace = _IdSpace;
1411 const int32_t tensor_id = x.id;
1412 const std::string key_var_name = name;
1413 const std::string var_name = generate_tensor_name(name, tensor_id);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001414
1415 // First, check whether the tensor has already a reference. If so, trigger an assert
1416 assert(!has_tensor_argument(name));
1417
1418 // Check whether a tensor with that tensorID exists
1419 auto result = _tensor_arguments.find(tensor_id);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001420 if (result == _tensor_arguments.end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001421 {
1422 // It means that we haven't added a tensor with that tensor_id yet. Create a IGpuTensorArgument before creating the reference
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001423 std::unique_ptr<ClTensorArgument> arg =
1424 std::make_unique<ClTensorArgument>(var_name, x, return_by_value_when_possible);
1425 _tensor_arguments[tensor_id] = std::move(arg);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001426 }
1427
1428 _refs[key_IdSpace][key_var_name] = tensor_id;
1429 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001430
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001431 /**
1432 * @brief Get the tensor from the registry. This method searches the tensor in the IdSpace set with @p set_IdSpace()
1433 *
1434 * @param[in] name The name of the tensor to retrieve
1435 *
1436 * @return IGpuTensor* The tensor
1437 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001438 IGpuTensorArgument *operator[](const std::string &name)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001439 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001440 const int32_t key_IdSpace = _IdSpace;
1441 const std::string key_var_name = name;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001442
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001443 IGpuTensorArgument *result = nullptr;
1444 auto search_IdSpace = _refs.find(key_IdSpace);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001445 if (search_IdSpace != _refs.end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001446 {
1447 auto search_tensor_id = _refs[key_IdSpace].find(key_var_name);
1448
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001449 if (search_tensor_id != _refs[key_IdSpace].end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001450 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001451 const int32_t tensor_id = search_tensor_id->second;
1452 auto search_tensor_argument = _tensor_arguments.find(tensor_id);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001453 if (search_tensor_argument != _tensor_arguments.end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001454 {
1455 result = search_tensor_argument->second.get();
1456 }
1457 assert(result != nullptr);
1458 }
1459 }
1460
1461 return result;
1462 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001463
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001464 /**
1465 * @brief Get all the tensors declared in the IdSpace provided by the user
1466 *
1467 * @return std::vector<IGpuTensorArgument*> A vector with all the declared tensors
1468 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001469 std::vector<IGpuTensorArgument *> tensor_argument_declarations()
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001470 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001471 std::vector<IGpuTensorArgument *> args;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001472
1473 auto it = _tensor_arguments.begin();
1474
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001475 while (it != _tensor_arguments.end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001476 {
1477 args.push_back(it->second.get());
1478 it++;
1479 }
1480
1481 return args;
1482 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001483
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001484 /**
1485 * @brief Check whether the tensor argument in the IdSpace set with @p set_IdSpace() exists
1486 *
1487 * @param[in] name Name of the tensor argument to search for
1488 *
1489 * @return true if the tensor argument exists
1490 * @return false if the tensor argument does not exist
1491 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001492 bool has_tensor_argument(const std::string &name)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001493 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001494 const int32_t key_IdSpace = _IdSpace;
1495 const std::string key_var_name = name;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001496
1497 auto search_IdSpace = _refs.find(key_IdSpace);
1498
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001499 if (search_IdSpace != _refs.end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001500 {
1501 auto search_tensor_id = _refs[key_IdSpace].find(key_var_name);
1502
1503 return search_tensor_id != _refs[key_IdSpace].end();
1504 }
1505 else
1506 {
1507 return false;
1508 }
1509 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001510
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001511 /**
1512 * @brief Check whether the tensor argument is in the the IdSpace provided by the user
1513 *
1514 * @param[in] name Name of the tensor argument to search for
1515 * @param[in] IdSpace The IdSpace id where to search the tensor argument
1516 *
1517 * @return true if the tile exists
1518 * @return false if the tile does not exist
1519 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001520 bool has_tensor_argument(const std::string &name, int32_t IdSpace)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001521 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001522 const int32_t key_IdSpace = IdSpace;
1523 const std::string key_var_name = name;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001524
1525 auto search_IdSpace = _refs.find(key_IdSpace);
1526
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001527 if (search_IdSpace != _refs.end())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001528 {
1529 auto search_tensor_id = _refs[key_IdSpace].find(key_var_name);
1530
1531 return search_tensor_id != _refs[key_IdSpace].end();
1532 }
1533 else
1534 {
1535 return false;
1536 }
1537 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001538
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001539private:
1540 // This method ensures that the key is unique among different components
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001541 std::string generate_tensor_name(const std::string &name, int32_t tensor_id)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001542 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001543 assert(tensor_id >= 0);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001544
1545 return name + std::to_string(tensor_id);
1546 }
1547
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001548 std::map<int32_t, TensorEntry> _tensor_arguments{};
1549 std::map<int32_t, std::map<std::string, int32_t>> _refs{};
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001550 int32_t _IdSpace{-1};
1551 GpuTargetLanguage _language{GpuTargetLanguage::Unknown}; // Gpu programming language
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001552};
1553
1554enum class OpType : int32_t
1555{
1556 Elementwise = 0x0000,
1557 Relational = 0x1000,
1558 Algebra = 0x2000
1559};
1560
1561inline std::string to_string(AssignmentOp op)
1562{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001563 switch (op)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001564 {
1565 case AssignmentOp::Decrement:
1566 return "-=";
1567 case AssignmentOp::Increment:
1568 return "+=";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001569 default:
1570 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01001571 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001572 }
1573}
1574
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01001575inline std::string to_string(UnaryOp op)
1576{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001577 switch (op)
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01001578 {
1579 case UnaryOp::LogicalNot:
1580 return "!";
Adnan AlSinan66f3d382023-07-10 15:07:45 +01001581 case UnaryOp::BitwiseNot:
1582 return "~";
Adnan AlSinan2e6d6592023-08-21 13:54:27 +01001583 case UnaryOp::Negate:
1584 return "-";
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01001585 default:
1586 assert(false);
1587 return "";
1588 }
1589}
1590
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001591inline std::string to_string(BinaryOp op)
1592{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001593 switch (op)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001594 {
1595 case BinaryOp::Add:
1596 return "+";
1597 case BinaryOp::Sub:
1598 return "-";
1599 case BinaryOp::Mul:
1600 return "*";
1601 case BinaryOp::Div:
1602 return "/";
1603 case BinaryOp::Mod:
1604 return "%";
1605 case BinaryOp::Equal:
1606 return "==";
1607 case BinaryOp::Less:
1608 return "<";
1609 case BinaryOp::LessEqual:
1610 return "<=";
1611 case BinaryOp::Greater:
1612 return ">";
1613 case BinaryOp::GreaterEqual:
1614 return ">=";
1615 case BinaryOp::LogicalAnd:
1616 return "&&";
1617 case BinaryOp::LogicalOr:
1618 return "||";
Adnan AlSinan66f3d382023-07-10 15:07:45 +01001619 case BinaryOp::BitwiseXOR:
1620 return "^";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001621 default:
1622 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01001623 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001624 }
1625}
1626
1627inline std::string binary_op_string(BinaryOp op)
1628{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001629 switch (op)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001630 {
1631 case BinaryOp::Add:
1632 return "add";
1633 case BinaryOp::Sub:
1634 return "sub";
1635 case BinaryOp::Mul:
1636 return "mul";
1637 case BinaryOp::Div:
1638 return "div";
1639 case BinaryOp::Mod:
1640 return "mod";
1641 case BinaryOp::Equal:
1642 return "eq";
1643 case BinaryOp::Less:
1644 return "gt";
1645 case BinaryOp::LessEqual:
1646 return "gteq";
1647 case BinaryOp::Greater:
1648 return "lt";
1649 case BinaryOp::GreaterEqual:
1650 return "lte";
1651 default:
1652 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01001653 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001654 }
1655}
1656
1657enum class OperandType : int32_t
1658{
1659 Unknown = 0x00000000,
1660 ScalarFp32 = 0x00001011, // Immediate scalar tile
1661 ScalarFp16 = 0x00001012, // Immediate scalar tile
1662 ScalarInt32 = 0x00001021, // Immediate scalar tile
1663 ScalarInt16 = 0x00001022, // Immediate scalar tile
1664 ScalarInt8 = 0x00001024, // Immediate scalar tile
1665 ScalarUInt32 = 0x00001031, // Immediate scalar tile
1666 ScalarUInt16 = 0x00001032, // Immediate scalar tile
1667 ScalarUInt8 = 0x00001034, // Immediate scalar tile
1668 ScalarBool = 0x00001041, // Immediate scalar tile
1669 ScalarTile = 0x00001050, // Scalar from a tile
1670 Tile = 0x00010000, // Tile
1671 TensorStride1 = 0x00100001, // Tensor component
1672 TensorStride2 = 0x00100002, // Tensor component
1673 TensorStride3 = 0x00100003, // Tensor component
1674 TensorStride4 = 0x00100004, // Tensor component
1675 TensorDim0 = 0x00100010, // Tensor component
1676 TensorDim1 = 0x00100020, // Tensor component
1677 TensorDim2 = 0x00100030, // Tensor component
1678 TensorDim3 = 0x00100040, // Tensor component
1679 TensorDim4 = 0x00100050, // Tensor component
1680 TensorC = 0x00100010, // Tensor component
1681 TensorW = 0x00100020, // Tensor component
1682 TensorH = 0x00100030, // Tensor component
1683 TensorD = 0x00100040, // Tensor component
1684 TensorN = 0x00100050, // Tensor component
1685 TensorDim1xDim2 = 0x00100100, // Tensor component
1686 TensorDim1xDim2xDim3 = 0x00100200, // Tensor component
1687 TensorWxH = 0x00100300, // Tensor component
1688 TensorWxHxD = 0x00100400, // Tensor component
1689 TensorDataOffset = 0x00100500, // Tensor component
1690};
1691
1692struct ScalarTileCoord
1693{
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001694 ScalarTileCoord()
1695 {
1696 }
1697
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001698 ScalarTileCoord(int32_t x0, int32_t y0) : x(x0), y(y0)
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001699 {
1700 }
1701
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001702 int32_t x{-1};
1703 int32_t y{-1};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001704};
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001705
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001706/**
1707 * @brief Operand class. This object is used to pass the operands to the operations performed by the writer.
1708 * Operand can be of three types:
1709 * -# Scalar immediate: constant expression
1710 * -# Tile: A tile
1711 * -# Tensor component: A component (scalar) of a tensor
1712 *
1713 */
1714class Operand
1715{
1716public:
1717 Operand(const std::string &val)
1718 {
1719 _str = val;
1720 _type = OperandType::Tile;
1721 }
1722
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001723 Operand(const std::string &val, const ScalarTileCoord &coord)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001724 {
1725 _str = val;
1726 _type = OperandType::ScalarTile;
1727 _coord = coord;
1728 }
1729
1730 Operand(const std::string &val, OperandType type)
1731 {
1732 _str = val;
1733 _type = type;
1734 }
1735
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001736 Operand(const Operand &t)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001737 {
1738 _str = t.value();
1739 _type = t.type();
1740 }
1741
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001742 Operand &operator=(const Operand &t)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001743 {
1744 _str = t.value();
1745 _type = t.type();
1746 _coord = t.scalar_tile_coordinate();
1747 return *this;
1748 }
1749
1750 std::string value() const
1751 {
1752 return _str;
1753 }
1754
1755 OperandType type() const
1756 {
1757 return _type;
1758 }
1759
1760 ScalarTileCoord scalar_tile_coordinate() const
1761 {
1762 return _coord;
1763 }
1764
1765private:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001766 std::string _str{};
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001767 OperandType _type{OperandType::Unknown};
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001768 ScalarTileCoord _coord{};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001769};
1770
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01001771using GpuSamplerTensorStorage = GpuTensorStorage;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001772
1773struct GpuSampler
1774{
1775 GpuSampler() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001776
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001777 TensorSamplerFormat format{TensorSamplerFormat::Unknown};
1778 GpuSamplerTensorStorage storage{GpuSamplerTensorStorage::Unknown};
1779 TensorSamplerAddressModeX address_mode_x{TensorSamplerAddressModeX::Unknown};
1780 TensorSamplerAddressModeY address_mode_y{TensorSamplerAddressModeY::Unknown};
1781 TensorSamplerAddressModeZ address_mode_z{TensorSamplerAddressModeZ::Unknown};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001782};
1783
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001784inline GpuSampler create_simple_sampler(
1785 const TensorInfo *tensor_info_id, GpuSampler sampler, int32_t step_x, int32_t step_y, int32_t step_z)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001786{
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01001787 CKW_UNUSED(step_x, step_y, step_z);
1788
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001789 auto tensor = tensor_info_id->shape;
1790
1791 GpuSampler dst_sampler;
1792 dst_sampler.format = sampler.format;
1793 dst_sampler.storage = GpuSamplerTensorStorage::BufferUint8Ptr;
1794 dst_sampler.address_mode_x = sampler.address_mode_x;
1795 dst_sampler.address_mode_y = sampler.address_mode_y;
1796 dst_sampler.address_mode_z = sampler.address_mode_z;
1797
1798 int32_t dim_x = 0;
1799 int32_t dim_y = 0;
1800 int32_t dim_z = 0;
1801
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001802 switch (sampler.format)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001803 {
1804 case TensorSamplerFormat::C_W_H:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001805 dim_x = tensor[0];
1806 dim_y = tensor[1];
1807 dim_z = tensor[2];
1808 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001809 case TensorSamplerFormat::C_WH_1:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001810 dim_x = tensor[0];
1811 dim_y = tensor[1] * tensor[2];
1812 dim_z = 1;
1813 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001814 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001815 std::cout << "Unsupported tensor format" << std::endl;
1816 assert(false);
1817 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001818 }
1819
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001820 if (dim_x == 1)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001821 {
1822 assert(step_x == 1);
1823 dst_sampler.address_mode_x = TensorSamplerAddressModeX::None;
1824 }
1825
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001826 if (dim_y == 1)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001827 {
1828 assert(step_y == 1);
1829 dst_sampler.address_mode_y = TensorSamplerAddressModeY::None;
1830 }
1831
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001832 if (dim_z == 1)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001833 {
1834 assert(step_z == 1);
1835 dst_sampler.address_mode_z = TensorSamplerAddressModeZ::None;
1836 }
1837
1838 return dst_sampler;
1839}
1840
1841class GpuOutputSampler
1842{
1843public:
1844 GpuOutputSampler() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001845
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001846 /**
1847 * @brief Method used to initialize the GpuOutputSampler. The GpuOutputSampler can be initialized only once
1848 * by the root component. Once initialized, all simpler components will need to used this sampler
1849 * or a broadcasted version of it
1850 *
1851 * @param[in] sampler GpuSampler
1852 * @param[in] step_x Increment step in the X direction. Not necessarily it is the same of n0 of tile!
1853 * @param[in] step_y Increment step in the Y direction. Not necessarily it is the same of m0 of tile!
1854 * @param[in] step_z Increment step in the Z direction. Not necessarily it is the same of d0 of tile!
1855 */
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001856 void initialize(const TensorInfo *tensor_info_id,
1857 GpuSamplerTensorStorage tensor_storage,
1858 TensorSamplerFormat tensor_format,
1859 int32_t step_x,
1860 int32_t step_y,
1861 int32_t step_z)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001862 {
1863 assert(_is_initialized == false);
1864
1865 _step_x = step_x;
1866 _step_y = step_y;
1867 _step_z = step_z;
1868 _tensor_info_id = tensor_info_id;
1869 _sampler = create_sampler(tensor_storage, tensor_format);
1870 _is_initialized = true;
1871 };
1872
1873 GpuSampler sampler() const
1874 {
1875 return _sampler;
1876 };
1877
1878 int32_t step_x() const
1879 {
1880 return _step_x;
1881 };
1882
1883 int32_t step_y() const
1884 {
1885 return _step_y;
1886 };
1887
1888 int32_t step_z() const
1889 {
1890 return _step_z;
1891 };
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001892
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001893private:
1894 GpuSampler create_sampler(GpuSamplerTensorStorage tensor_storage, TensorSamplerFormat tensor_format)
1895 {
1896 // Output can only be in output mode
1897 assert(tensor_storage != GpuSamplerTensorStorage::Image2dReadOnly);
1898 assert(tensor_storage != GpuSamplerTensorStorage::Image3dReadOnly);
1899
1900 auto tensor = _tensor_info_id->shape;
1901
1902 GpuSampler sampler;
1903 sampler.format = tensor_format;
1904 sampler.storage = tensor_storage;
1905 sampler.address_mode_x = TensorSamplerAddressModeX::None;
1906 sampler.address_mode_y = TensorSamplerAddressModeY::None;
1907 sampler.address_mode_z = TensorSamplerAddressModeZ::None;
1908
1909 // In the case of texture, we do not need any special checks at the border
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001910 if (tensor_storage == GpuSamplerTensorStorage::BufferUint8Ptr)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001911 {
1912 int32_t dim_x = 0;
1913 int32_t dim_y = 0;
1914 int32_t dim_z = 0;
1915
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001916 switch (tensor_format)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001917 {
1918 case TensorSamplerFormat::C_W_H:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001919 dim_x = tensor[0];
1920 dim_y = tensor[1];
1921 dim_z = tensor[2];
1922 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001923 case TensorSamplerFormat::C_WH_1:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001924 dim_x = tensor[0];
1925 dim_y = tensor[1] * tensor[2];
1926 dim_z = 1;
1927 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001928 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001929 std::cout << "Unsupported tensor format" << std::endl;
1930 assert(false);
1931 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001932 }
1933
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001934 if ((dim_x % _step_x) != 0 && dim_x != 1)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001935 {
1936 sampler.address_mode_x = TensorSamplerAddressModeX::OverlappingMin;
1937 }
1938
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001939 if ((dim_y % _step_y) != 0 && dim_y != 1)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001940 {
1941 sampler.address_mode_y = TensorSamplerAddressModeY::ClampToMaxEdgeOnly;
1942 }
1943
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001944 if ((dim_z % _step_z) != 0 && dim_z != 1)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001945 {
1946 sampler.address_mode_z = TensorSamplerAddressModeZ::ClampToMaxEdgeOnly;
1947 }
1948 }
1949
1950 return sampler;
1951 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001952
1953 GpuSampler _sampler{}; // GpuSampler
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001954 int32_t _step_x{1};
1955 int32_t _step_y{1};
1956 int32_t _step_z{1};
1957 const TensorInfo *_tensor_info_id{nullptr};
1958 bool _is_initialized{false};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001959};
1960
1961/**
1962 * @brief Tensor operand class. This object is used to pass the operands as tensor to the operations performed by the writer.
1963 */
1964class TensorOperand
1965{
1966public:
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01001967 TensorOperand(const std::string &val, GpuSampler sampler) : _str(val), _sampler(sampler)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001968 {
1969 }
1970
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001971 TensorOperand &operator=(const TensorOperand &t)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001972 {
1973 _str = t.value();
1974 _sampler = t.sampler();
1975 return *this;
1976 }
1977
1978 std::string value() const
1979 {
1980 return _str;
1981 }
1982
1983 GpuSampler sampler() const
1984 {
1985 return _sampler;
1986 }
1987
1988private:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01001989 std::string _str{};
1990 GpuSampler _sampler{};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01001991};
1992
1993/**
1994 * @brief Data structure that contains all the necessary information to write the Gpu kernel with the Gpu kernel Writer
1995 * This data structure must be initialized before being passed to the Gpu Kernel Writer
1996 *
1997 */
1998class GpuKernelWriterDataHolder
1999{
2000public:
2001 /**
2002 * @brief Construct a new Gpu Kernel Data object. In this phase, we should also store
2003 * the GPU target and target specific capabilities (extensions). For now, we just initialize the
2004 * programming language
2005 *
2006 * @param[in] language Gpu programming language to use
2007 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002008 GpuKernelWriterDataHolder(GpuTargetLanguage language)
2009 : tiles(language), arguments(language), code(""), _language(language)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002010 {
2011 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002012
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002013 /**
2014 * @brief Get the Gpu programming language used
2015 *
2016 * @return GpuTargetLanguage the Gpu programming language
2017 */
2018 GpuTargetLanguage programming_language() const
2019 {
2020 return _language;
2021 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002022
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002023 /**
2024 * @brief @ref GpuTileRegistry
2025 *
2026 */
2027 GpuTileRegistry tiles{};
2028 /**
2029 * @brief @ref GpuTensorArgumentRegistry
2030 *
2031 */
2032 GpuTensorArgumentRegistry arguments{};
2033 /**
2034 * @brief @ref GpuOutputSampler.
2035 *
2036 */
2037 GpuOutputSampler output_sampler{};
2038 /**
2039 * @brief Source code
2040 *
2041 */
2042 std::string code{};
2043
2044 // GpuExtensionRegistry extensions{};
2045private:
2046 GpuTargetLanguage _language;
2047};
2048
2049struct LWS
2050{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002051 int32_t x{1};
2052 int32_t y{1};
2053 int32_t z{1};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002054};
2055
2056/**
2057 * @brief Utility class used to get the tile from the operand. If the operand is not a tile, @ref OperandUnpacker
2058 * declare an anonymous tile in the tile registry.
2059 */
2060class OperandUnpacker
2061{
2062public:
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002063 OperandUnpacker(GpuTileRegistry &tiles, GpuTensorArgumentRegistry &arguments) : _tiles(tiles), _arguments(arguments)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002064 {
2065 // Increase the level of the stack to allocate possible temporary tiles
2066 _tiles.increment_registry_level();
2067 };
2068
2069 ~OperandUnpacker()
2070 {
2071 // Decrease the level of the stack to deallocate any temporary tiles
2072 _tiles.decrement_registry_level();
2073 }
2074
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002075 IVectorTile *unpack(const Operand &src)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002076 {
2077 // Get the tile
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002078 if (src.type() == OperandType::Tile)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002079 {
2080 assert(_tiles.has_tile(src.value()));
2081 return _tiles[src.value()];
2082 }
2083 // Create an anonymous tile with a constant
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002084 else if (static_cast<int32_t>(src.type()) & 0x00001000)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002085 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002086 if (src.type() == OperandType::ScalarTile)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002087 {
2088 ScalarTileCoord coord = src.scalar_tile_coordinate();
2089 assert(_tiles.has_tile(src.value()));
2090 assert(coord.x >= 0);
2091 assert(coord.y >= 0);
2092 auto val = _tiles[src.value()]->scalar(coord.x, coord.y);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002093 return _tiles.insert({{{val.str}}}, val.type.dt);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002094 }
2095 else
2096 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002097 return _tiles.insert({{{src.value()}}}, to_tile_data_type(src.type()));
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002098 }
2099 }
2100 // Create an anonymous tile with the tensor component
2101 else
2102 {
2103 assert(_arguments.has_tensor_argument(src.value()));
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002104 auto x = _arguments[src.value()];
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002105 const std::string val = x->component(to_tensor_component(src.type()));
2106 const DataType dt = x->component_data_type();
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002107 return _tiles.insert({{{val}}}, dt);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002108 }
2109 }
2110
2111private:
2112 DataType to_tile_data_type(OperandType x)
2113 {
2114 return static_cast<DataType>(static_cast<int32_t>(x) & 0x00ff);
2115 }
2116
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002117 TensorComponentType to_tensor_component(OperandType x)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002118 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002119 switch (x)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002120 {
2121 case OperandType::TensorDim0:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002122 return TensorComponentType::Dim0;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002123 case OperandType::TensorDim1:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002124 return TensorComponentType::Dim1;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002125 case OperandType::TensorDim2:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002126 return TensorComponentType::Dim2;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002127 case OperandType::TensorDim3:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002128 return TensorComponentType::Dim3;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002129 case OperandType::TensorDim4:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002130 return TensorComponentType::Dim4;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002131 case OperandType::TensorStride1:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002132 return TensorComponentType::Stride1;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002133 case OperandType::TensorStride2:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002134 return TensorComponentType::Stride2;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002135 case OperandType::TensorStride3:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002136 return TensorComponentType::Stride3;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002137 case OperandType::TensorStride4:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002138 return TensorComponentType::Stride4;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002139 case OperandType::TensorDim1xDim2:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002140 return TensorComponentType::Dim1xDim2;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002141 case OperandType::TensorDim1xDim2xDim3:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002142 return TensorComponentType::Dim1xDim2xDim3;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002143 case OperandType::TensorDataOffset:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002144 return TensorComponentType::OffsetFirstElement;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002145 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002146 assert(false);
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002147 return TensorComponentType::Unknown;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002148 }
2149 }
2150
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002151 GpuTileRegistry &_tiles;
2152 GpuTensorArgumentRegistry &_arguments;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002153};
2154
2155/**
2156 * @brief Utility class used to get the tensor argument from the operand. If the operand is not a tile, @ref OperandUnpacker
2157 * declare an anonymous tile in the tile registry.
2158 * Tensor dimension reduction aims for reducing the tensor data dimension while keeping data's tensor structure.
2159 */
2160class TensorOperandUnpacker
2161{
2162public:
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002163 TensorOperandUnpacker(GpuTensorArgumentRegistry &arguments) : _arguments(arguments){};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002164
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002165 IGpuTensorArgument *unpack(const TensorOperand &src)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002166 {
2167 assert(_arguments.has_tensor_argument(src.value()));
2168 return _arguments[src.value()];
2169 }
2170
2171private:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002172 GpuTensorArgumentRegistry &_arguments;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002173};
2174
2175/**
2176 * @brief The GpuKernel will be used in three occasions (stages):
2177 * #- Compilation stage
2178 * #- Tuning stage
2179 * #- Dispatch stage
2180 */
2181struct GpuKernel
2182{
2183 // Compilation stage
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002184 std::string code{}; // Source code, required for the compilation stage
2185 std::vector<GpuExtensions> list_extensions{}; // Extensions, required for the compilation stage
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002186 // Tuning stage
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002187 std::string config_id{}; // Unique id, required for the tuning stage
2188 std::vector<LWS> list_lws{}; // LWS to test, required for the tuning stage
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002189 // Dispatch stage
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002190 GpuOutputSampler output_sampler{}; // GpuOutputSampler, required for the dispatch stage
2191 std::vector<std::pair<int32_t, GpuTensorStorage>>
2192 list_tensor_storages; // List of tensor storages, required for the dispatch stage
2193 std::vector<std::pair<int32_t, TensorComponentType>>
2194 list_tensor_components; // List of tensor components (width, stride,..), required for the dispatch stage)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002195};
2196
SiCong Li16b37522023-07-18 17:56:49 +01002197// Generate all extension pragmas (hardcoded for now)
2198inline std::string generate_extensions()
2199{
2200 std::string ext = R"(
2201#if defined(cl_khr_fp16)
2202#pragma OPENCL EXTENSION cl_khr_fp16 : enable
2203#endif // defined(cl_khr_fp16)
2204
2205#if defined(cl_arm_integer_dot_product_int8)
2206#pragma OPENCL EXTENSION cl_arm_integer_dot_product_int8 : enable
2207#endif // defined(cl_arm_integer_dot_product_int8)
2208
2209#if defined(cl_arm_integer_dot_product_accumulate_int8)
2210#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int8 : enable
2211#endif // defined(cl_arm_integer_dot_product_accumulate_int8)
2212
2213#if defined(cl_arm_printf)
2214#pragma OPENCL EXTENSION cl_arm_printf : enable
2215#endif // defined(cl_arm_printf);
2216)";
2217 return ext;
2218}
2219
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002220// This function should produce an object with the source
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002221inline std::string generate_code(GpuKernelWriterDataHolder &in, const std::string &name)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002222{
2223 std::string code;
SiCong Li16b37522023-07-18 17:56:49 +01002224 code += generate_extensions();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002225 code += "__kernel void ";
2226 code += name;
2227 code += "(\n";
2228
2229 auto IdSpaces = in.arguments.IdSpace_declarations();
2230
2231 std::vector<std::string> arg_str;
2232
2233 auto tensor_args = in.arguments.tensor_argument_declarations();
2234
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002235 for (auto &i : tensor_args)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002236 {
2237 // For each tensor used, get the storage and tensor components
2238 auto storages = i->storage_declarations();
2239 auto components = i->component_declarations();
2240
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002241 for (auto &y : storages)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002242 {
2243 std::string str;
2244 str += i->storage_type_declaration(y);
2245 str += " ";
2246 str += i->storage(y);
2247 arg_str.push_back(str);
2248 }
2249
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002250 for (auto &y : components)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002251 {
2252 std::string str;
2253 str += i->component_type_declaration();
2254 str += " ";
2255 str += i->component(y);
2256 arg_str.push_back(str);
2257 }
2258 }
2259
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002260 for (size_t i = 0; i < arg_str.size(); ++i)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002261 {
2262 code += arg_str[i];
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002263 if (i + 1 < arg_str.size())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002264 {
2265 code += ",\n";
2266 }
2267 }
2268
2269 code += ")\n";
2270 code += "{\n";
2271 code += in.code;
2272 code += "}\n";
2273
2274 return code;
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002275}
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002276
2277/**
2278 * @brief This class is responsible to map a N-Tensor to a 3d tensor. The mapper needs the GpuSampler to know
2279 * how to reduce the dimensionality of a tensor
2280 *
2281 */
2282class GpuTensor3dMapper
2283{
2284public:
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002285 GpuTensor3dMapper(IGpuTensorArgument *tensor, GpuSampler sampler) : _sampler(sampler), _tensor(tensor){};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002286
2287 std::string tensor_component_x() const
2288 {
2289 const auto format = _sampler.format;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002290 switch (format)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002291 {
2292 case TensorSamplerFormat::C_WH_1:
2293 case TensorSamplerFormat::C_W_H:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002294 return _tensor->component(TensorComponentType::Dim0);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002295 default:
2296 std::cout << "Unsupported tensor format" << std::endl;
2297 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002298 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002299 }
2300 }
2301
2302 std::string tensor_component_y() const
2303 {
2304 const auto format = _sampler.format;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002305 switch (format)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002306 {
2307 case TensorSamplerFormat::C_WH_1:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002308 return _tensor->component(TensorComponentType::Dim1xDim2);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002309 case TensorSamplerFormat::C_W_H:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002310 return _tensor->component(TensorComponentType::Dim1);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002311 default:
2312 std::cout << "Unsupported tensor format" << std::endl;
2313 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002314 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002315 }
2316 }
2317
2318 std::string tensor_component_z() const
2319 {
2320 const auto format = _sampler.format;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002321 switch (format)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002322 {
2323 case TensorSamplerFormat::C_WH_1:
2324 return "1";
2325 case TensorSamplerFormat::C_W_H:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002326 return _tensor->component(TensorComponentType::Dim2);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002327 default:
2328 std::cout << "Unsupported tensor format" << std::endl;
2329 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002330 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002331 }
2332 }
2333
2334 std::string tensor_component_stride_y() const
2335 {
2336 const auto format = _sampler.format;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002337 switch (format)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002338 {
2339 case TensorSamplerFormat::C_WH_1:
2340 case TensorSamplerFormat::C_W_H:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002341 return _tensor->component(TensorComponentType::Stride1);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002342 default:
2343 std::cout << "Unsupported tensor format" << std::endl;
2344 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002345 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002346 }
2347 }
2348
2349 std::string tensor_component_stride_z() const
2350 {
2351 const auto format = _sampler.format;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002352 switch (format)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002353 {
2354 case TensorSamplerFormat::C_WH_1:
2355 return "0";
2356 case TensorSamplerFormat::C_W_H:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002357 return _tensor->component(TensorComponentType::Stride2);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002358 default:
2359 std::cout << "Unsupported tensor format" << std::endl;
2360 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002361 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002362 }
2363 }
2364
2365 std::string tensor_component_stride_batch() const
2366 {
2367 const auto format = _sampler.format;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002368 switch (format)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002369 {
2370 case TensorSamplerFormat::C_WH_1:
2371 case TensorSamplerFormat::C_W_H:
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01002372 return _tensor->component(TensorComponentType::Stride3);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002373 default:
2374 std::cout << "Unsupported tensor format" << std::endl;
2375 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002376 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002377 }
2378 }
2379
2380 bool is_one_component_x() const
2381 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002382 auto t = _tensor->format();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002383 const auto format = _sampler.format;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002384 switch (format)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002385 {
2386 case TensorSamplerFormat::C_WH_1:
2387 case TensorSamplerFormat::C_W_H:
2388 return t.shape[0] == 1;
2389 default:
2390 std::cout << "Unsupported tensor format" << std::endl;
2391 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002392 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002393 }
2394 }
2395
2396 bool is_one_component_y() const
2397 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002398 auto t = _tensor->format();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002399 const auto format = _sampler.format;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002400 switch (format)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002401 {
2402 case TensorSamplerFormat::C_WH_1:
2403 return (t.shape[1] * t.shape[2]) == 1;
2404 case TensorSamplerFormat::C_W_H:
2405 return t.shape[1] == 1;
2406 default:
2407 std::cout << "Unsupported tensor format" << std::endl;
2408 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002409 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002410 }
2411 }
2412
2413 bool is_one_component_z() const
2414 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002415 auto t = _tensor->format();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002416 const auto format = _sampler.format;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002417 switch (format)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002418 {
2419 case TensorSamplerFormat::C_WH_1:
2420 return true;
2421 case TensorSamplerFormat::C_W_H:
2422 return t.shape[2] == 1;
2423 default:
2424 std::cout << "Unsupported tensor format" << std::endl;
2425 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002426 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002427 }
2428 }
2429
2430 bool is_one_component_batch() const
2431 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002432 auto t = _tensor->format();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002433 const auto format = _sampler.format;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002434 switch (format)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002435 {
2436 case TensorSamplerFormat::C_WH_1:
2437 case TensorSamplerFormat::C_W_H:
2438 return t.shape[3] == 1;
2439 default:
2440 std::cout << "Unsupported tensor format" << std::endl;
2441 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01002442 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002443 }
2444 }
2445
2446 GpuSampler gpu_sampler() const
2447 {
2448 return _sampler;
2449 }
2450
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002451 IGpuTensorArgument *tensor_argument() const
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002452 {
2453 return _tensor;
2454 }
2455
2456private:
2457 GpuSampler _sampler;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002458 IGpuTensorArgument *_tensor;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002459};
2460
2461struct GpuKernelWriterAttribute
2462{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002463 bool return_tensor_component_by_value{false};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002464};
2465
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002466enum class RoundingMode
2467{
2468 None,
2469 Rte,
2470 Rtz,
2471 Rtp,
2472 Rtn
2473};
2474
2475// https://llvm.org/docs/tutorial/MyFirstLanguageFrontend/LangImpl05.html
2476class IGpuKernelWriter
2477{
2478public:
2479 virtual ~IGpuKernelWriter() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002480
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002481 virtual void set_IdSpace(int32_t id) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002482
2483 virtual void import_tile(const std::string &dst, const IVectorTile *src) = 0;
2484
2485 virtual void declare_argument(const std::string &name, const TensorInfo &tensor) = 0;
2486
2487 virtual void declare_tile(const std::string &name, const TileInfo &info) = 0;
2488
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002489 virtual void
2490 declare_const_tile(const std::string &name, const std::vector<std::vector<std::string>> &in, DataType dt) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002491
2492 virtual void write_text(const std::string &x) = 0;
2493
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002494 virtual void compound_statement_begin() = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002495
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002496 virtual void compound_statement_end() = 0;
2497
2498 // Operations
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002499 virtual void op_get_global_id(const Operand &dst_var, int32_t dim) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002500
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002501 virtual void
2502 op_get_global_coord(const Operand &dst, const Operand &step, const TensorOperand &tensor, int32_t dim) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002503
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002504 virtual void op_get_global_batch(const Operand &dst, const TensorOperand &tensor) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002505
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002506 virtual void op_get_global_size(const Operand &dst_var, int32_t dim) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002507
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002508 virtual void op_unary_expression(const Operand &dst, UnaryOp op, const Operand &src) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002509
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002510 virtual void op_binary_expression(const Operand &dst, const Operand &lhs, BinaryOp op, const Operand &rhs) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002511
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002512 virtual void op_assign(const Operand &dst_name, const Operand &src_name) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002513
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002514 virtual void
2515 op_unary_elementwise_function(const Operand &dst_name, UnaryFunction func, const Operand &src_name) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002516
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002517 virtual void op_binary_elementwise_function(const Operand &dst_name,
2518 BinaryFunction func,
2519 const Operand &first_name,
2520 const Operand &second_name) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002521
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002522 virtual void op_ternary_elementwise_function(const Operand &dst_name,
2523 TernaryFunction func,
2524 const Operand &first_name,
2525 const Operand &second_name,
2526 const Operand &third_name) = 0;
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002527
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002528 virtual void op_if_header(const Operand &lhs, BinaryOp op, const Operand &rhs) = 0;
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002529
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002530 virtual void op_else_if_header(const Operand &lhs, BinaryOp op, const Operand &rhs) = 0;
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002531
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002532 virtual void op_else_header() = 0;
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002533
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002534 virtual void op_for_loop_header(const Operand &var_name,
2535 BinaryOp cond_op,
2536 const Operand &cond_value,
2537 const Operand &update_var,
2538 AssignmentOp update_op,
2539 const Operand &update_value) = 0;
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01002540
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002541 virtual void op_load_indirect(const TensorOperand &tensor,
2542 const Operand &dst,
2543 const Operand &x,
2544 const Operand &y_indirect,
2545 const Operand &z,
2546 const Operand &b = Operand("0", OperandType::ScalarInt32)) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002547
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002548 virtual void op_load_immediate(const TensorOperand &tensor,
2549 const Operand &dst,
2550 const Operand &x,
2551 const Operand &y,
2552 const Operand &z,
2553 const Operand &b = Operand("0", OperandType::ScalarInt32),
2554 const Operand &dilation_y = Operand("1", OperandType::ScalarInt32)) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002555
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002556 virtual void op_store_immediate(const TensorOperand &tensor,
2557 const Operand &src,
2558 const Operand &x,
2559 const Operand &y,
2560 const Operand &z,
2561 const Operand &b = Operand("0", OperandType::ScalarInt32)) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002562
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002563 virtual void op_cast_expression(const Operand &dst, const Operand &src, ConvertPolicy policy) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002564
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002565 virtual void op_return() = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002566
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002567 // Utils
2568 // It is the process of converting
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002569 virtual void util_get_indirect_buffer(const Operand &dst,
2570 const TensorOperand &tensor,
2571 const Operand &x,
2572 const Operand &y,
2573 const Operand &x_off,
2574 const Operand &y_off) = 0;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002575};
2576
2577enum class GpuLoadStoreType
2578{
2579 Load = 1,
2580 Store = 2
2581};
2582
2583class IGpuLoadStoreHelperWriter
2584{
2585public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002586 IGpuLoadStoreHelperWriter(IGpuKernelWriter *x, GpuTensor3dMapper mapper, GpuLoadStoreType type)
2587 : _writer(x), _mapper(mapper), _type(type)
2588 {
2589 }
2590
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002591 IGpuLoadStoreHelperWriter(const IGpuLoadStoreHelperWriter &) = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002592
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002593 IGpuLoadStoreHelperWriter &operator=(const IGpuLoadStoreHelperWriter &) = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002594
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002595 virtual ~IGpuLoadStoreHelperWriter() = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002596
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002597 virtual void initialize(IVectorTile *dst, IVectorTile *x, IVectorTile *z, IVectorTile *b) = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002598
2599 virtual void write(const std::pair<int32_t, std::string> &y) = 0;
2600
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002601 virtual void finalize() = 0;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002602
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002603protected:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002604 IGpuKernelWriter *_writer;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002605 GpuTensor3dMapper _mapper;
2606 GpuLoadStoreType _type;
2607};
2608
2609class ClLoadStoreBufferHelperWriter : public IGpuLoadStoreHelperWriter
2610{
2611public:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002612 ClLoadStoreBufferHelperWriter(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type)
2613 : IGpuLoadStoreHelperWriter(x, mapper, type)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002614 {
2615 }
2616
2617 ClLoadStoreBufferHelperWriter(const ClLoadStoreBufferHelperWriter &) = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002618
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002619 ClLoadStoreBufferHelperWriter &operator=(const ClLoadStoreBufferHelperWriter &) = default;
2620
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002621 static bool validate(IGpuKernelWriter *x, GpuTensor3dMapper mapper, GpuLoadStoreType type, IVectorTile *dst)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002622 {
2623 CKW_UNUSED(x, type, dst);
2624
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002625 if (mapper.gpu_sampler().storage != GpuSamplerTensorStorage::BufferUint8Ptr)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002626 {
2627 return false;
2628 }
2629 return true;
2630 }
2631
2632 void initialize(IVectorTile *dst, IVectorTile *x, IVectorTile *z, IVectorTile *b) override
2633 {
2634 assert(validate(_writer, _mapper, _type, dst));
2635
2636 _dst = dst;
2637 _ls_width_full = dst->format().w;
2638
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002639 _coord_x = x->scalar(0, 0).str;
2640 _coord_z = z->scalar(0, 0).str;
2641 _coord_b = b->scalar(0, 0).str;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002642 _coord_orig_z = _coord_z;
2643
2644 out_of_bound_initialize_x(_coord_x);
2645 out_of_bound_initialize_z(_coord_z);
2646
2647 /*
2648 meaning of else:
2649 - x: partial load/store
2650 - y: no load/store operation
2651 - z: no load/store operation
2652 if(x)
2653 {
2654 if(z)
2655 {
2656 if(y)
2657 {
2658 // full load/store width
2659 }
2660 else
2661 {
2662 // no load/store
2663 }
2664 }
2665 else
2666 {
2667 // no load/store
2668 }
2669 }
2670 else
2671 {
2672 if(z)
2673 {
2674 if(y)
2675 {
2676 // partial load/store width
2677 }
2678 else
2679 {
2680 // no load/store
2681 }
2682 }
2683 else
2684 {
2685 // no load/store
2686 }
2687 }
2688 */
2689 }
2690
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002691 void write(const std::pair<int32_t, std::string> &y) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002692 {
2693 int32_t idx_y = y.first;
2694 std::string coord_y = y.second;
2695
2696 // The only check required is on Y.
2697 out_of_bound_initialize_y(coord_y);
2698
2699 const std::string dst = _dst->vector(idx_y).str;
2700 const std::string address = to_ls_buffer_address(_coord_x, coord_y, _coord_z, _coord_b);
2701 const std::string ls_buf = to_ls_buffer(_type, _ls_width_full, dst, address);
2702
2703 _writer->write_text(ls_buf);
2704 _writer->write_text(";\n");
2705
2706 out_of_bound_finalize_y(dst);
2707
2708 // The left over load/store will be written in the finalize stage
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002709 if (_ls_width_part.size() != 0)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002710 {
2711 int32_t w = 0;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002712 for (auto &p : _ls_width_part)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002713 {
2714 const std::string dst0 = _dst->vector(w, p, idx_y).str;
2715 const std::string coord_x = _coord_x + " + " + std::to_string(w);
2716 const std::string address = to_ls_buffer_address(coord_x, coord_y, _coord_z, _coord_b);
2717 const std::string ls_buf0 = to_ls_buffer(_type, p, dst0, address);
2718 _leftovers_x.push_back(std::make_pair(std::make_pair(dst0, coord_y), ls_buf0));
2719
2720 w += p;
2721 }
2722 }
2723 }
2724
2725 void finalize() override
2726 {
2727 out_of_bound_finalize_z();
2728 out_of_bound_finalize_x();
2729 }
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002730
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002731private:
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002732 IVectorTile *_dst{nullptr};
2733 int32_t _ls_width_full{0};
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002734 std::vector<int32_t> _ls_width_part{};
2735 std::vector<std::pair<std::pair<std::string, std::string>, std::string>> _leftovers_x{};
2736 std::string _coord_x{};
2737 std::string _coord_z{};
2738 std::string _coord_orig_z{};
2739 std::string _coord_b{};
2740
2741 void out_of_bound_initialize_x(std::string &coord)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002742 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002743 if (_mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002744 {
2745 auto tensor_format = _mapper.tensor_argument()->format();
2746 auto shape = tensor_format.shape;
2747
2748 _ls_width_part = decompose_leftover_ls_vector_width(shape[0] % _ls_width_full);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002749 if (_ls_width_part.size() != 0)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002750 {
2751 _writer->write_text("if(" + coord + " > 0)\n");
2752 _writer->compound_statement_begin();
2753 }
2754 }
2755 };
2756
2757 void out_of_bound_finalize_x()
2758 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002759 if (_mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002760 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002761 if (_ls_width_part.size() != 0)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002762 {
2763 _writer->compound_statement_end();
2764 _writer->write_text("else\n");
2765 _writer->compound_statement_begin();
2766
2767 out_of_bound_initialize_z(_coord_orig_z);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002768 for (auto &i : _leftovers_x)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002769 {
2770 out_of_bound_initialize_y(i.first.second);
2771 _writer->write_text(i.second);
2772 _writer->write_text(";\n");
2773 out_of_bound_finalize_y(i.first.first);
2774 }
2775 out_of_bound_finalize_z();
2776 _writer->compound_statement_end();
2777 }
2778 }
2779 };
2780
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002781 void out_of_bound_initialize_y(std::string &coord)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002782 {
2783 std::string max = "";
2784
2785 const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
2786
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002787 switch (address_mode_y)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002788 {
2789 case TensorSamplerAddressModeY::Skip:
2790 case TensorSamplerAddressModeY::ClampToBorder:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002791 // NOTE: This line should not be moved outside of the switch statement.
2792 // The reason for that is because when we query the component, the component is marked as used
2793 // and added to the list of arguments of the kernel. Since, not in all cases this component is required,
2794 // we should request the component only when used
2795 max = _mapper.tensor_component_y();
2796 _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n");
2797 _writer->compound_statement_begin();
2798 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002799 case TensorSamplerAddressModeY::SkipMinEdgeOnly:
2800 case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002801 _writer->write_text("if(" + coord + " >= 0)\n");
2802 _writer->compound_statement_begin();
2803 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002804 case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
2805 case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002806 max = _mapper.tensor_component_y();
2807 _writer->write_text("if(" + coord + " < " + max + ")\n");
2808 _writer->compound_statement_begin();
2809 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002810 case TensorSamplerAddressModeY::ClampToNearest:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002811 max = _mapper.tensor_component_y();
2812 coord = "clamp(" + coord + ", 0, " + max + " - 1)";
2813 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002814 case TensorSamplerAddressModeY::ClampToMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002815 max = _mapper.tensor_component_y();
2816 coord = "min(" + coord + ", " + max + " - 1)";
2817 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002818 case TensorSamplerAddressModeY::ClampToMinEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002819 coord = "max(" + coord + ", 0)";
2820 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002821 case TensorSamplerAddressModeY::None:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002822 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002823 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002824 std::cout << "Unsupported address mode for write_out_of_bound_check_yz" << std::endl;
2825 assert(false);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002826 }
2827 };
2828
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002829 void out_of_bound_finalize_y(const std::string &dst)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002830 {
2831 const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
2832
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002833 switch (address_mode_y)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002834 {
2835 case TensorSamplerAddressModeY::ClampToBorder:
2836 case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
2837 case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
2838 case TensorSamplerAddressModeY::Skip:
2839 case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
2840 case TensorSamplerAddressModeY::SkipMinEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002841 _writer->compound_statement_end();
2842 break;
SiCong Li16b37522023-07-18 17:56:49 +01002843 case TensorSamplerAddressModeY::None:
2844 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002845
2846 default:
2847 assert(false);
2848 }
2849
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002850 switch (address_mode_y)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002851 {
2852 case TensorSamplerAddressModeY::ClampToBorder:
2853 case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
2854 case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002855 _writer->write_text("else\n");
2856 _writer->compound_statement_begin();
2857 _writer->write_text(dst);
2858 _writer->write_text(" = 0.0f;\n");
2859 _writer->compound_statement_end();
2860 break;
SiCong Li16b37522023-07-18 17:56:49 +01002861 case TensorSamplerAddressModeY::None:
2862 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002863
2864 default:
2865 assert(false);
2866 }
2867 };
2868
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002869 void out_of_bound_initialize_z(std::string &coord)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002870 {
2871 std::string max = "";
2872
2873 const auto address_mode_z = _mapper.gpu_sampler().address_mode_z;
2874
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002875 switch (address_mode_z)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002876 {
2877 case TensorSamplerAddressModeZ::Skip:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002878 max = _mapper.tensor_component_z();
2879 _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n");
2880 _writer->compound_statement_begin();
2881 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002882 case TensorSamplerAddressModeZ::SkipMinEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002883 _writer->write_text("if(" + coord + " >= 0)\n");
2884 _writer->compound_statement_begin();
2885 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002886 case TensorSamplerAddressModeZ::SkipMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002887 max = _mapper.tensor_component_z();
2888 _writer->write_text("if(" + coord + " < " + max + ")\n");
2889 _writer->compound_statement_begin();
2890 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002891 case TensorSamplerAddressModeZ::ClampToNearest:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002892 max = _mapper.tensor_component_z();
2893 coord = "clamp(" + coord + ", 0, " + max + " - 1)";
2894 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002895 case TensorSamplerAddressModeZ::ClampToMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002896 max = _mapper.tensor_component_z();
2897 coord = "min(" + coord + ", " + max + " - 1)";
2898 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002899 case TensorSamplerAddressModeZ::ClampToMinEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002900 coord = "max(" + coord + ", 0)";
2901 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002902 case TensorSamplerAddressModeZ::None:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002903 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002904 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002905 std::cout << "Unsupported address mode for write_out_of_bound_check_yz" << std::endl;
2906 assert(false);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002907 }
2908 };
2909
2910 void out_of_bound_finalize_z()
2911 {
2912 const auto address_mode_z = _mapper.gpu_sampler().address_mode_z;
2913
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002914 switch (address_mode_z)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002915 {
2916 case TensorSamplerAddressModeZ::Skip:
2917 case TensorSamplerAddressModeZ::SkipMinEdgeOnly:
2918 case TensorSamplerAddressModeZ::SkipMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01002919 _writer->compound_statement_end();
2920 break;
SiCong Li16b37522023-07-18 17:56:49 +01002921 case TensorSamplerAddressModeZ::None:
2922 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002923
2924 default:
2925 assert(false);
2926 }
2927 };
2928
2929 std::vector<int32_t> decompose_leftover_ls_vector_width(int32_t ls_leftover_vector_width) const
2930 {
2931 std::vector<int32_t> x;
2932
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002933 switch (ls_leftover_vector_width)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002934 {
2935 case 0:
2936 break;
2937 case 1:
2938 case 2:
2939 case 3:
2940 case 4:
2941 case 8:
2942 case 16:
2943 x.push_back(ls_leftover_vector_width);
2944 break;
2945 case 5:
2946 x.push_back(4);
2947 x.push_back(1);
2948 break;
2949 case 6:
2950 x.push_back(4);
2951 x.push_back(2);
2952 break;
2953 case 7:
2954 x.push_back(4);
2955 x.push_back(3);
2956 break;
2957 case 9:
2958 x.push_back(8);
2959 x.push_back(1);
2960 break;
2961 case 10:
2962 x.push_back(8);
2963 x.push_back(2);
2964 break;
2965 case 11:
2966 x.push_back(8);
2967 x.push_back(3);
2968 break;
2969 case 12:
2970 x.push_back(8);
2971 x.push_back(4);
2972 break;
2973 case 13:
2974 x.push_back(8);
2975 x.push_back(4);
2976 x.push_back(1);
2977 break;
2978 case 14:
2979 x.push_back(8);
2980 x.push_back(4);
2981 x.push_back(2);
2982 break;
2983 case 15:
2984 x.push_back(8);
2985 x.push_back(4);
2986 x.push_back(3);
2987 break;
2988
2989 default:
2990 assert(false);
2991 }
2992 return x;
2993 }
2994
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002995 std::string
2996 to_ls_buffer(GpuLoadStoreType type, int32_t vector_width, const std::string &data, const std::string &address)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002997 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01002998 switch (type)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01002999 {
3000 case GpuLoadStoreType::Load:
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003001 if (vector_width != 1)
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003002 {
3003 return data + " = vload" + std::to_string(vector_width) + "(0, " + address + ")";
3004 }
3005 else
3006 {
3007 return data + " = *(" + address + ")";
3008 }
3009 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003010 case GpuLoadStoreType::Store:
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003011 if (vector_width != 1)
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003012 {
3013 return "vstore" + std::to_string(vector_width) + "(" + data + ", 0, " + address + ")";
3014 }
3015 else
3016 {
3017 return "*(" + address + ") = " + data;
3018 }
3019 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003020 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003021 std::cout << "Unsupported GpuLoadStoreType" << std::endl;
3022 assert(false);
3023 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003024 }
3025 }
3026
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003027 std::string
3028 to_ls_buffer_address(const std::string &x, const std::string &y, const std::string &z, const std::string &b) const
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003029 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003030 auto tensor_storage = static_cast<GpuTensorStorage>(_mapper.gpu_sampler().storage);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003031 assert(tensor_storage == GpuTensorStorage::BufferUint8Ptr);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003032 const std::string ptr_buf = _mapper.tensor_argument()->storage(tensor_storage);
3033 const std::string dst_type = get_cl_data_type(_dst->format().dt, 1);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003034
3035 std::string address;
3036 address += "(__global ";
3037 address += dst_type;
3038 address += "*)(";
3039 address += ptr_buf;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003040 if (x != "0" && (_mapper.is_one_component_x() != true))
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003041 {
3042 address += " + (";
3043 address += x + ") * sizeof(" + dst_type + ")";
3044 }
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003045 if (y != "0")
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003046 {
3047 const std::string stride_y = _mapper.tensor_component_stride_y();
3048 address += " + (";
3049 address += y + ")";
3050 address += " * ";
3051 address += stride_y;
3052 }
Adnan AlSinanfde45d82023-10-24 12:03:21 +01003053 if (z != "0")
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003054 {
3055 const std::string stride_z = _mapper.tensor_component_stride_z();
3056 address += " + (";
3057 address += z + ")";
3058 address += " * ";
3059 address += stride_z;
3060 }
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003061 if (b != "0" && (_mapper.is_one_component_batch() != true))
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003062 {
3063 const std::string stride_b = _mapper.tensor_component_stride_batch();
3064 address += " + (";
3065 address += b + ")";
3066 address += " * ";
3067 address += stride_b;
3068 }
3069 address += ")";
3070 return address;
3071 }
3072};
3073
3074class ClLoadStoreImage2dHelperWriter : public IGpuLoadStoreHelperWriter
3075{
3076public:
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003077 static bool validate(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type, IVectorTile *dst)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003078 {
3079 CKW_UNUSED(x);
3080
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003081 if (dst->format().w != 4)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003082 {
3083 return false;
3084 }
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003085 if (mapper.gpu_sampler().address_mode_x != TensorSamplerAddressModeX::None)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003086 {
3087 return false;
3088 }
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003089 if (mapper.gpu_sampler().address_mode_z != TensorSamplerAddressModeZ::None)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003090 {
3091 return false;
3092 }
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003093 if (mapper.gpu_sampler().storage != GpuSamplerTensorStorage::Image2dReadOnly && type == GpuLoadStoreType::Load)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003094 {
3095 return false;
3096 }
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003097 if (mapper.gpu_sampler().storage != GpuSamplerTensorStorage::Image2dWriteOnly &&
3098 type == GpuLoadStoreType::Store)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003099 {
3100 return false;
3101 }
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003102 if ((dst->format().dt != DataType::Fp32) && (dst->format().dt != DataType::Fp16))
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003103 {
3104 return false;
3105 }
3106 return true;
3107 /*
3108 - x: Only GpuSamplerAddressModeX::None is supported and vector length = 4
3109 - z: Only GpuSamplerAddressModeZ::None is supported
3110 */
3111 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003112
3113 ClLoadStoreImage2dHelperWriter(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type)
3114 : IGpuLoadStoreHelperWriter(x, mapper, type)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003115 {
3116 }
3117
3118 ClLoadStoreImage2dHelperWriter(const ClLoadStoreImage2dHelperWriter &) = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003119
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003120 ClLoadStoreImage2dHelperWriter &operator=(const ClLoadStoreImage2dHelperWriter &) = default;
3121
3122 void initialize(IVectorTile *dst, IVectorTile *x, IVectorTile *z, IVectorTile *b) override
3123 {
3124 assert(validate(_writer, _mapper, _type, dst));
3125
3126 _dst = dst;
3127 _ls_width_full = dst->format().w;
3128 _coord_x = x->scalar(0, 0).str;
3129 _coord_z = z->scalar(0, 0).str;
3130 _coord_b = b->scalar(0, 0).str;
3131
3132 /*
3133 if(y)
3134 {
3135 // full load/store width
3136 }
3137 else
3138 {
3139 // no load/store
3140 }
3141 */
3142 }
3143
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003144 void write(const std::pair<int32_t, std::string> &y) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003145 {
3146 int32_t idx_y = y.first;
3147 std::string coord_y = y.second;
3148
3149 // The only check required is on Y.
3150 out_of_bound_initialize_y(coord_y);
3151
3152 const std::string dst = _dst->vector(idx_y).str;
3153 const std::string sampler = to_ls_image2d_sampler();
3154 const std::string coord = to_ls_image2d_coord(_coord_x, coord_y, _coord_z, _coord_b);
3155 const std::string ls_buf = to_ls_image2d(_type, _ls_width_full, dst, sampler, coord);
3156
3157 _writer->write_text(ls_buf);
3158 _writer->write_text(";\n");
3159
3160 out_of_bound_finalize_y(dst);
3161 }
3162
3163 void finalize() override
3164 {
3165 }
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003166
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003167private:
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003168 IVectorTile *_dst{nullptr};
3169 int32_t _ls_width_full{0};
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003170 std::string _coord_x{};
3171 std::string _coord_z{};
3172 std::string _coord_b{};
3173
3174 void out_of_bound_initialize_y(std::string &coord)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003175 {
3176 std::string max = "";
3177
3178 const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
3179
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003180 switch (address_mode_y)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003181 {
3182 case TensorSamplerAddressModeY::Skip:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003183 max = _mapper.tensor_component_y();
3184 _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n");
3185 _writer->compound_statement_begin();
3186 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003187 case TensorSamplerAddressModeY::SkipMinEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003188 _writer->write_text("if(" + coord + " >= 0)\n");
3189 _writer->compound_statement_begin();
3190 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003191 case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003192 max = _mapper.tensor_component_y();
3193 _writer->write_text("if(" + coord + " < " + max + ")\n");
3194 _writer->compound_statement_begin();
3195 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003196 case TensorSamplerAddressModeY::ClampToBorder:
3197 case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
3198 case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
3199 case TensorSamplerAddressModeY::ClampToNearest:
3200 case TensorSamplerAddressModeY::ClampToMaxEdgeOnly:
3201 case TensorSamplerAddressModeY::ClampToMinEdgeOnly:
3202 case TensorSamplerAddressModeY::None:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003203 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003204 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003205 std::cout << "Unsupported address mode for write_out_of_bound_check_y" << std::endl;
3206 assert(false);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003207 }
3208 };
3209
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003210 void out_of_bound_finalize_y(const std::string &dst)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003211 {
3212 CKW_UNUSED(dst);
3213
3214 const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
3215
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003216 switch (address_mode_y)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003217 {
3218 case TensorSamplerAddressModeY::Skip:
3219 case TensorSamplerAddressModeY::SkipMinEdgeOnly:
3220 case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003221 _writer->compound_statement_end();
3222 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003223
3224 default:
3225 assert(false);
3226 }
3227 };
3228
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003229 std::string to_ls_image2d(GpuLoadStoreType type,
3230 int32_t vector_width,
3231 const std::string &data,
3232 const std::string &sampler,
3233 const std::string &coord)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003234 {
3235 CKW_UNUSED(vector_width);
3236
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003237 auto tensor_storage = static_cast<GpuTensorStorage>(_mapper.gpu_sampler().storage);
3238 const std::string image2d_obj = _mapper.tensor_argument()->storage(tensor_storage);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003239 const std::string post_fix = _dst->format().dt == DataType::Fp32 ? "f" : "h";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003240
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003241 switch (type)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003242 {
3243 case GpuLoadStoreType::Load:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003244 return data + " = read_image" + post_fix + "(" + image2d_obj + ", " + sampler + ", " + coord + ")";
3245 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003246 case GpuLoadStoreType::Store:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003247 return "write_image" + post_fix + "(" + image2d_obj + ", " + coord + ", " + data + ")";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003248 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003249 assert(false);
3250 std::cout << "Unsupported GpuLoadStoreType" << std::endl;
3251 assert(false);
3252 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003253 }
3254 }
3255
3256 std::string to_ls_image2d_sampler() const
3257 {
3258 const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
3259
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003260 switch (address_mode_y)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003261 {
3262 case TensorSamplerAddressModeY::None:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003263 return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003264 case TensorSamplerAddressModeY::Skip:
3265 case TensorSamplerAddressModeY::SkipMinEdgeOnly:
3266 case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
3267 case TensorSamplerAddressModeY::ClampToBorder:
3268 case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
3269 case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003270 return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003271 case TensorSamplerAddressModeY::ClampToNearest:
3272 case TensorSamplerAddressModeY::ClampToMaxEdgeOnly:
3273 case TensorSamplerAddressModeY::ClampToMinEdgeOnly:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003274 return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003275 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003276 std::cout << "Unsupported address_mode_coord" << std::endl;
3277 assert(false);
3278 return "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003279 }
3280 }
3281
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003282 std::string
3283 to_ls_image2d_coord(const std::string &x, const std::string &y, const std::string &z, const std::string &b) const
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003284 {
3285 std::string coord_x = "(" + x + ") >> 2";
3286 std::string coord_y = "(";
3287
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003288 if (y != "0")
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003289 {
3290 coord_y += y;
3291 }
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003292 if (z != "0" && (_mapper.is_one_component_z() != true))
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003293 {
3294 const std::string dim = _mapper.tensor_component_y();
3295 coord_y += " + (";
3296 coord_y += z + ")";
3297 coord_y += " * ";
3298 coord_y += dim;
3299 }
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003300 if (b != "0" && (_mapper.is_one_component_batch() != true))
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003301 {
3302 const std::string dim0 = _mapper.tensor_component_y();
3303 const std::string dim1 = _mapper.tensor_component_z();
3304 coord_y += " + (";
3305 coord_y += b + ")";
3306 coord_y += " * ";
3307 coord_y += dim0;
3308 coord_y += " * ";
3309 coord_y += dim1;
3310 }
3311 coord_y += ")";
3312 return "(int2)(" + coord_x + ", " + coord_y + ")";
3313 }
3314};
3315
3316/** IGpuLoadStoreHelperWriter factory class */
3317class ClLoadStoreHelperWriterFactory final
3318{
3319public:
3320 /** Static method to call the IGpuLoadStoreHelperWriter class accordingly with the tensor storage set in the mapper
3321 *
3322 *
3323 * @return IGpuLoadStoreHelperWriter
3324 */
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003325 static std::unique_ptr<IGpuLoadStoreHelperWriter>
3326 create(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003327 {
3328 const auto tensor_storage = mapper.gpu_sampler().storage;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003329 switch (tensor_storage)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003330 {
3331 case GpuSamplerTensorStorage::BufferUint8Ptr:
3332 return std::make_unique<ClLoadStoreBufferHelperWriter>(x, mapper, type);
3333 case GpuSamplerTensorStorage::Image2dReadOnly:
3334 case GpuSamplerTensorStorage::Image2dWriteOnly:
3335 return std::make_unique<ClLoadStoreImage2dHelperWriter>(x, mapper, type);
3336 default:
3337 std::cout << "Unsupported Gpu tensor storage" << std::endl;
3338 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01003339 return nullptr;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003340 }
3341 }
3342};
3343
3344// This utility method needs to go in utils.h
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003345inline bool is_tile_scalar(const IVectorTile *x)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003346{
3347 return x->format().w == 1 && x->format().h == 1;
3348}
3349
3350class ClKernelWriter : public IGpuKernelWriter
3351{
3352public:
3353 ClKernelWriter(GpuKernelWriterAttribute *attr, GpuKernelWriterDataHolder *x)
3354 {
3355 _data = x;
3356 _attr = attr;
3357 }
3358
3359 ClKernelWriter(const ClKernelWriter &) = default;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003360
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003361 ClKernelWriter &operator=(const ClKernelWriter &) = default;
3362
3363 // A IdSpaced ID is a term used to describe a fragment that is registered in ICode to ensure
3364 // there are no conflicts or ambiguity in the code
3365 void set_IdSpace(int32_t id) override
3366 {
3367 _data->tiles.set_IdSpace(id);
3368 _data->arguments.set_IdSpace(id);
3369 }
3370
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003371 void import_tile(const std::string &dst_name, const IVectorTile *src) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003372 {
3373 _data->tiles.insert(dst_name, src);
3374 }
3375
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003376 void declare_argument(const std::string &name, const TensorInfo &tensor) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003377 {
3378 assert(_data->arguments[name] == nullptr);
3379 _data->arguments.insert(name, tensor, _attr->return_tensor_component_by_value);
3380 }
3381
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003382 void declare_tile(const std::string &name, const TileInfo &format) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003383 {
3384 assert(_data->tiles[name] == nullptr);
3385 _data->tiles.insert(name, format);
3386
3387 IVectorTile *x = _data->tiles[name];
3388
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003389 for (auto &t : x->underlying_source_variables())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003390 {
3391 _data->code += t.type.str + " " + t.str + ";\n";
3392 }
3393 }
3394
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003395 void
3396 declare_const_tile(const std::string &name, const std::vector<std::vector<std::string>> &in, DataType dt) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003397 {
3398 assert(_data->tiles[name] == nullptr);
3399 _data->tiles.insert(name, in, dt);
3400 // Note: A constant does not need to be declared in the code
3401 }
3402
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003403 void write_text(const std::string &x) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003404 {
3405 _data->code += x;
3406 }
3407
3408 void compound_statement_begin() override
3409 {
3410 _data->tiles.increment_registry_level();
3411 _data->code += "{\n";
3412 }
3413
3414 void compound_statement_end() override
3415 {
3416 _data->tiles.decrement_registry_level();
3417 _data->code += "}\n";
3418 }
3419
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003420 void op_get_global_id(const Operand &dst_var, int32_t dim) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003421 {
3422 assert(dst_var.type() == OperandType::Tile);
3423 assert(_data->tiles.has_tile(dst_var.value()));
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003424 assert(_data->tiles[dst_var.value()]->format().w == 1 &&
3425 _data->tiles[dst_var.value()]->format().h == 1); // It must be a scalar variable
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003426
3427 auto var = _data->tiles[dst_var.value()];
3428
3429 _data->code += var->scalar(0, 0).str;
3430 _data->code += " = get_global_id(";
3431 _data->code += std::to_string(dim);
3432 _data->code += ");\n";
3433 };
3434
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003435 void op_get_global_coord(const Operand &o_dst,
3436 const Operand &o_step,
3437 const TensorOperand &o_tensor,
3438 int32_t dim) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003439 {
3440 OperandUnpacker operands(_data->tiles, _data->arguments);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003441 auto dst = operands.unpack(o_dst);
3442 auto step = operands.unpack(o_step);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003443
3444 // Validation: Check that x, y and z are scalar
3445
3446 TensorOperandUnpacker tensor_operands(_data->arguments);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003447 auto tensor = tensor_operands.unpack(o_tensor);
3448 auto gpu_sampler = o_tensor.sampler();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003449
3450 GpuTensor3dMapper mapper(tensor, gpu_sampler);
3451
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003452 switch (dim)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003453 {
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003454 case 0:
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003455 if (mapper.is_one_component_x())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003456 {
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003457 _data->code += dst->scalar(0, 0).str;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003458 _data->code += " = 0;\n";
3459 }
3460 else
3461 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003462 if (mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin)
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003463 {
3464 // Validation: Check: fixed tensor shape
3465 // TO BE CHANGED
3466 _data->code += dst->scalar(0, 0).str;
3467 _data->code += " = get_global_id(0) * ";
3468 _data->code += step->scalar(0, 0).str;
3469 _data->code += ";\n";
3470 }
3471 else
3472 {
3473 _data->code += dst->scalar(0, 0).str;
3474 _data->code += " = get_global_id(0) * ";
3475 _data->code += step->scalar(0, 0).str;
3476 _data->code += ";\n";
3477 }
3478 }
3479 break;
3480 case 1:
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003481 if (mapper.is_one_component_y())
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003482 {
3483 _data->code += dst->scalar(0, 0).str;
3484 _data->code += " = 0;\n";
3485 }
3486 else
3487 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003488 if (mapper.gpu_sampler().address_mode_y == TensorSamplerAddressModeY::OverlappingMin)
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003489 {
3490 }
3491 else
3492 {
3493 _data->code += dst->scalar(0, 0).str;
3494 _data->code += " = get_global_id(1) * ";
3495 _data->code += step->scalar(0, 0).str;
3496 _data->code += ";\n";
3497 }
3498 }
3499 break;
3500 case 2:
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003501 if (mapper.is_one_component_z())
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003502 {
3503 _data->code += dst->scalar(0, 0).str;
3504 _data->code += " = 0;\n";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003505 }
3506 else
3507 {
3508 _data->code += dst->scalar(0, 0).str;
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003509 _data->code += " = get_global_id(2) * ";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003510 _data->code += step->scalar(0, 0).str;
3511 _data->code += ";\n";
3512 }
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003513 break;
3514 default:
3515 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003516 }
3517 };
3518
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003519 void op_get_global_batch(const Operand &o_dst, const TensorOperand &o_tensor) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003520 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003521 OperandUnpacker operands(_data->tiles, _data->arguments);
3522 const IVectorTile *dst = operands.unpack(o_dst);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003523
3524 TensorOperandUnpacker tensor_operands(_data->arguments);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003525 IGpuTensorArgument *tensor = tensor_operands.unpack(o_tensor);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003526 auto gpu_sampler = o_tensor.sampler();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003527
3528 GpuTensor3dMapper mapper(tensor, gpu_sampler);
3529
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003530 if (mapper.is_one_component_batch())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003531 {
3532 _data->code += dst->scalar(0, 0).str;
3533 _data->code += " = 0;\n";
3534 }
3535 else
3536 {
3537 std::cout << "Unsupported batched computation" << std::endl;
3538 assert(false);
3539 }
3540 };
3541
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003542 void op_get_global_size(const Operand &dst_var, int32_t dim) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003543 {
3544 assert(dst_var.type() == OperandType::Tile);
3545 assert(_data->tiles.has_tile(dst_var.value()));
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003546 assert(_data->tiles[dst_var.value()]->format().w == 1 &&
3547 _data->tiles[dst_var.value()]->format().h == 1); // It must be a scalar variable
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003548
3549 auto var = _data->tiles[dst_var.value()];
3550
3551 _data->code += var->scalar(0, 0).str;
3552 _data->code += " = get_global_size(";
3553 _data->code += std::to_string(dim);
3554 _data->code += ");\n";
3555 }
3556
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003557 void op_unary_expression(const Operand &dst_name, UnaryOp op, const Operand &src_name) override
3558 {
3559 OperandUnpacker operands(_data->tiles, _data->arguments);
3560 const IVectorTile *src = operands.unpack(src_name);
3561 const IVectorTile *dst = operands.unpack(dst_name);
3562
3563 const int32_t dst_w = dst->format().w;
3564 const int32_t dst_h = dst->format().h;
3565 const int32_t src_w = src->format().w;
3566 const std::string dt = dst->underlying_source_variables()[0].type.str;
3567
3568 const bool broadcast_src_x = dst_w != 1 && src_w == 1;
3569
3570 const std::string src_prefix = broadcast_src_x ? "(" + dt + ")" : "";
3571
3572 // Broadcasting on Y is automatic
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003573 for (int32_t y = 0; y < dst_h; ++y)
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003574 {
3575 _data->code += dst->vector(y).str;
3576 _data->code += " = ";
3577 _data->code += to_string(op);
3578 _data->code += src_prefix + src->vector(y).str;
3579 _data->code += ";\n";
3580 }
3581 }
3582
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003583 void op_binary_expression(const Operand &dst_name,
3584 const Operand &lhs_name,
3585 BinaryOp op,
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003586 const Operand &rhs_name) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003587 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003588 OperandUnpacker operands(_data->tiles, _data->arguments);
3589 const IVectorTile *lhs = operands.unpack(lhs_name);
3590 const IVectorTile *rhs = operands.unpack(rhs_name);
3591 const IVectorTile *dst = operands.unpack(dst_name);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003592
3593 const int32_t dst_w = dst->format().w;
3594 const int32_t dst_h = dst->format().h;
3595 assert(lhs != nullptr);
3596 const int32_t lhs_w = lhs->format().w;
3597 const int32_t rhs_w = rhs->format().w;
3598
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003599 if (op == BinaryOp::MatMul_Nt_T)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003600 {
3601 assert((dst->format().dt == DataType::Fp32) || (dst->format().dt == DataType::Fp16));
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003602 for (int32_t y = 0; y < dst_h; ++y)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003603 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003604 for (int32_t x = 0; x < dst_w; ++x)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003605 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003606 for (int32_t k = 0; k < lhs_w; ++k)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003607 {
3608 _data->code += dst->scalar(x, y).str;
3609 _data->code += " = fma(";
3610 _data->code += lhs->scalar(k, y).str;
3611 _data->code += ", ";
3612 _data->code += rhs->scalar(k, x).str;
3613 _data->code += ", ";
3614 _data->code += dst->scalar(x, y).str;
3615 _data->code += ");\n";
3616 }
3617 }
3618 }
3619
3620 return;
3621 }
3622
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003623 const bool broadcast_lhs_x = dst_w != 1 && lhs_w == 1;
3624 const bool broadcast_rhs_x = dst_w != 1 && rhs_w == 1;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003625
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003626 const std::string lhs_prefix =
3627 broadcast_lhs_x ? "(" + dst->underlying_source_variables()[0].type.str + ")" : "";
3628 const std::string rhs_prefix =
3629 broadcast_rhs_x ? "(" + dst->underlying_source_variables()[0].type.str + ")" : "";
3630 const std::string op_str = to_string(op);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003631
3632 // Broadcasting on Y is automatic
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003633 for (int32_t y = 0; y < dst_h; ++y)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003634 {
3635 _data->code += dst->vector(y).str;
3636 _data->code += " = ";
3637 _data->code += lhs_prefix + lhs->vector(y).str;
3638 _data->code += " ";
3639 _data->code += op_str;
3640 _data->code += " ";
3641 _data->code += rhs_prefix + rhs->vector(y).str;
3642 _data->code += ";\n";
3643 }
3644 };
3645
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003646 void op_cast_expression(const Operand &o_dst, const Operand &o_src, ConvertPolicy policy) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003647 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003648 OperandUnpacker operands(_data->tiles, _data->arguments);
3649 const IVectorTile *src = operands.unpack(o_src);
3650 const IVectorTile *dst = operands.unpack(o_dst);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003651 // const int32_t dst_w = dst->format().w;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003652 const int32_t dst_h = dst->format().h;
3653 const std::string dt = dst->underlying_source_variables()[0].type.str;
3654 const bool is_float = (dst->format().dt == DataType::Fp32) || (dst->format().dt == DataType::Fp16);
3655 const std::string sat = ((policy == ConvertPolicy::Saturate && !is_float) ? "_sat" : "");
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003656
3657 // Broadcasting on Y is automatic
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003658 for (int32_t y = 0; y < dst_h; ++y)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003659 {
3660 _data->code += dst->vector(y).str;
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003661 _data->code += " = convert_" + dt + sat + "(";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003662 _data->code += src->vector(y).str;
3663 _data->code += ");\n";
3664 }
3665 };
3666
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003667 void op_assign(const Operand &dst_name, const Operand &src_name) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003668 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003669 OperandUnpacker operands(_data->tiles, _data->arguments);
3670 const IVectorTile *src = operands.unpack(src_name);
3671 const IVectorTile *dst = operands.unpack(dst_name);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003672
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003673 const int32_t dst_w = dst->format().w;
3674 const int32_t dst_h = dst->format().h;
3675 const int32_t src_w = src->format().w;
3676 const std::string dt = dst->underlying_source_variables()[0].type.str;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003677
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003678 const bool broadcast_src_x = dst_w != 1 && src_w == 1;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003679
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003680 const std::string src_prefix = broadcast_src_x ? "(" + dt + ")" : "";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003681
3682 // Broadcasting on Y is automatic
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003683 for (int32_t y = 0; y < dst_h; ++y)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003684 {
3685 _data->code += dst->vector(y).str;
3686 _data->code += " = ";
3687 _data->code += src_prefix + src->vector(y).str;
3688 _data->code += ";\n";
3689 }
3690 }
3691
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003692 void op_unary_elementwise_function(const Operand &dst_name, UnaryFunction func, const Operand &src_name) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003693 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003694 OperandUnpacker operands(_data->tiles, _data->arguments);
3695 const IVectorTile *src = operands.unpack(src_name);
3696 const IVectorTile *dst = operands.unpack(dst_name);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003697
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003698 const int32_t dst_h = dst->format().h;
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003699 const std::string dt = dst->underlying_source_variables()[0].type.str;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003700
SiCong Li16b37522023-07-18 17:56:49 +01003701 // Always perform an explicit cast. This automatically covers at least the 2 scenarios:
3702 // 1. Widen a scalar into a vector type. This enables scalar-vector broadcasting
3703 // 2. Ensure non-ambiguity over function overloads.
3704 // E.g. a constant tile may be accidentally initialized with a double literal. By casting it to single float,
3705 // it avoids ambiguous function calls
3706 const std::string src_prefix = "(" + dt + ")";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003707
3708 // Broadcasting on Y is automatic
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003709 for (int32_t y = 0; y < dst_h; ++y)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003710 {
3711 _data->code += dst->vector(y).str;
3712 _data->code += " = ";
3713
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003714 switch (func)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003715 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003716 case UnaryFunction::Exp:
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003717 _data->code += "exp(";
3718 break;
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003719 case UnaryFunction::Tanh:
3720 _data->code += "tanh(";
3721 break;
3722 case UnaryFunction::Sqrt:
3723 _data->code += "sqrt(";
3724 break;
3725 case UnaryFunction::Erf:
3726 _data->code += "erf(";
3727 break;
3728 case UnaryFunction::Fabs:
3729 _data->code += "fabs(";
3730 break;
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003731 case UnaryFunction::Log:
3732 _data->code += "log(";
3733 break;
3734 case UnaryFunction::SizeOf:
3735 _data->code += "sizeof(";
3736 break;
3737 case UnaryFunction::Round:
3738 _data->code += "round(";
3739 break;
Gunes Bayir91cb7332023-07-25 17:00:33 +01003740 case UnaryFunction::Floor:
3741 _data->code += "floor(";
3742 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003743 default:
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003744 CKW_ASSERT_MSG(false, "Unexpected UnaryFunction used.");
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003745 }
3746
3747 _data->code += src_prefix + src->vector(y).str;
3748 _data->code += ");\n";
3749 }
3750 }
3751
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003752 void op_binary_elementwise_function(const Operand &dst_name,
3753 BinaryFunction func,
3754 const Operand &first_name,
3755 const Operand &second_name) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003756 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003757 OperandUnpacker operands(_data->tiles, _data->arguments);
3758 const IVectorTile *first = operands.unpack(first_name);
3759 const IVectorTile *second = operands.unpack(second_name);
3760 const IVectorTile *dst = operands.unpack(dst_name);
3761
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003762 const int32_t dst_h = dst->format().h;
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003763 const auto datatype = dst->underlying_source_variables()[0].type;
3764 const std::string datatype_str = datatype.str;
3765
SiCong Li16b37522023-07-18 17:56:49 +01003766 // Always perform an explicit cast. See similar comments in op_unary_elementwise_function
3767 const std::string first_prefix = "(" + datatype_str + ")";
3768 const std::string second_prefix = "(" + datatype_str + ")";
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003769
3770 const bool is_float = (datatype.dt == DataType::Fp32 || datatype.dt == DataType::Fp16);
3771
3772 // Broadcasting on Y is automatic
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003773 for (int32_t y = 0; y < dst_h; ++y)
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003774 {
3775 _data->code += dst->vector(y).str;
3776 _data->code += " = ";
3777
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003778 switch (func)
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003779 {
3780 case BinaryFunction::Min:
3781 _data->code += is_float ? "fmin(" : "min(";
3782 break;
3783 case BinaryFunction::Max:
3784 _data->code += is_float ? "fmax(" : "max(";
3785 break;
3786 default:
3787 CKW_ASSERT_MSG(false, "Unexpected BinaryFunction used.");
3788 }
3789
3790 _data->code += first_prefix + first->vector(y).str;
3791 _data->code += ", ";
3792 _data->code += second_prefix + second->vector(y).str;
3793 _data->code += ");\n";
3794 }
3795 }
3796
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003797 void op_ternary_elementwise_function(const Operand &dst_name,
3798 TernaryFunction func,
3799 const Operand &first_name,
3800 const Operand &second_name,
3801 const Operand &third_name) override
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003802 {
3803 OperandUnpacker operands(_data->tiles, _data->arguments);
3804 const IVectorTile *first = operands.unpack(first_name);
3805 const IVectorTile *second = operands.unpack(second_name);
3806 const IVectorTile *third = operands.unpack(third_name);
3807 const IVectorTile *dst = operands.unpack(dst_name);
3808
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003809 const int32_t dst_h = dst->format().h;
3810 const std::string dt = dst->underlying_source_variables()[0].type.str;
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003811
SiCong Li16b37522023-07-18 17:56:49 +01003812 // Always perform an explicit cast. See similar comments in op_unary_elementwise_function
3813 const std::string first_prefix = "(" + dt + ")";
3814 const std::string second_prefix = "(" + dt + ")";
3815 const std::string third_prefix = "(" + dt + ")";
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003816
3817 // Broadcasting on Y is automatic
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003818 for (int32_t y = 0; y < dst_h; ++y)
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003819 {
3820 _data->code += dst->vector(y).str;
3821 _data->code += " = ";
3822
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003823 switch (func)
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003824 {
3825 case TernaryFunction::Select:
3826 _data->code += "select(";
3827 break;
Gunes Bayir91cb7332023-07-25 17:00:33 +01003828 case TernaryFunction::Clamp:
3829 _data->code += "clamp(";
3830 break;
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003831 default:
3832 CKW_ASSERT_MSG(false, "Unexpected TernaryFunction used.");
3833 }
3834
3835 _data->code += first_prefix + first->vector(y).str;
3836 _data->code += ", ";
3837 _data->code += second_prefix + second->vector(y).str;
3838 _data->code += ", ";
3839 _data->code += third_prefix + third->vector(y).str;
3840 _data->code += ");\n";
3841 }
3842 }
3843
3844 void op_if_header(const Operand &o_lhs, BinaryOp op, const Operand &o_rhs) override
3845 {
3846 OperandUnpacker operands(_data->tiles, _data->arguments);
3847 const IVectorTile *lhs = operands.unpack(o_lhs);
3848 const IVectorTile *rhs = operands.unpack(o_rhs);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003849
3850 assert(is_tile_scalar(lhs));
3851 assert(is_tile_scalar(rhs));
3852
3853 _data->code += "if(";
3854 _data->code += lhs->scalar(0, 0).str;
3855 _data->code += " ";
3856 _data->code += to_string(op);
3857 _data->code += " ";
3858 _data->code += rhs->scalar(0, 0).str;
3859 _data->code += ")\n";
3860 }
3861
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003862 void op_else_if_header(const Operand &o_lhs, BinaryOp op, const Operand &o_rhs) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003863 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003864 _data->code += "else ";
3865 op_if_header(o_lhs, op, o_rhs);
3866 }
3867
3868 void op_else_header() override
3869 {
3870 _data->code += "else\n";
3871 }
3872
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003873 void op_for_loop_header(const Operand &var_name,
3874 BinaryOp cond_op,
3875 const Operand &cond_value_name,
3876 const Operand &update_var_name,
3877 AssignmentOp update_op,
3878 const Operand &update_value_name) override
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003879 {
3880 OperandUnpacker operands(_data->tiles, _data->arguments);
3881 const IVectorTile *var = operands.unpack(var_name);
3882 const IVectorTile *cond_value = operands.unpack(cond_value_name);
Nikolaj Jensenfab6c212023-06-27 14:13:24 +01003883 const IVectorTile *update_var = operands.unpack(update_var_name);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003884 const IVectorTile *update_value = operands.unpack(update_value_name);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003885
3886 const int32_t dst_w = var->format().w;
3887 const int32_t dst_h = var->format().h;
3888
3889 // It must be a scalar variable
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01003890 CKW_UNUSED(dst_w, dst_h);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003891 assert(dst_w == 1);
3892 assert(dst_h == 1);
3893
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003894 _data->code += "for(; ";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003895 _data->code += var->scalar(0, 0).str;
3896 _data->code += " ";
3897 _data->code += to_string(cond_op);
3898 _data->code += " " + cond_value->scalar(0, 0).str + "; ";
Nikolaj Jensenfab6c212023-06-27 14:13:24 +01003899 _data->code += update_var->scalar(0, 0).str;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003900 _data->code += " ";
3901 _data->code += to_string(update_op);
3902 _data->code += " " + update_value->scalar(0, 0).str + ")";
3903 _data->code += "\n";
3904 }
3905
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003906 void op_load_immediate(const TensorOperand &o_tensor,
3907 const Operand &o_dst,
3908 const Operand &o_x,
3909 const Operand &o_y,
3910 const Operand &o_z,
3911 const Operand &o_batch_idx,
3912 const Operand &dilation_y) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003913 {
3914 OperandUnpacker operands(_data->tiles, _data->arguments);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003915
3916 // Not const as it requires changes to 'load_writer'.
3917 IVectorTile *dst = operands.unpack(o_dst);
3918 IVectorTile *x = operands.unpack(o_x);
3919 IVectorTile *y = operands.unpack(o_y);
3920 IVectorTile *z = operands.unpack(o_z);
3921 IVectorTile *dil_y = operands.unpack(dilation_y);
3922 IVectorTile *b = operands.unpack(o_batch_idx);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003923
3924 TensorOperandUnpacker tensor_operands(_data->arguments);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003925 IGpuTensorArgument *tensor = tensor_operands.unpack(o_tensor);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003926 auto gpu_sampler = o_tensor.sampler();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003927
3928 GpuTensor3dMapper mapper(tensor, gpu_sampler);
3929
3930 auto load_writer = ClLoadStoreHelperWriterFactory::create(this, mapper, GpuLoadStoreType::Load);
3931
3932 // Initialize the constant part
3933 load_writer->initialize(dst, x, z, b);
3934
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003935 for (int i = 0; i < dst->format().h; ++i)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003936 {
3937 std::string coord_y = y->scalar(0, 0).str + " + " + std::to_string(i);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003938 if (dil_y->scalar(0, 0).str != "1")
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003939 {
3940 coord_y += " * " + dil_y->scalar(0, 0).str;
3941 }
3942 load_writer->write(std::make_pair(i, coord_y));
3943 }
3944
3945 load_writer->finalize();
3946 }
3947
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003948 void op_load_indirect(const TensorOperand &o_tensor,
3949 const Operand &o_dst,
3950 const Operand &o_x,
3951 const Operand &o_indirect_h,
3952 const Operand &o_z,
3953 const Operand &o_batch_idx) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003954 {
3955 OperandUnpacker operands(_data->tiles, _data->arguments);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003956
3957 // Not const as it requires changes to 'load_writer'.
3958 IVectorTile *dst = operands.unpack(o_dst);
3959 IVectorTile *x = operands.unpack(o_x);
3960 IVectorTile *y_ind = operands.unpack(o_indirect_h);
3961 IVectorTile *z = operands.unpack(o_z);
3962 IVectorTile *b = operands.unpack(o_batch_idx);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003963
3964 TensorOperandUnpacker tensor_operands(_data->arguments);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003965 IGpuTensorArgument *tensor = tensor_operands.unpack(o_tensor);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01003966 auto gpu_sampler = o_tensor.sampler();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003967
3968 GpuTensor3dMapper mapper(tensor, gpu_sampler);
3969
3970 auto load_writer = ClLoadStoreHelperWriterFactory::create(this, mapper, GpuLoadStoreType::Load);
3971
3972 // Initialize the constant part
3973 load_writer->initialize(dst, x, z, b);
3974
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003975 for (int i = 0; i < dst->format().h; ++i)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003976 {
3977 load_writer->write(std::make_pair(i, y_ind->scalar(0, i).str));
3978 }
3979
3980 load_writer->finalize();
3981 }
3982
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01003983 void op_store_immediate(const TensorOperand &tensor_name,
3984 const Operand &src_name,
3985 const Operand &x_name,
3986 const Operand &y_name,
3987 const Operand &z_name,
3988 const Operand &batch_index_name) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003989 {
3990 OperandUnpacker operands(_data->tiles, _data->arguments);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01003991
3992 // Not const as it requires changes to 'load_writer'.
3993 IVectorTile *src = operands.unpack(src_name);
3994 IVectorTile *x = operands.unpack(x_name);
3995 IVectorTile *y = operands.unpack(y_name);
3996 IVectorTile *z = operands.unpack(z_name);
3997 IVectorTile *b = operands.unpack(batch_index_name);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01003998
3999 TensorOperandUnpacker tensor_operands(_data->arguments);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01004000 IGpuTensorArgument *tensor = tensor_operands.unpack(tensor_name);
Nikolaj Jensenacea4072023-07-03 09:44:42 +01004001 auto gpu_sampler = tensor_name.sampler();
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004002
4003 GpuTensor3dMapper mapper(tensor, gpu_sampler);
4004
4005 auto store_writer = ClLoadStoreHelperWriterFactory::create(this, mapper, GpuLoadStoreType::Store);
4006
4007 // Initialize the constant part
4008 store_writer->initialize(src, x, z, b);
4009
4010 int32_t tile_h = src->format().h;
4011
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01004012 for (int m0 = tile_h - 1; m0 >= 0; m0--)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004013 {
4014 store_writer->write(std::make_pair(m0, y->scalar(0, 0).str + " + " + std::to_string(m0)));
4015 }
4016
4017 store_writer->finalize();
4018 }
4019
4020 void op_return() override
4021 {
4022 _data->code += "return;\n";
4023 }
4024
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01004025 void util_get_indirect_buffer(const Operand &o_dst,
4026 const TensorOperand &o_tensor,
4027 const Operand &o_x,
4028 const Operand &o_y,
4029 const Operand &o_x_off,
4030 const Operand &o_y_off) override
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004031 {
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01004032 OperandUnpacker operands(_data->tiles, _data->arguments);
4033 const IVectorTile *dst = operands.unpack(o_dst);
4034 const IVectorTile *x = operands.unpack(o_x);
4035 const IVectorTile *y = operands.unpack(o_y);
4036 const IVectorTile *x_off = operands.unpack(o_x_off);
4037 const IVectorTile *y_off = operands.unpack(o_y_off);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004038
4039 TensorOperandUnpacker tensor_operands(_data->arguments);
Nikolaj Jensen5ff48022023-06-27 14:13:24 +01004040 IGpuTensorArgument *tensor = tensor_operands.unpack(o_tensor);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004041
4042 assert(dst->format().w == 1);
4043 assert(x->format().w == 1);
4044 assert(y->format().w == 1);
4045 assert(x_off->format().w == 1);
4046 assert(y_off->format().w == 1);
4047 assert(dst->format().dt == DataType::Int32);
4048 assert(x->format().dt == DataType::Int32);
4049 assert(y->format().dt == DataType::Int32);
4050 assert(x_off->format().dt == DataType::Int32);
4051 assert(y_off->format().dt == DataType::Int32);
4052
Viet-Hoa Doc8e16172023-06-27 14:09:46 +01004053 const std::string width = tensor->component(TensorComponentType::Dim1);
4054 const std::string height = tensor->component(TensorComponentType::Dim2);
4055 const std::string wxh = tensor->component(TensorComponentType::Dim1xDim2);
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004056 /*
4057 int x_s;
4058 int y_s;
4059 x_s = (xi_0 + x_k);
4060 y_s = (yi_0 + y_k);
4061 mi_0 = x_s + y_s * width + b * widthxheight;
4062 mi_0 = select(-1, mi_0, x_s >= 0);
4063 mi_0 = select(-1, mi_0, y_s >= 0);
4064 mi_0 = select(-1, mi_0, x_s < 128);
4065 mi_0 = select(-1, mi_0, y_s < 128);
4066 */
4067 compound_statement_begin();
4068 declare_tile("_x_s", TileInfo(DataType::Int32));
4069 declare_tile("_y_s", TileInfo(DataType::Int32));
4070 auto x_s = operands.unpack(Operand("_x_s"));
4071 auto y_s = operands.unpack(Operand("_y_s"));
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01004072 for (int i = 0; i < dst->format().h; ++i)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004073 {
4074 // x_s = (xi_0 + x_k);
4075 // y_s = (yi_0 + y_k);
4076 _data->code += x_s->scalar(0, i).str;
4077 _data->code += " = (";
4078 _data->code += x->scalar(0, i).str;
4079 _data->code += " + ";
4080 _data->code += x_off->scalar(0, i).str;
4081 _data->code += ");\n";
4082 _data->code += y_s->scalar(0, i).str;
4083 _data->code += " = (";
4084 _data->code += y->scalar(0, i).str;
4085 _data->code += " + ";
4086 _data->code += y_off->scalar(0, i).str;
4087 _data->code += ");\n";
4088 // mi_0 = x_s + y_s * width;
4089 _data->code += dst->scalar(0, i).str;
4090 _data->code += " = ";
4091 _data->code += x_s->scalar(0, i).str;
4092 _data->code += " + ";
4093 _data->code += y_s->scalar(0, i).str;
4094 _data->code += " * " + width + ";\n";
4095 // mi_0 = select(wxh, mi_0, x_s >= 0);
4096 _data->code += dst->scalar(0, i).str;
4097 _data->code += " = select(-1, ";
4098 _data->code += dst->scalar(0, i).str;
4099 _data->code += ", ";
4100 _data->code += x_s->scalar(0, i).str;
4101 _data->code += " >= 0);\n";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004102 // mi_0 = select(wxh, mi_0, x_s < width);
4103 _data->code += dst->scalar(0, i).str;
4104 _data->code += " = select(-1, ";
4105 _data->code += dst->scalar(0, i).str;
4106 _data->code += ", ";
4107 _data->code += x_s->scalar(0, i).str;
4108 _data->code += " < ";
4109 _data->code += width + ");\n";
Jakub Sujake1c96e72023-07-31 13:36:58 +01004110 // mi_0 = select(wxh, mi_0, y_s >= 0);
4111 _data->code += dst->scalar(0, i).str;
4112 _data->code += " = select(-1, ";
4113 _data->code += dst->scalar(0, i).str;
4114 _data->code += ", ";
4115 _data->code += y_s->scalar(0, i).str;
4116 _data->code += " >= 0);\n";
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004117 // mi_0 = select(wxh, mi_0, y_s < height);
4118 _data->code += dst->scalar(0, i).str;
4119 _data->code += " = select(-1, ";
4120 _data->code += dst->scalar(0, i).str;
4121 _data->code += ", ";
4122 _data->code += y_s->scalar(0, i).str;
4123 _data->code += " < ";
4124 _data->code += height + ");\n";
4125 }
4126 compound_statement_end();
4127 }
4128
4129private:
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01004130 GpuKernelWriterDataHolder *_data{nullptr};
4131 GpuKernelWriterAttribute *_attr{nullptr};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004132};
4133
4134/** IGpuKernelWriter factory class */
4135class GpuKernelWriterFactory final
4136{
4137public:
4138 /** Static method to call the IGpuKernelWriter class accordingly with the Gpu programming language
4139 *
4140 * @param[in] gpu GPU target
4141 *
4142 * @return IGpuKernelWriter
4143 */
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01004144 static std::unique_ptr<IGpuKernelWriter> create(GpuKernelWriterAttribute *attr, GpuKernelWriterDataHolder *x)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004145 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01004146 switch (x->programming_language())
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004147 {
4148 case GpuTargetLanguage::OpenCL:
4149 return std::make_unique<ClKernelWriter>(attr, x);
4150 default:
4151 std::cout << "Unsupported Gpu programming language" << std::endl;
4152 assert(false);
Viet-Hoa Doe1880f02023-06-28 10:25:35 +01004153 return nullptr;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004154 }
4155 }
4156};
4157
Nikolaj Jensenacea4072023-07-03 09:44:42 +01004158inline int32_t
4159adjust_step(TensorSamplerFormat tensor_format, int32_t step, const TensorInfo *tensor_info_id, int32_t idx)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004160{
4161 auto tensor = tensor_info_id->shape;
4162
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01004163 int32_t dim[3] = {0};
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004164
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +01004165 switch (tensor_format)
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004166 {
4167 case TensorSamplerFormat::C_W_H:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01004168 dim[0] = tensor[0];
4169 dim[1] = tensor[1];
4170 dim[2] = tensor[2];
4171 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004172 case TensorSamplerFormat::C_WH_1:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01004173 dim[0] = tensor[0];
4174 dim[1] = tensor[1] * tensor[2];
4175 dim[2] = 1;
4176 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004177 default:
Nikolaj Jensenacea4072023-07-03 09:44:42 +01004178 std::cout << "Unsupported tensor format" << std::endl;
4179 assert(false);
4180 break;
Viet-Hoa Dobd4f6b92023-05-30 09:34:32 +01004181 }
4182
4183 return std::min(step, dim[idx]);
4184}
4185
4186} // namespace prototype
4187} // namespace ckw
4188
Viet-Hoa Doce3c48c2023-07-03 13:44:43 +01004189#endif // CKW_PROTOTYPE_SRC_PROTOTYPE_H