blob: 7d16f35fbe0704b0d06400b19da8a8ec41f5fcb5 [file] [log] [blame]
Gunes Bayirc4117a32023-08-07 16:52:33 +01001/*
2 * Copyright (c) 2023 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "src/cl/helpers/CLMemoryOpBufferHelper.h"
25
26#include "ckw/Error.h"
27#include "ckw/TensorSampler.h"
28#include "ckw/types/MemoryOperation.h"
29#include "ckw/types/TensorStorageType.h"
30
Gunes Bayirc4117a32023-08-07 16:52:33 +010031#include "src/cl/CLHelpers.h"
32#include "src/cl/CLKernelWriter.h"
33#include "src/cl/CLTensorArgument.h"
34#include "src/cl/CLTile.h"
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010035#include "src/ITensor.h"
36#include "src/Tensor3dMapper.h"
Gunes Bayir2b9fa592024-01-17 16:07:03 +000037#include "src/TileView.h"
Gunes Bayirc4117a32023-08-07 16:52:33 +010038
39namespace ckw
40{
Gunes Bayir2b9fa592024-01-17 16:07:03 +000041bool CLMemoryOpBufferHelper::validate(const CLKernelWriter *writer,
42 const ITensor *tensor,
43 const TensorSampler *sampler,
44 const Tensor3dMapper *mapper,
45 MemoryOperation op,
46 const TileView<CLTile> &dst)
Gunes Bayirc4117a32023-08-07 16:52:33 +010047{
48 CKW_UNUSED(writer, tensor, mapper, op, dst);
49
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010050 if (sampler->storage() != TensorStorageType::BufferUint8Ptr)
Gunes Bayirc4117a32023-08-07 16:52:33 +010051 {
52 return false;
53 }
54 return true;
55}
56
57/** Initialization and Finalizing Logic
58 *
 * What the else branch means in each dimension, and how the if/else blocks are nested:
60 * - x: partial load/store
61 * - y: no load/store operation
62 * - z: no load/store operation
63 * if(x)
64 * {
65 * if(z)
66 * {
67 * if(y)
68 * {
69 * // full load/store width
70 * }
71 * else
72 * {
73 * // no load/store
74 * }
75 * }
76 * else
77 * {
78 * // no load/store
79 * }
80 * }
81 * else
82 * {
83 * if(z)
84 * {
85 * if(y)
86 * {
87 * // partial load/store width
88 * }
89 * else
90 * {
91 * // no load/store
92 * }
93 * }
94 * else
95 * {
96 * // no load/store
97 * }
98 * }
99 *
 * In general, initialize() writes the if conditions and finalize() writes the else conditions.
 * The outermost block is x, then z, and then y. This is why the if/else blocks covering y are
 * written at each row write. In some addressing modes, such as None, no if/else conditions are written.
103 */
Gunes Bayir2b9fa592024-01-17 16:07:03 +0000104void CLMemoryOpBufferHelper::initialize(const CLTile *x, const CLTile *z, const CLTile *b)
Gunes Bayirc4117a32023-08-07 16:52:33 +0100105{
Gunes Bayirc4117a32023-08-07 16:52:33 +0100106 CKW_ASSERT(validate(_writer, _tensor, _sampler, _mapper.get(), _op, _dst));
107
Gunes Bayir2b9fa592024-01-17 16:07:03 +0000108 _coord_x = x->scalar(0, 0).str;
109 _coord_z = z->scalar(0, 0).str;
110 _coord_b = b->scalar(0, 0).str;
111 _coord_orig_z = _coord_z;
Gunes Bayirc4117a32023-08-07 16:52:33 +0100112
113 out_of_bound_initialize_x(_coord_x);
114 out_of_bound_initialize_z(_coord_z);
115}
116
117void CLMemoryOpBufferHelper::write_row(int32_t row_id, const std::string &coord_y)
118{
119 // The only check required is on Y.
120 out_of_bound_initialize_y(coord_y);
121
Gunes Bayir2b9fa592024-01-17 16:07:03 +0000122 const std::string dst = _dst.vector(row_id).str;
Gunes Bayirc4117a32023-08-07 16:52:33 +0100123 const std::string address = to_buffer_address(_coord_x, coord_y, _coord_z, _coord_b);
124 const std::string ls_buf = to_statement(_op, _ls_width_full, dst, address);
125
126 _writer->op_write_raw_code(ls_buf);
127 _writer->op_write_raw_code(";\n");
128
129 out_of_bound_finalize_y(dst);
130
131 // The left over load/store will be written in the finalize stage
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100132 if (_ls_width_part.size() != 0)
Gunes Bayirc4117a32023-08-07 16:52:33 +0100133 {
Gunes Bayir2b9fa592024-01-17 16:07:03 +0000134 int32_t col_start = 0;
135 const TileArea original_area = _dst.area();
136
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100137 for (int32_t partial_width : _ls_width_part)
Gunes Bayirc4117a32023-08-07 16:52:33 +0100138 {
Gunes Bayir2b9fa592024-01-17 16:07:03 +0000139 // Set the active area
140 const TileArea area(original_area.row_start(), original_area.row_end(), col_start,
141 col_start + partial_width);
142 _dst.area(area);
143
144 const std::string dst = _dst.vector(row_id).str;
Gunes Bayirc4117a32023-08-07 16:52:33 +0100145 const std::string coord_x = _coord_x + " + " + std::to_string(col_start);
146 const std::string address = to_buffer_address(coord_x, coord_y, _coord_z, _coord_b);
147 const std::string statement = to_statement(_op, partial_width, dst, address);
148 _leftovers_x.emplace_back(dst, coord_y, statement);
149
150 col_start += partial_width;
151 }
Gunes Bayir2b9fa592024-01-17 16:07:03 +0000152 // Restore the original area
153 _dst.area(original_area);
Gunes Bayirc4117a32023-08-07 16:52:33 +0100154 }
155}
156
157void CLMemoryOpBufferHelper::finalize()
158{
159 out_of_bound_finalize_z();
160 out_of_bound_finalize_x();
161}
162
163void CLMemoryOpBufferHelper::out_of_bound_initialize_x(const std::string &coord)
164{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100165 if (_sampler->address_mode_x() == TensorSamplerAddressModeX::OverlappingMin)
Gunes Bayirc4117a32023-08-07 16:52:33 +0100166 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100167 TensorInfo tensor_info = _tensor->info();
168 TensorShape shape = tensor_info.shape();
Gunes Bayirc4117a32023-08-07 16:52:33 +0100169
170 _ls_width_part = cl_decompose_vector_width(shape[0] % _ls_width_full);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100171 if (_ls_width_part.size() != 0)
Gunes Bayirc4117a32023-08-07 16:52:33 +0100172 {
173 _writer->op_write_raw_code("if(" + coord + " > 0)\n{\n");
174 }
175 }
176}
177
178void CLMemoryOpBufferHelper::out_of_bound_finalize_x()
179{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100180 if (_sampler->address_mode_x() == TensorSamplerAddressModeX::OverlappingMin)
Gunes Bayirc4117a32023-08-07 16:52:33 +0100181 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100182 if (_ls_width_part.size() != 0)
Gunes Bayirc4117a32023-08-07 16:52:33 +0100183 {
184 _writer->op_write_raw_code("}\nelse\n{\n");
185
186 out_of_bound_initialize_z(_coord_orig_z);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100187 for (LeftoverDescriptor leftover_desc : _leftovers_x)
Gunes Bayirc4117a32023-08-07 16:52:33 +0100188 {
189 out_of_bound_initialize_y(leftover_desc.coord);
190 _writer->op_write_raw_code(leftover_desc.statement);
191 _writer->op_write_raw_code(";\n");
192 out_of_bound_finalize_y(leftover_desc.dst);
193 }
194 out_of_bound_finalize_z();
195 _writer->op_write_raw_code("}\n");
196 }
197 }
198}
199
200void CLMemoryOpBufferHelper::out_of_bound_initialize_y(const std::string &coord)
201{
202 std::string max = "";
203
204 const TensorSamplerAddressModeY address_mode_y = _sampler->address_mode_y();
205
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100206 switch (address_mode_y)
Gunes Bayirc4117a32023-08-07 16:52:33 +0100207 {
208 case TensorSamplerAddressModeY::ClampToBorderMaxOnly:
209 // Not to be moved outside the case because it marks the relevant tensor component as used even if we dont't use the variable
210 max = _mapper->dim_y().str;
211 _writer->op_write_raw_code("if(" + coord + " < " + max + ")\n{\n");
212 break;
Gunes Bayird5f9a1c2023-08-17 11:04:02 +0100213 case TensorSamplerAddressModeY::SkipLessThanZero:
214 _writer->op_write_raw_code("if(" + coord + " >= 0)\n{\n");
215 break;
Gunes Bayirc4117a32023-08-07 16:52:33 +0100216 case TensorSamplerAddressModeY::None:
217 break;
218 default:
219 CKW_THROW_MSG("Unsupported address mode for Y dimension");
220 }
221}
222
223void CLMemoryOpBufferHelper::out_of_bound_finalize_y(const std::string &dst)
224{
225 const TensorSamplerAddressModeY address_mode_y = _sampler->address_mode_y();
226
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100227 switch (address_mode_y)
Gunes Bayirc4117a32023-08-07 16:52:33 +0100228 {
229 case TensorSamplerAddressModeY::ClampToBorderMaxOnly:
230 _writer->op_write_raw_code("}\nelse\n{\n");
231 _writer->op_write_raw_code(dst);
232 _writer->op_write_raw_code(" = 0.0f;\n}\n");
233 break;
Gunes Bayird5f9a1c2023-08-17 11:04:02 +0100234 case TensorSamplerAddressModeY::SkipLessThanZero:
235 _writer->op_write_raw_code("}\n");
236 break;
Gunes Bayirc4117a32023-08-07 16:52:33 +0100237 case TensorSamplerAddressModeY::None:
238 break;
239 default:
240 CKW_THROW_MSG("Unsupported address mode for Y dimension");
241 }
242}
243
244void CLMemoryOpBufferHelper::out_of_bound_initialize_z(const std::string &coord)
245{
246 CKW_UNUSED(coord);
247
248 const TensorSamplerAddressModeZ address_mode_z = _sampler->address_mode_z();
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100249 switch (address_mode_z)
Gunes Bayirc4117a32023-08-07 16:52:33 +0100250 {
251 case TensorSamplerAddressModeZ::None:
252 break;
253 default:
254 CKW_THROW_MSG("Unsupported address mode for Z dimension");
255 }
256}
257
258void CLMemoryOpBufferHelper::out_of_bound_finalize_z()
259{
260 const TensorSamplerAddressModeZ address_mode_z = _sampler->address_mode_z();
261
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100262 switch (address_mode_z)
Gunes Bayirc4117a32023-08-07 16:52:33 +0100263 {
264 case TensorSamplerAddressModeZ::None:
265 break;
266 default:
267 CKW_THROW_MSG("Unsupported address mode for Z dimension");
268 }
269}
270
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100271std::string CLMemoryOpBufferHelper::to_statement(MemoryOperation op,
272 int32_t vector_width,
273 const std::string &data,
274 const std::string &address) const
Gunes Bayirc4117a32023-08-07 16:52:33 +0100275{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100276 switch (op)
Gunes Bayirc4117a32023-08-07 16:52:33 +0100277 {
278 case MemoryOperation::Load:
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100279 if (vector_width != 1)
Gunes Bayirc4117a32023-08-07 16:52:33 +0100280 {
281 return data + " = vload" + std::to_string(vector_width) + "(0, " + address + ")";
282 }
283 else
284 {
285 return data + " = *(" + address + ")";
286 }
287 break;
288 case MemoryOperation::Store:
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100289 if (vector_width != 1)
Gunes Bayirc4117a32023-08-07 16:52:33 +0100290 {
291 return "vstore" + std::to_string(vector_width) + "(" + data + ", 0, " + address + ")";
292 }
293 else
294 {
295 return "*(" + address + ") = " + data;
296 }
297 break;
298 default:
299 CKW_THROW_MSG("Unsupported MemoryOperation");
300 }
301
302 return "";
303}
304
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100305std::string CLMemoryOpBufferHelper::to_buffer_address(const std::string &x,
306 const std::string &y,
307 const std::string &z,
308 const std::string &b) const
Gunes Bayirc4117a32023-08-07 16:52:33 +0100309{
310 TensorStorageType tensor_storage = _sampler->storage();
311 CKW_ASSERT(tensor_storage == TensorStorageType::BufferUint8Ptr);
312
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100313 const std::string ptr_buf = _tensor->storage(tensor_storage).val;
Gunes Bayir2b9fa592024-01-17 16:07:03 +0000314 const std::string dst_type = cl_data_type_rounded_up_to_valid_vector_width(_dst.data_type(), 1);
Gunes Bayirc4117a32023-08-07 16:52:33 +0100315
316 std::string address;
317 address += "(__global ";
318 address += dst_type;
319 address += "*)(";
320 address += ptr_buf;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100321 if (x != "0" && (_mapper->dim_x().str != "1"))
Gunes Bayirc4117a32023-08-07 16:52:33 +0100322 {
323 address += " + (";
324 address += x + ") * sizeof(" + dst_type + ")";
325 }
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100326 if (y != "0")
Gunes Bayirc4117a32023-08-07 16:52:33 +0100327 {
328 const std::string stride_y = _mapper->stride_y().str;
329 address += " + (";
330 address += y + ")";
331 address += " * ";
332 address += stride_y;
333 }
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100334 if (z != "0" && (_mapper->dim_z().str != "1"))
Gunes Bayirc4117a32023-08-07 16:52:33 +0100335 {
336 const std::string stride_z = _mapper->stride_z().str;
337 address += " + (";
338 address += z + ")";
339 address += " * ";
340 address += stride_z;
341 }
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100342 if (b != "0" && (_mapper->dim_batch().str != "1"))
Gunes Bayirc4117a32023-08-07 16:52:33 +0100343 {
344 const std::string stride_b = _mapper->stride_batch().str;
345 address += " + (";
346 address += b + ")";
347 address += " * ";
348 address += stride_b;
349 }
350 address += ")";
351 return address;
352}
353} // namespace ckw