blob: a98ebed8faeb9f907de63563748d08503d4c946f [file] [log] [blame]
Gunes Bayirc4117a32023-08-07 16:52:33 +01001/*
2 * Copyright (c) 2023 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "src/cl/helpers/CLMemoryOpBufferHelper.h"
25
26#include "ckw/Error.h"
27#include "ckw/TensorSampler.h"
28#include "ckw/types/MemoryOperation.h"
29#include "ckw/types/TensorStorageType.h"
30
Gunes Bayirc4117a32023-08-07 16:52:33 +010031#include "src/cl/CLHelpers.h"
32#include "src/cl/CLKernelWriter.h"
33#include "src/cl/CLTensorArgument.h"
34#include "src/cl/CLTile.h"
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010035#include "src/ITensor.h"
36#include "src/Tensor3dMapper.h"
Gunes Bayirc4117a32023-08-07 16:52:33 +010037
38namespace ckw
39{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010040bool CLMemoryOpBufferHelper::validate(const CLKernelWriter *writer,
41 const ITensor *tensor,
42 const TensorSampler *sampler,
43 const Tensor3dMapper *mapper,
44 MemoryOperation op,
45 const CLTile *dst)
Gunes Bayirc4117a32023-08-07 16:52:33 +010046{
47 CKW_UNUSED(writer, tensor, mapper, op, dst);
48
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010049 if (sampler->storage() != TensorStorageType::BufferUint8Ptr)
Gunes Bayirc4117a32023-08-07 16:52:33 +010050 {
51 return false;
52 }
53 return true;
54}
55
56/** Initialization and Finalizing Logic
57 *
58 * The meanings of if/elses in different dimensions and how they're constructed:
59 * - x: partial load/store
60 * - y: no load/store operation
61 * - z: no load/store operation
62 * if(x)
63 * {
64 * if(z)
65 * {
66 * if(y)
67 * {
68 * // full load/store width
69 * }
70 * else
71 * {
72 * // no load/store
73 * }
74 * }
75 * else
76 * {
77 * // no load/store
78 * }
79 * }
80 * else
81 * {
82 * if(z)
83 * {
84 * if(y)
85 * {
86 * // partial load/store width
87 * }
88 * else
89 * {
90 * // no load/store
91 * }
92 * }
93 * else
94 * {
95 * // no load/store
96 * }
97 * }
98 *
99 * In general, initialize() writes if conditions, and finalize() writes else conditions.
100 * The outermost block is x, then z and then y. This is why, if/else's covering for y are initialized
101 * at each row write. In some addressing modes, such as None, no if/else conditions are written.
102 */
103void CLMemoryOpBufferHelper::initialize(const CLTile *dst, const CLTile *x, const CLTile *z, const CLTile *b)
104{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100105 _dst = dst;
Gunes Bayirc4117a32023-08-07 16:52:33 +0100106
107 CKW_ASSERT(validate(_writer, _tensor, _sampler, _mapper.get(), _op, _dst));
108
109 _ls_width_full = dst->info().width();
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100110 _coord_x = x->scalar(0, 0).str;
111 _coord_z = z->scalar(0, 0).str;
112 _coord_b = b->scalar(0, 0).str;
113 _coord_orig_z = _coord_z;
Gunes Bayirc4117a32023-08-07 16:52:33 +0100114
115 out_of_bound_initialize_x(_coord_x);
116 out_of_bound_initialize_z(_coord_z);
117}
118
119void CLMemoryOpBufferHelper::write_row(int32_t row_id, const std::string &coord_y)
120{
121 // The only check required is on Y.
122 out_of_bound_initialize_y(coord_y);
123
124 const std::string dst = _dst->vector(row_id).str;
125 const std::string address = to_buffer_address(_coord_x, coord_y, _coord_z, _coord_b);
126 const std::string ls_buf = to_statement(_op, _ls_width_full, dst, address);
127
128 _writer->op_write_raw_code(ls_buf);
129 _writer->op_write_raw_code(";\n");
130
131 out_of_bound_finalize_y(dst);
132
133 // The left over load/store will be written in the finalize stage
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100134 if (_ls_width_part.size() != 0)
Gunes Bayirc4117a32023-08-07 16:52:33 +0100135 {
136 int32_t col_start = 0;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100137 for (int32_t partial_width : _ls_width_part)
Gunes Bayirc4117a32023-08-07 16:52:33 +0100138 {
139 const std::string dst = _dst->vector(row_id, col_start, partial_width).str;
140 const std::string coord_x = _coord_x + " + " + std::to_string(col_start);
141 const std::string address = to_buffer_address(coord_x, coord_y, _coord_z, _coord_b);
142 const std::string statement = to_statement(_op, partial_width, dst, address);
143 _leftovers_x.emplace_back(dst, coord_y, statement);
144
145 col_start += partial_width;
146 }
147 }
148}
149
150void CLMemoryOpBufferHelper::finalize()
151{
152 out_of_bound_finalize_z();
153 out_of_bound_finalize_x();
154}
155
156void CLMemoryOpBufferHelper::out_of_bound_initialize_x(const std::string &coord)
157{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100158 if (_sampler->address_mode_x() == TensorSamplerAddressModeX::OverlappingMin)
Gunes Bayirc4117a32023-08-07 16:52:33 +0100159 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100160 TensorInfo tensor_info = _tensor->info();
161 TensorShape shape = tensor_info.shape();
Gunes Bayirc4117a32023-08-07 16:52:33 +0100162
163 _ls_width_part = cl_decompose_vector_width(shape[0] % _ls_width_full);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100164 if (_ls_width_part.size() != 0)
Gunes Bayirc4117a32023-08-07 16:52:33 +0100165 {
166 _writer->op_write_raw_code("if(" + coord + " > 0)\n{\n");
167 }
168 }
169}
170
171void CLMemoryOpBufferHelper::out_of_bound_finalize_x()
172{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100173 if (_sampler->address_mode_x() == TensorSamplerAddressModeX::OverlappingMin)
Gunes Bayirc4117a32023-08-07 16:52:33 +0100174 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100175 if (_ls_width_part.size() != 0)
Gunes Bayirc4117a32023-08-07 16:52:33 +0100176 {
177 _writer->op_write_raw_code("}\nelse\n{\n");
178
179 out_of_bound_initialize_z(_coord_orig_z);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100180 for (LeftoverDescriptor leftover_desc : _leftovers_x)
Gunes Bayirc4117a32023-08-07 16:52:33 +0100181 {
182 out_of_bound_initialize_y(leftover_desc.coord);
183 _writer->op_write_raw_code(leftover_desc.statement);
184 _writer->op_write_raw_code(";\n");
185 out_of_bound_finalize_y(leftover_desc.dst);
186 }
187 out_of_bound_finalize_z();
188 _writer->op_write_raw_code("}\n");
189 }
190 }
191}
192
193void CLMemoryOpBufferHelper::out_of_bound_initialize_y(const std::string &coord)
194{
195 std::string max = "";
196
197 const TensorSamplerAddressModeY address_mode_y = _sampler->address_mode_y();
198
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100199 switch (address_mode_y)
Gunes Bayirc4117a32023-08-07 16:52:33 +0100200 {
201 case TensorSamplerAddressModeY::ClampToBorderMaxOnly:
202 // Not to be moved outside the case because it marks the relevant tensor component as used even if we dont't use the variable
203 max = _mapper->dim_y().str;
204 _writer->op_write_raw_code("if(" + coord + " < " + max + ")\n{\n");
205 break;
Gunes Bayird5f9a1c2023-08-17 11:04:02 +0100206 case TensorSamplerAddressModeY::SkipLessThanZero:
207 _writer->op_write_raw_code("if(" + coord + " >= 0)\n{\n");
208 break;
Gunes Bayirc4117a32023-08-07 16:52:33 +0100209 case TensorSamplerAddressModeY::None:
210 break;
211 default:
212 CKW_THROW_MSG("Unsupported address mode for Y dimension");
213 }
214}
215
216void CLMemoryOpBufferHelper::out_of_bound_finalize_y(const std::string &dst)
217{
218 const TensorSamplerAddressModeY address_mode_y = _sampler->address_mode_y();
219
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100220 switch (address_mode_y)
Gunes Bayirc4117a32023-08-07 16:52:33 +0100221 {
222 case TensorSamplerAddressModeY::ClampToBorderMaxOnly:
223 _writer->op_write_raw_code("}\nelse\n{\n");
224 _writer->op_write_raw_code(dst);
225 _writer->op_write_raw_code(" = 0.0f;\n}\n");
226 break;
Gunes Bayird5f9a1c2023-08-17 11:04:02 +0100227 case TensorSamplerAddressModeY::SkipLessThanZero:
228 _writer->op_write_raw_code("}\n");
229 break;
Gunes Bayirc4117a32023-08-07 16:52:33 +0100230 case TensorSamplerAddressModeY::None:
231 break;
232 default:
233 CKW_THROW_MSG("Unsupported address mode for Y dimension");
234 }
235}
236
237void CLMemoryOpBufferHelper::out_of_bound_initialize_z(const std::string &coord)
238{
239 CKW_UNUSED(coord);
240
241 const TensorSamplerAddressModeZ address_mode_z = _sampler->address_mode_z();
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100242 switch (address_mode_z)
Gunes Bayirc4117a32023-08-07 16:52:33 +0100243 {
244 case TensorSamplerAddressModeZ::None:
245 break;
246 default:
247 CKW_THROW_MSG("Unsupported address mode for Z dimension");
248 }
249}
250
251void CLMemoryOpBufferHelper::out_of_bound_finalize_z()
252{
253 const TensorSamplerAddressModeZ address_mode_z = _sampler->address_mode_z();
254
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100255 switch (address_mode_z)
Gunes Bayirc4117a32023-08-07 16:52:33 +0100256 {
257 case TensorSamplerAddressModeZ::None:
258 break;
259 default:
260 CKW_THROW_MSG("Unsupported address mode for Z dimension");
261 }
262}
263
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100264std::string CLMemoryOpBufferHelper::to_statement(MemoryOperation op,
265 int32_t vector_width,
266 const std::string &data,
267 const std::string &address) const
Gunes Bayirc4117a32023-08-07 16:52:33 +0100268{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100269 switch (op)
Gunes Bayirc4117a32023-08-07 16:52:33 +0100270 {
271 case MemoryOperation::Load:
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100272 if (vector_width != 1)
Gunes Bayirc4117a32023-08-07 16:52:33 +0100273 {
274 return data + " = vload" + std::to_string(vector_width) + "(0, " + address + ")";
275 }
276 else
277 {
278 return data + " = *(" + address + ")";
279 }
280 break;
281 case MemoryOperation::Store:
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100282 if (vector_width != 1)
Gunes Bayirc4117a32023-08-07 16:52:33 +0100283 {
284 return "vstore" + std::to_string(vector_width) + "(" + data + ", 0, " + address + ")";
285 }
286 else
287 {
288 return "*(" + address + ") = " + data;
289 }
290 break;
291 default:
292 CKW_THROW_MSG("Unsupported MemoryOperation");
293 }
294
295 return "";
296}
297
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100298std::string CLMemoryOpBufferHelper::to_buffer_address(const std::string &x,
299 const std::string &y,
300 const std::string &z,
301 const std::string &b) const
Gunes Bayirc4117a32023-08-07 16:52:33 +0100302{
303 TensorStorageType tensor_storage = _sampler->storage();
304 CKW_ASSERT(tensor_storage == TensorStorageType::BufferUint8Ptr);
305
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100306 const std::string ptr_buf = _tensor->storage(tensor_storage).val;
307 const std::string dst_type = cl_data_type_rounded_up_to_valid_vector_width(_dst->info().data_type(), 1);
Gunes Bayirc4117a32023-08-07 16:52:33 +0100308
309 std::string address;
310 address += "(__global ";
311 address += dst_type;
312 address += "*)(";
313 address += ptr_buf;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100314 if (x != "0" && (_mapper->dim_x().str != "1"))
Gunes Bayirc4117a32023-08-07 16:52:33 +0100315 {
316 address += " + (";
317 address += x + ") * sizeof(" + dst_type + ")";
318 }
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100319 if (y != "0")
Gunes Bayirc4117a32023-08-07 16:52:33 +0100320 {
321 const std::string stride_y = _mapper->stride_y().str;
322 address += " + (";
323 address += y + ")";
324 address += " * ";
325 address += stride_y;
326 }
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100327 if (z != "0" && (_mapper->dim_z().str != "1"))
Gunes Bayirc4117a32023-08-07 16:52:33 +0100328 {
329 const std::string stride_z = _mapper->stride_z().str;
330 address += " + (";
331 address += z + ")";
332 address += " * ";
333 address += stride_z;
334 }
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100335 if (b != "0" && (_mapper->dim_batch().str != "1"))
Gunes Bayirc4117a32023-08-07 16:52:33 +0100336 {
337 const std::string stride_b = _mapper->stride_batch().str;
338 address += " + (";
339 address += b + ")";
340 address += " * ";
341 address += stride_b;
342 }
343 address += ")";
344 return address;
345}
346} // namespace ckw