blob: 2ab63169476e1cd1f4584f4cf3cfa11786b64c20 [file] [log] [blame]
SiCong Lif44bbc52022-08-29 18:25:51 +01001/*
2 * Copyright (c) 2022 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "ClTemplateWriter.h"
25
26#include "arm_compute/core/CL/CLKernelLibrary.h"
27#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
28#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
29
30namespace arm_compute
31{
32namespace experimental
33{
34namespace dynamic_fusion
35{
36/// @note: some tags can be unused since they could be used only for the macros, or only for the component code
37std::string ClTemplateWriter::replace_tags(const std::string &code_template, const TagLUT &tags)
38{
39 std::string replaced_code = "";
40 bool scanning_pattern = false;
41 std::string pattern_found = "";
42 for(size_t i = 0; i < code_template.size() - 1; ++i)
43 {
44 if(!scanning_pattern)
45 {
46 if(code_template[i] == '{' && code_template[i + 1] == '{')
47 {
48 i += 1;
49 scanning_pattern = true;
50 pattern_found = "";
51 }
52 else
53 {
54 replaced_code += code_template[i];
55 }
56 }
57 else
58 {
59 if(code_template[i] == '}' && code_template[i + 1] == '}')
60 {
61 i += 1;
62 scanning_pattern = false;
63 std::string err = "Pattern " + pattern_found + " not found in tags";
64 ARM_COMPUTE_ERROR_ON_MSG(tags.find(pattern_found) == tags.end(), err.c_str());
65 replaced_code += tags.find(pattern_found)->second.value;
66 }
67 else
68 {
69 pattern_found += code_template[i];
70 }
71 }
72 }
73
74 return replaced_code;
75}
76ClTemplateWriter::~ClTemplateWriter()
77{
78}
79ClTemplateWriter::ClTemplateWriter(const GpuKernelComponentGroup &components)
80 : _components{ components }
81{
82}
83std::string ClTemplateWriter::get_name()
84{
85 return write_kernel_name();
86}
87std::string ClTemplateWriter::get_code()
88{
89 return write_code();
90}
91std::string ClTemplateWriter::get_config_id()
92{
93 std::string config_id = get_name();
94 for(const auto &comp : _components)
95 {
96 config_id += "--" + comp->template_writer()->get_config_id() + "--";
97 }
98
99 return config_id;
100}
101
102CLBuildOptions ClTemplateWriter::get_build_options()
103{
104 CLBuildOptions build_opts{};
105
106 for(const auto &comp : _components)
107 {
108 build_opts.add_options(comp->template_writer()->get_build_options(_components).options());
109 }
110
111 return build_opts;
112}
113
114Window ClTemplateWriter::get_window() const
115{
116 const auto root_comp = _components.get_root_component();
117 ARM_COMPUTE_ERROR_ON_MSG(root_comp == nullptr, "No root component found");
118 return root_comp->template_writer()->get_window();
119}
120
121std::map<ITensorInfo::Id, GpuKernelArgument> ClTemplateWriter::get_tensors()
122{
123 // Assemble GpuKernelArguments
124 std::map<ITensorInfo::Id, GpuKernelArgument> tensors;
125 for(const auto t : _components.get_argument_tensors())
126 {
127 tensors.emplace(
128 t->id(),
129 GpuKernelArgument{ *t, _vtable.get_variable(t).kernel_argument_info });
130 }
131 return tensors;
132}
133
134std::string ClTemplateWriter::write_code()
135{
136 ARM_COMPUTE_ERROR_ON_MSG(_components.empty(), "No components found");
137
138 // These data structures will hold the data from all the components in the blueprint
139 std::set<std::string> headers_list{};
140 std::set<std::string> additional_macros{};
141 std::vector<std::string> component_codes{}; // vector because order matters
142
143 // Pass 1: Declare all kernel variables
144 for(auto &component : _components)
145 {
146 component->template_writer()->declare_variables(_vtable, _components);
147 }
148 // Pass 2: Generate component codes
149 for(auto &component : _components)
150 {
151 const auto component_writer = component->template_writer();
152 auto curr_headers_list = component_writer->get_headers_list();
153 auto curr_additional_macros = component_writer->get_additional_macros();
154 auto curr_component_code = component_writer->get_component_code(_components);
155 const auto var_lut = component_writer->get_tag_lut(_vtable, _components); // Ideally can be merged with get_component_code once we have finer-grained code generation technique
156 component_codes.push_back(replace_tags(curr_component_code, var_lut));
157
158 headers_list.insert(curr_headers_list.begin(), curr_headers_list.end());
159 if(!additional_macros.empty()) // Some components might not have any
160 {
161 additional_macros.insert(replace_tags(curr_additional_macros, var_lut));
162 }
163 }
164
165 // Step 3: Assemble the data gathered by traversing the graph into the string "code"
166 std::string code = "";
167
168 for(auto &header : headers_list)
169 {
170#if defined(EMBEDDED_KERNELS)
171 code += CLKernelLibrary::get().get_program(header).first;
172#else // defined(EMBEDDED_KERNELS)
173 code += "#include \"" + header + "\"\n";
174#endif // defined(EMBEDDED_KERNELS)
175 }
176
177 for(auto &macros : additional_macros)
178 {
179 code += macros;
180 }
181
Viet-Hoa Dob84e2532022-12-13 13:09:10 +0000182 auto arguments = _components.get_argument_tensors();
183 std::sort(arguments.begin(), arguments.end(), [](const ITensorInfo *l, const ITensorInfo *r) {
184 return l->id() < r->id();
185 });
186 code += write_kernel_signature(_vtable.get_variable_list(arguments));
SiCong Lif44bbc52022-08-29 18:25:51 +0100187
188 code += "\n{\n\n";
189
190 code += " //------------------ START KERNEL_BUILDER_COORDINATE ---------------------\n\n";
191 code += write_global_section();
192 code += " //------------------ END KERNEL_BUILDER_COORDINATE ---------------------\n";
193
Viet-Hoa Do3558c582022-12-16 14:45:57 +0000194 {
195 const auto tiles = _components.get_tiles();
196 std::stringstream tiles_ss;
197
198 tiles_ss << " //------------------ START TILE DECLARATION ---------------------\n";
199
200 for(auto tile : tiles)
201 {
202 const auto var = _vtable.get_variable(tile);
203 const auto data_type = get_cl_type_from_data_type(tile->data_type());
204 const auto var_name = var.uniq_name;
205
206 tiles_ss << " TILE(" << data_type << ", M0, N0, " << var_name << ");\n";
207 }
208
209 tiles_ss << " //------------------ END TILE DECLARATION ---------------------\n";
210
211 code += tiles_ss.str();
212 }
213
SiCong Lif44bbc52022-08-29 18:25:51 +0100214 for(const auto &component_code : component_codes)
215 {
216 code += component_code;
Viet-Hoa Dob84e2532022-12-13 13:09:10 +0000217 code += "\n";
SiCong Lif44bbc52022-08-29 18:25:51 +0100218 }
219
220 code += "}\n";
221
222 return code;
223}
224std::string ClTemplateWriter::write_global_section() const
225{
Viet-Hoa Do04f46202022-12-14 14:49:56 +0000226 const auto dst_info = _components.get_any_dst_tensor();
SiCong Lif44bbc52022-08-29 18:25:51 +0100227 const auto dst_w = dst_info->dimension(0);
228 const auto tile_w = std::max(1, get_window().x().step());
229 const auto tile_h = std::max(1, get_window().y().step());
230 auto leftover_w = dst_w % tile_w;
231
232 std::string code = "";
233 code += std::string(" int g_ind_0 = GET_SPATIAL_IDX(0, ") + std::to_string(tile_w) + ", " + std::to_string(leftover_w) + ");\n";
234 code += std::string(" int g_ind_1 = GET_SPATIAL_IDX(1, ") + std::to_string(tile_h) + ", " + "0);\n";
235 code += std::string(" int g_ind_2 = GET_SPATIAL_IDX(2, 1, 0);\n\n");
236
237 code += " const bool g_cond_x = (g_ind_0 == 0);\n";
238 code += " const bool g_cond_y = (g_ind_1 == 0);\n";
239
240 return code;
241}
242std::string ClTemplateWriter::write_argument_declaration(const GpuKernelVariableTable::TensorVariable &var) const
243{
244 std::string code;
245 switch(var.kernel_argument_info.type)
246 {
247 case GpuKernelArgumentInfo::Type::Vector:
248 {
249 code += "\n VECTOR_DECLARATION(" + var.uniq_name + ")";
250 break;
251 }
252 case GpuKernelArgumentInfo::Type::Image:
253 {
254 code += "\n IMAGE_DECLARATION(" + var.uniq_name + ")";
255 break;
256 }
257 case GpuKernelArgumentInfo::Type::Image_3D:
258 {
259 code += "\n IMAGE_DECLARATION(" + var.uniq_name + "),";
260 code += "\n unsigned int " + var.uniq_name + "_stride_z";
261 break;
262 }
263 case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D:
264 {
265 code += "\n __read_only image2d_t " + var.uniq_name + "_img,";
266 code += "\n unsigned int " + var.uniq_name + "_stride_z";
267 break;
268 }
269 case GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer:
270 {
271 code += "\n TENSOR4D_T(" + var.uniq_name + ", BUFFER)";
272 break;
273 }
274 case GpuKernelArgumentInfo::Type::Tensor_4D_t_Image:
275 {
276 code += "\n TENSOR4D_T(" + var.uniq_name + ", IMAGE)";
277 break;
278 }
279 default:
280 {
281 ARM_COMPUTE_ERROR("Unsupported declaration generation for GpuKernelArgumentInfo::Type");
282 }
283 }
284 return code;
285}
286std::string ClTemplateWriter::write_kernel_signature(const GpuKernelVariableTable::VariableList &argument_list) const
287{
288 std::string code = "\n__kernel void " + write_kernel_name() + "(";
289
290 for(int i = 0; i < static_cast<int>(argument_list.size()) - 1; ++i)
291 {
292 code += write_argument_declaration(argument_list[i]) + ",";
293 }
294 if(static_cast<int>(argument_list.size()) - 1 >= 0)
295 {
296 code += write_argument_declaration(argument_list[argument_list.size() - 1]);
297 }
298
299 code += ')';
300
301 return code;
302}
303std::string ClTemplateWriter::write_kernel_name() const
304{
305 if(_components.empty())
306 {
307 return "empty_kernel";
308 }
309 std::string name = _components.empty() ? "" : _components[0]->template_writer()->get_name();
310 for(size_t i = 1; i < _components.size(); ++i)
311 {
312 name += "___";
313 name += _components[i]->template_writer()->get_name();
314 }
315
316 return name;
317}
318} // namespace dynamic_fusion
319} // namespace experimental
320} // namespace arm_compute