blob: eda15f1d9541889b60b1585f553d84b0d7e6cb9a [file] [log] [blame]
SiCong Lif44bbc52022-08-29 18:25:51 +01001/*
Ramy Elgammal002e6532023-01-11 18:48:04 +00002 * Copyright (c) 2022-2023 Arm Limited.
SiCong Lif44bbc52022-08-29 18:25:51 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "ClTemplateWriter.h"
25
26#include "arm_compute/core/CL/CLKernelLibrary.h"
27#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
28#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
29
30namespace arm_compute
31{
32namespace experimental
33{
34namespace dynamic_fusion
35{
36/// @note: some tags can be unused since they could be used only for the macros, or only for the component code
37std::string ClTemplateWriter::replace_tags(const std::string &code_template, const TagLUT &tags)
38{
39 std::string replaced_code = "";
40 bool scanning_pattern = false;
41 std::string pattern_found = "";
42 for(size_t i = 0; i < code_template.size() - 1; ++i)
43 {
44 if(!scanning_pattern)
45 {
46 if(code_template[i] == '{' && code_template[i + 1] == '{')
47 {
48 i += 1;
49 scanning_pattern = true;
50 pattern_found = "";
51 }
52 else
53 {
54 replaced_code += code_template[i];
55 }
56 }
57 else
58 {
59 if(code_template[i] == '}' && code_template[i + 1] == '}')
60 {
61 i += 1;
62 scanning_pattern = false;
63 std::string err = "Pattern " + pattern_found + " not found in tags";
64 ARM_COMPUTE_ERROR_ON_MSG(tags.find(pattern_found) == tags.end(), err.c_str());
65 replaced_code += tags.find(pattern_found)->second.value;
66 }
67 else
68 {
69 pattern_found += code_template[i];
70 }
71 }
72 }
73
74 return replaced_code;
75}
76ClTemplateWriter::~ClTemplateWriter()
77{
78}
79ClTemplateWriter::ClTemplateWriter(const GpuKernelComponentGroup &components)
80 : _components{ components }
81{
82}
83std::string ClTemplateWriter::get_name()
84{
85 return write_kernel_name();
86}
87std::string ClTemplateWriter::get_code()
88{
89 return write_code();
90}
91std::string ClTemplateWriter::get_config_id()
92{
93 std::string config_id = get_name();
94 for(const auto &comp : _components)
95 {
96 config_id += "--" + comp->template_writer()->get_config_id() + "--";
97 }
98
99 return config_id;
100}
101
102CLBuildOptions ClTemplateWriter::get_build_options()
103{
104 CLBuildOptions build_opts{};
105
106 for(const auto &comp : _components)
107 {
108 build_opts.add_options(comp->template_writer()->get_build_options(_components).options());
109 }
110
111 return build_opts;
112}
113
114Window ClTemplateWriter::get_window() const
115{
116 const auto root_comp = _components.get_root_component();
117 ARM_COMPUTE_ERROR_ON_MSG(root_comp == nullptr, "No root component found");
118 return root_comp->template_writer()->get_window();
119}
120
121std::map<ITensorInfo::Id, GpuKernelArgument> ClTemplateWriter::get_tensors()
122{
123 // Assemble GpuKernelArguments
124 std::map<ITensorInfo::Id, GpuKernelArgument> tensors;
125 for(const auto t : _components.get_argument_tensors())
126 {
127 tensors.emplace(
128 t->id(),
129 GpuKernelArgument{ *t, _vtable.get_variable(t).kernel_argument_info });
130 }
131 return tensors;
132}
133
134std::string ClTemplateWriter::write_code()
135{
136 ARM_COMPUTE_ERROR_ON_MSG(_components.empty(), "No components found");
137
138 // These data structures will hold the data from all the components in the blueprint
139 std::set<std::string> headers_list{};
140 std::set<std::string> additional_macros{};
141 std::vector<std::string> component_codes{}; // vector because order matters
142
143 // Pass 1: Declare all kernel variables
144 for(auto &component : _components)
145 {
146 component->template_writer()->declare_variables(_vtable, _components);
147 }
148 // Pass 2: Generate component codes
149 for(auto &component : _components)
150 {
151 const auto component_writer = component->template_writer();
152 auto curr_headers_list = component_writer->get_headers_list();
153 auto curr_additional_macros = component_writer->get_additional_macros();
154 auto curr_component_code = component_writer->get_component_code(_components);
155 const auto var_lut = component_writer->get_tag_lut(_vtable, _components); // Ideally can be merged with get_component_code once we have finer-grained code generation technique
156 component_codes.push_back(replace_tags(curr_component_code, var_lut));
157
158 headers_list.insert(curr_headers_list.begin(), curr_headers_list.end());
159 if(!additional_macros.empty()) // Some components might not have any
160 {
161 additional_macros.insert(replace_tags(curr_additional_macros, var_lut));
162 }
163 }
164
165 // Step 3: Assemble the data gathered by traversing the graph into the string "code"
166 std::string code = "";
167
168 for(auto &header : headers_list)
169 {
170#if defined(EMBEDDED_KERNELS)
171 code += CLKernelLibrary::get().get_program(header).first;
172#else // defined(EMBEDDED_KERNELS)
173 code += "#include \"" + header + "\"\n";
174#endif // defined(EMBEDDED_KERNELS)
175 }
176
177 for(auto &macros : additional_macros)
178 {
179 code += macros;
180 }
181
Viet-Hoa Dob84e2532022-12-13 13:09:10 +0000182 auto arguments = _components.get_argument_tensors();
Ramy Elgammal002e6532023-01-11 18:48:04 +0000183 std::sort(arguments.begin(), arguments.end(), [](const ITensorInfo * l, const ITensorInfo * r)
184 {
Viet-Hoa Dob84e2532022-12-13 13:09:10 +0000185 return l->id() < r->id();
186 });
187 code += write_kernel_signature(_vtable.get_variable_list(arguments));
SiCong Lif44bbc52022-08-29 18:25:51 +0100188
189 code += "\n{\n\n";
190
191 code += " //------------------ START KERNEL_BUILDER_COORDINATE ---------------------\n\n";
192 code += write_global_section();
193 code += " //------------------ END KERNEL_BUILDER_COORDINATE ---------------------\n";
194
Viet-Hoa Do3558c582022-12-16 14:45:57 +0000195 {
Ramy Elgammal002e6532023-01-11 18:48:04 +0000196 const auto tiles = _components.get_tiles();
Viet-Hoa Do3558c582022-12-16 14:45:57 +0000197 std::stringstream tiles_ss;
198
199 tiles_ss << " //------------------ START TILE DECLARATION ---------------------\n";
200
201 for(auto tile : tiles)
202 {
Ramy Elgammal002e6532023-01-11 18:48:04 +0000203 const auto var = _vtable.get_variable(tile);
Viet-Hoa Do3558c582022-12-16 14:45:57 +0000204 const auto data_type = get_cl_type_from_data_type(tile->data_type());
Ramy Elgammal002e6532023-01-11 18:48:04 +0000205 const auto var_name = var.uniq_name;
Viet-Hoa Do3558c582022-12-16 14:45:57 +0000206
207 tiles_ss << " TILE(" << data_type << ", M0, N0, " << var_name << ");\n";
208 }
209
210 tiles_ss << " //------------------ END TILE DECLARATION ---------------------\n";
211
212 code += tiles_ss.str();
213 }
214
SiCong Lif44bbc52022-08-29 18:25:51 +0100215 for(const auto &component_code : component_codes)
216 {
217 code += component_code;
Viet-Hoa Dob84e2532022-12-13 13:09:10 +0000218 code += "\n";
SiCong Lif44bbc52022-08-29 18:25:51 +0100219 }
220
221 code += "}\n";
222
223 return code;
224}
225std::string ClTemplateWriter::write_global_section() const
226{
Viet-Hoa Do04f46202022-12-14 14:49:56 +0000227 const auto dst_info = _components.get_any_dst_tensor();
SiCong Lif44bbc52022-08-29 18:25:51 +0100228 const auto dst_w = dst_info->dimension(0);
229 const auto tile_w = std::max(1, get_window().x().step());
230 const auto tile_h = std::max(1, get_window().y().step());
231 auto leftover_w = dst_w % tile_w;
232
233 std::string code = "";
234 code += std::string(" int g_ind_0 = GET_SPATIAL_IDX(0, ") + std::to_string(tile_w) + ", " + std::to_string(leftover_w) + ");\n";
235 code += std::string(" int g_ind_1 = GET_SPATIAL_IDX(1, ") + std::to_string(tile_h) + ", " + "0);\n";
236 code += std::string(" int g_ind_2 = GET_SPATIAL_IDX(2, 1, 0);\n\n");
237
238 code += " const bool g_cond_x = (g_ind_0 == 0);\n";
239 code += " const bool g_cond_y = (g_ind_1 == 0);\n";
240
241 return code;
242}
243std::string ClTemplateWriter::write_argument_declaration(const GpuKernelVariableTable::TensorVariable &var) const
244{
245 std::string code;
246 switch(var.kernel_argument_info.type)
247 {
248 case GpuKernelArgumentInfo::Type::Vector:
249 {
250 code += "\n VECTOR_DECLARATION(" + var.uniq_name + ")";
251 break;
252 }
253 case GpuKernelArgumentInfo::Type::Image:
254 {
255 code += "\n IMAGE_DECLARATION(" + var.uniq_name + ")";
256 break;
257 }
258 case GpuKernelArgumentInfo::Type::Image_3D:
259 {
260 code += "\n IMAGE_DECLARATION(" + var.uniq_name + "),";
261 code += "\n unsigned int " + var.uniq_name + "_stride_z";
262 break;
263 }
264 case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D:
265 {
266 code += "\n __read_only image2d_t " + var.uniq_name + "_img,";
267 code += "\n unsigned int " + var.uniq_name + "_stride_z";
268 break;
269 }
270 case GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer:
271 {
272 code += "\n TENSOR4D_T(" + var.uniq_name + ", BUFFER)";
273 break;
274 }
275 case GpuKernelArgumentInfo::Type::Tensor_4D_t_Image:
276 {
277 code += "\n TENSOR4D_T(" + var.uniq_name + ", IMAGE)";
278 break;
279 }
Ramy Elgammal002e6532023-01-11 18:48:04 +0000280 case GpuKernelArgumentInfo::Type::Tensor_3D:
281 {
282 code += "\n TENSOR3D_DECLARATION(" + var.uniq_name + ")";
283 break;
284 }
SiCong Lif44bbc52022-08-29 18:25:51 +0100285 default:
286 {
287 ARM_COMPUTE_ERROR("Unsupported declaration generation for GpuKernelArgumentInfo::Type");
288 }
289 }
290 return code;
291}
292std::string ClTemplateWriter::write_kernel_signature(const GpuKernelVariableTable::VariableList &argument_list) const
293{
294 std::string code = "\n__kernel void " + write_kernel_name() + "(";
295
296 for(int i = 0; i < static_cast<int>(argument_list.size()) - 1; ++i)
297 {
298 code += write_argument_declaration(argument_list[i]) + ",";
299 }
300 if(static_cast<int>(argument_list.size()) - 1 >= 0)
301 {
302 code += write_argument_declaration(argument_list[argument_list.size() - 1]);
303 }
304
305 code += ')';
306
307 return code;
308}
309std::string ClTemplateWriter::write_kernel_name() const
310{
311 if(_components.empty())
312 {
313 return "empty_kernel";
314 }
315 std::string name = _components.empty() ? "" : _components[0]->template_writer()->get_name();
316 for(size_t i = 1; i < _components.size(); ++i)
317 {
318 name += "___";
319 name += _components[i]->template_writer()->get_name();
320 }
321
322 return name;
323}
324} // namespace dynamic_fusion
325} // namespace experimental
326} // namespace arm_compute