blob: c9dd13e25c1cab02122a56e95bf7370e4b7a3318 [file] [log] [blame]
Michalis Spyrou11d49182020-03-26 10:31:32 +00001/*
Manuel Bottinibe9f9f92021-01-25 15:07:17 +00002 * Copyright (c) 2020-2021 Arm Limited.
Michalis Spyrou11d49182020-03-26 10:31:32 +00003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "arm_compute/core/CL/CLCompileContext.h"
25#include "arm_compute/core/CL/OpenCL.h"
26
27#include "arm_compute/core/CL/CLHelpers.h"
28#include "arm_compute/core/Error.h"
29#include "arm_compute/core/Utils.h"
30#include "support/StringSupport.h"
31
Giorgio Arenaea8d2662021-05-20 11:36:56 +010032#include <regex>
33
Michalis Spyrou11d49182020-03-26 10:31:32 +000034namespace arm_compute
35{
36CLBuildOptions::CLBuildOptions()
37 : _build_opts()
38{
39}
40
41void CLBuildOptions::add_option(std::string option)
42{
43 _build_opts.emplace(std::move(option));
44}
45
46void CLBuildOptions::add_option_if(bool cond, std::string option)
47{
48 if(cond)
49 {
50 add_option(std::move(option));
51 }
52}
53
54void CLBuildOptions::add_option_if_else(bool cond, std::string option_true, std::string option_false)
55{
56 (cond) ? add_option(std::move(option_true)) : add_option(std::move(option_false));
57}
58
59void CLBuildOptions::add_options(const StringSet &options)
60{
61 _build_opts.insert(options.begin(), options.end());
62}
63
64void CLBuildOptions::add_options_if(bool cond, const StringSet &options)
65{
66 if(cond)
67 {
68 add_options(options);
69 }
70}
71
72const CLBuildOptions::StringSet &CLBuildOptions::options() const
73{
74 return _build_opts;
75}
76
77Program::Program()
78 : _context(), _device(), _is_binary(false), _name(), _source(), _binary()
79{
80}
81
82Program::Program(cl::Context context, std::string name, std::string source)
83 : _context(std::move(context)), _device(), _is_binary(false), _name(std::move(name)), _source(std::move(source)), _binary()
84{
85}
86
87Program::Program(cl::Context context, cl::Device device, std::string name, std::vector<unsigned char> binary)
88 : _context(std::move(context)), _device(std::move(device)), _is_binary(true), _name(std::move(name)), _source(), _binary(std::move(binary))
89{
90}
91
92Program::operator cl::Program() const
93{
94 if(_is_binary)
95 {
96 return cl::Program(_context, { _device }, { _binary });
97 }
98 else
99 {
100 return cl::Program(_context, _source, false);
101 }
102}
103
104bool Program::build(const cl::Program &program, const std::string &build_options)
105{
106 try
107 {
108 return program.build(build_options.c_str()) == CL_SUCCESS;
109 }
110 catch(const cl::Error &e)
111 {
112 cl_int err = CL_SUCCESS;
113 const auto build_info = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(&err);
114
115 for(auto &pair : build_info)
116 {
117 std::cerr << pair.second << std::endl;
118 }
119
120 return false;
121 }
122}
123
124cl::Program Program::build(const std::string &build_options) const
125{
126 cl::Program cl_program = static_cast<cl::Program>(*this);
127 build(cl_program, build_options);
128 return cl_program;
129}
130
131Kernel::Kernel()
132 : _name(), _kernel()
133{
134}
135
136Kernel::Kernel(std::string name, const cl::Program &program)
137 : _name(std::move(name)),
138 _kernel(cl::Kernel(program, _name.c_str()))
139{
140}
141CLCompileContext::CLCompileContext()
Manuel Bottinibe9f9f92021-01-25 15:07:17 +0000142 : _context(), _device(), _programs_map(), _built_programs_map(), _is_wbsm_supported()
Michalis Spyrou11d49182020-03-26 10:31:32 +0000143{
144}
145
146CLCompileContext::CLCompileContext(cl::Context context, const cl::Device &device)
Manuel Bottinibe9f9f92021-01-25 15:07:17 +0000147 : _context(), _device(), _programs_map(), _built_programs_map(), _is_wbsm_supported()
Michalis Spyrou11d49182020-03-26 10:31:32 +0000148{
Manuel Bottinibe9f9f92021-01-25 15:07:17 +0000149 _context = std::move(context);
150 _device = CLDevice(device);
151 _is_wbsm_supported = get_wbsm_support_info(device);
Michalis Spyrou11d49182020-03-26 10:31:32 +0000152}
153
154Kernel CLCompileContext::create_kernel(const std::string &kernel_name, const std::string &program_name, const std::string &program_source,
155 const std::string &kernel_path, const StringSet &build_options_set, bool is_binary) const
156{
157 const std::string build_options = generate_build_options(build_options_set, kernel_path);
158 const std::string built_program_name = program_name + "_" + build_options;
159 auto built_program_it = _built_programs_map.find(built_program_name);
160 cl::Program cl_program;
161
162 if(_built_programs_map.end() != built_program_it)
163 {
164 // If program has been built, retrieve to create kernel from it
165 cl_program = built_program_it->second;
166 }
167 else
168 {
169 Program program = load_program(program_name, program_source, is_binary);
170
171 // Build program
172 cl_program = program.build(build_options);
173
174 // Add built program to internal map
Michalis Spyrou12910f22020-04-14 13:54:45 +0100175 _built_programs_map.emplace(built_program_name, cl_program);
Michalis Spyrou11d49182020-03-26 10:31:32 +0000176 }
177
178 // Create and return kernel
179 return Kernel(kernel_name, cl_program);
180}
181
182const Program &CLCompileContext::load_program(const std::string &program_name, const std::string &program_source, bool is_binary) const
183{
184 const auto program_it = _programs_map.find(program_name);
185
186 if(program_it != _programs_map.end())
187 {
188 return program_it->second;
189 }
190
191 Program program;
192
193#ifdef EMBEDDED_KERNELS
194 ARM_COMPUTE_UNUSED(is_binary);
195 program = Program(_context, program_name, program_source);
196#else /* EMBEDDED_KERNELS */
197 if(is_binary)
198 {
199 program = Program(_context, _device.cl_device(), program_name, std::vector<unsigned char>(program_source.begin(), program_source.end()));
200 }
201 else
202 {
203 program = Program(_context, program_name, program_source);
204 }
205#endif /* EMBEDDED_KERNELS */
206
207 // Insert program to program map
208 const auto new_program = _programs_map.emplace(program_name, std::move(program));
209
210 return new_program.first->second;
211}
212
213void CLCompileContext::set_context(cl::Context context)
214{
215 _context = std::move(context);
216 if(_context.get() != nullptr)
217 {
218 const auto cl_devices = _context.getInfo<CL_CONTEXT_DEVICES>();
219
220 if(!cl_devices.empty())
221 {
222 _device = CLDevice(cl_devices[0]);
223 }
224 }
225}
226
227std::string CLCompileContext::generate_build_options(const StringSet &build_options_set, const std::string &kernel_path) const
228{
229 std::string concat_str;
230
231#if defined(ARM_COMPUTE_DEBUG_ENABLED)
232 // Enable debug properties in CL kernels
233 concat_str += " -DARM_COMPUTE_DEBUG_ENABLED";
234#endif // defined(ARM_COMPUTE_DEBUG_ENABLED)
235
236 GPUTarget gpu_arch = get_arch_from_target(_device.target());
237 concat_str += " -DGPU_ARCH=" + support::cpp11::to_string(
238 static_cast<std::underlying_type<GPUTarget>::type>(gpu_arch));
239
240 if(_device.supported("cl_khr_fp16"))
241 {
242 concat_str += " -DARM_COMPUTE_OPENCL_FP16_ENABLED=1 ";
243 }
244
245 if(_device.supported("cl_arm_integer_dot_product_int8"))
246 {
247 concat_str += " -DARM_COMPUTE_OPENCL_DOT8_ENABLED=1 ";
248 }
249
250 if(_device.supported("cl_arm_integer_dot_product_accumulate_int8"))
251 {
252 concat_str += " -DARM_COMPUTE_OPENCL_DOT8_ACC_ENABLED=1 ";
253 }
254
255 if(_device.version() == CLVersion::CL20)
256 {
257 concat_str += " -cl-std=CL2.0 ";
258 }
259 else if(_device.supported("cl_arm_non_uniform_work_group_size"))
260 {
261 concat_str += " -cl-arm-non-uniform-work-group-size ";
262 }
263 else
264 {
265 ARM_COMPUTE_ERROR("Non uniform workgroup size is not supported!!");
266 }
267
Giorgio Arenabf1dbd82021-05-20 14:17:23 +0100268 if(gpu_arch != GPUTarget::UNKNOWN && gpu_arch != GPUTarget::MIDGARD)
Giorgio Arenaea8d2662021-05-20 11:36:56 +0100269 {
270 const std::string device_vers = _device.device_version();
271 const std::regex ddk_regex("r([0-9]*)p[0-9]");
272 std::smatch ddk_match;
273
Giorgio Arenaf7f64502021-06-30 14:26:40 +0100274 if(std::regex_search(device_vers, ddk_match, ddk_regex) && std::stoi(ddk_match[1]) >= 11)
Giorgio Arenaea8d2662021-05-20 11:36:56 +0100275 {
276 concat_str += " -DUNROLL_WITH_PRAGMA ";
277 }
278 }
279
Michalis Spyrou11d49182020-03-26 10:31:32 +0000280 std::string build_options = stringify_set(build_options_set, kernel_path) + concat_str;
281
282 return build_options;
283}
284
285bool CLCompileContext::fp16_supported() const
286{
287 return _device.supported("cl_khr_fp16");
288}
289
290std::string CLCompileContext::stringify_set(const StringSet &s, const std::string &kernel_path) const
291{
292 std::string concat_set;
293#ifndef EMBEDDED_KERNELS
294 concat_set += "-I" + kernel_path + " ";
295#else /* EMBEDDED_KERNELS */
296 ARM_COMPUTE_UNUSED(kernel_path);
297#endif /* EMBEDDED_KERNELS */
298
299 // Concatenate set
300 for(const auto &el : s)
301 {
302 concat_set += " " + el;
303 }
304
305 return concat_set;
306}
307
308void CLCompileContext::add_built_program(const std::string &built_program_name, const cl::Program &program) const
309{
310 _built_programs_map.emplace(built_program_name, program);
311}
312
313void CLCompileContext::clear_programs_cache()
314{
315 _programs_map.clear();
316 _built_programs_map.clear();
317}
318
319const std::map<std::string, cl::Program> &CLCompileContext::get_built_programs() const
320{
321 return _built_programs_map;
322}
323
324cl::Context &CLCompileContext::context()
325{
326 return _context;
327}
328
329const cl::Device &CLCompileContext::get_device() const
330{
331 return _device.cl_device();
332}
333
334void CLCompileContext::set_device(cl::Device device)
335{
Manuel Bottinibe9f9f92021-01-25 15:07:17 +0000336 _device = std::move(device);
337 _is_wbsm_supported = get_wbsm_support_info(device);
Michalis Spyrou11d49182020-03-26 10:31:32 +0000338}
339
340cl::NDRange CLCompileContext::default_ndrange() const
341{
342 GPUTarget _target = get_target_from_device(_device.cl_device());
343 cl::NDRange default_range;
344
345 switch(_target)
346 {
347 case GPUTarget::MIDGARD:
348 case GPUTarget::T600:
349 case GPUTarget::T700:
350 case GPUTarget::T800:
351 default_range = cl::NDRange(128u, 1);
352 break;
353 default:
354 default_range = cl::NullRange;
355 }
356
357 return default_range;
358}
359
360bool CLCompileContext::int64_base_atomics_supported() const
361{
362 return _device.supported("cl_khr_int64_base_atomics");
363}
364
Manuel Bottinibe9f9f92021-01-25 15:07:17 +0000365bool CLCompileContext::is_wbsm_supported() const
366{
367 return _is_wbsm_supported;
368}
369
Michalis Spyrou11d49182020-03-26 10:31:32 +0000370size_t CLCompileContext::max_local_workgroup_size(const cl::Kernel &kernel) const
371{
372 size_t result;
373
374 size_t err = kernel.getWorkGroupInfo(_device.cl_device(), CL_KERNEL_WORK_GROUP_SIZE, &result);
375 ARM_COMPUTE_ERROR_ON_MSG(err != 0, "clGetKernelWorkGroupInfo failed to return the maximum workgroup size for the kernel");
376 ARM_COMPUTE_UNUSED(err);
377
378 return result;
379}
380
381std::string CLCompileContext::get_device_version() const
382{
383 return _device.device_version();
384}
385
386cl_uint CLCompileContext::get_num_compute_units() const
387{
388 return _device.compute_units();
389}
390} // namespace arm_compute