blob: 0d62fe3afe2e69411a5b7581ba4a1c63d40ffb15 [file] [log] [blame]
Gian Marcode691f02017-09-08 16:13:11 +01001/*
SiCong Li47f177e2023-02-22 17:24:09 +00002 * Copyright (c) 2017-2023 Arm Limited.
Gian Marcode691f02017-09-08 16:13:11 +01003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "arm_compute/runtime/CL/CLTuner.h"
25
Gian Marco85e6f512018-02-01 16:57:48 +000026#include "arm_compute/core/Error.h"
Gian Marcode691f02017-09-08 16:13:11 +010027#include "arm_compute/runtime/CL/CLScheduler.h"
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010028#include "arm_compute/runtime/CL/tuners/CLTuningParametersList.h"
29
SiCong Li47f177e2023-02-22 17:24:09 +000030#include "src/common/utils/Log.h"
Sang-Hoon Parkbef7fa22020-10-21 15:58:54 +010031#include "src/core/CL/ICLKernel.h"
Matthew Bentham758b5ba2020-03-05 23:37:48 +000032#include "support/StringSupport.h"
Gian Marcode691f02017-09-08 16:13:11 +010033
Anthony Barbier317fa7f2018-03-01 10:11:22 +000034#include <cerrno>
Anthony Barbier8db83182018-02-27 13:08:00 +000035#include <fstream>
Gian Marcode691f02017-09-08 16:13:11 +010036#include <limits>
Gian Marcode691f02017-09-08 16:13:11 +010037
Gian Marco Iodicea74923c2019-01-31 17:06:54 +000038namespace arm_compute
39{
Manuel Bottinib56c1752020-11-18 17:56:30 +000040CLTuner::CLTuner(bool tune_new_kernels, CLTuningInfo tuning_info)
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010041 : real_clEnqueueNDRangeKernel(nullptr),
42 _tuning_params_table(),
43 _lws_table(),
44 _kernel_event(),
45 _tune_new_kernels(tune_new_kernels),
46 _tuning_info(tuning_info)
Gian Marcode691f02017-09-08 16:13:11 +010047{
48}
49
SiCong Li0a486cf2022-04-07 17:41:51 +010050struct CLTuner::IKernelData
51{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010052 virtual ~IKernelData() = default;
SiCong Li0a486cf2022-04-07 17:41:51 +010053 virtual void do_run(ICLKernel &kernel, cl::CommandQueue &queue) = 0;
54};
55struct DefaultKernelData : public CLTuner::IKernelData
56{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010057 DefaultKernelData(ITensorPack &tensors) : _tensors{tensors}
SiCong Li0a486cf2022-04-07 17:41:51 +010058 {
59 }
60 ~DefaultKernelData() override = default;
61 void do_run(ICLKernel &kernel, cl::CommandQueue &queue) override
62 {
63 const bool inject_memory = !_tensors.empty();
64 inject_memory ? kernel.run_op(_tensors, kernel.window(), queue) : kernel.run(kernel.window(), queue);
65 }
66
67private:
68 ITensorPack &_tensors;
69};
70
Anthony Barbierf5dcf792018-02-28 18:04:45 +000071bool CLTuner::kernel_event_is_set() const
72{
73 return _kernel_event() != nullptr;
74}
Gian Marco85e6f512018-02-01 16:57:48 +000075void CLTuner::set_cl_kernel_event(cl_event kernel_event)
76{
77 _kernel_event = kernel_event;
78}
79
Anthony Barbier8db83182018-02-27 13:08:00 +000080void CLTuner::set_tune_new_kernels(bool tune_new_kernels)
81{
82 _tune_new_kernels = tune_new_kernels;
83}
Anthony Barbier8b811952018-02-28 13:47:58 +000084bool CLTuner::tune_new_kernels() const
85{
86 return _tune_new_kernels;
87}
Anthony Barbier8db83182018-02-27 13:08:00 +000088
Vidhya Sudhan Loganathan050471e2019-04-25 09:27:24 +010089void CLTuner::set_tuner_mode(CLTunerMode mode)
90{
Manuel Bottinibe9f9f92021-01-25 15:07:17 +000091 _tuning_info.tuner_mode = mode;
Vidhya Sudhan Loganathan050471e2019-04-25 09:27:24 +010092}
Manuel Bottinib56c1752020-11-18 17:56:30 +000093
Georgios Pinitasc0d1c862018-03-23 15:13:15 +000094void CLTuner::tune_kernel_static(ICLKernel &kernel)
95{
96 ARM_COMPUTE_UNUSED(kernel);
97}
98
99void CLTuner::tune_kernel_dynamic(ICLKernel &kernel)
Gian Marcode691f02017-09-08 16:13:11 +0100100{
Georgios Pinitas0499dff2020-07-31 22:21:38 +0100101 ITensorPack pack;
102 tune_kernel_dynamic(kernel, pack);
Georgios Pinitas9c82e012020-07-17 12:47:56 +0100103}
104
SiCong Li0a486cf2022-04-07 17:41:51 +0100105void CLTuner::do_tune_kernel_dynamic(ICLKernel &kernel, IKernelData *data)
Georgios Pinitas9c82e012020-07-17 12:47:56 +0100106{
Giorgio Arena5d42b462019-07-26 15:54:20 +0100107 // Get the configuration ID from the kernel and append GPU target name and number of available compute units
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100108 const std::string config_id = kernel.config_id() + "_" + string_from_target(kernel.get_target()) + "_MP" +
109 support::cpp11::to_string(CLKernelLibrary::get().get_num_compute_units());
Anthony Barbier8db83182018-02-27 13:08:00 +0000110
Giorgio Arena5d42b462019-07-26 15:54:20 +0100111 // Check if we need to find the Optimal LWS. If the kernel's config_id is equal to default_config_id, the kernel does not require to be tuned
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100112 if (kernel.config_id() != arm_compute::default_config_id)
Gian Marco85e6f512018-02-01 16:57:48 +0000113 {
Manuel Bottinib56c1752020-11-18 17:56:30 +0000114 auto p = _tuning_params_table.find(config_id);
Anthony Barbier8db83182018-02-27 13:08:00 +0000115
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100116 if (p == _tuning_params_table.end())
Anthony Barbier8db83182018-02-27 13:08:00 +0000117 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100118 if (_tune_new_kernels)
Anthony Barbier8db83182018-02-27 13:08:00 +0000119 {
120 // Find the optimal LWS for the kernel
SiCong Li0a486cf2022-04-07 17:41:51 +0100121 CLTuningParams opt_tuning_params = find_optimal_tuning_params(kernel, data);
Anthony Barbier8db83182018-02-27 13:08:00 +0000122
123 // Insert the optimal LWS in the table
Manuel Bottinib56c1752020-11-18 17:56:30 +0000124 add_tuning_params(config_id, opt_tuning_params);
Anthony Barbier8db83182018-02-27 13:08:00 +0000125
126 // Set Local-Workgroup-Size
Manuel Bottinib56c1752020-11-18 17:56:30 +0000127 kernel.set_lws_hint(opt_tuning_params.get_lws());
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100128 if (_tuning_info.tune_wbsm)
Manuel Bottinibe9f9f92021-01-25 15:07:17 +0000129 {
130 kernel.set_wbsm_hint(opt_tuning_params.get_wbsm());
131 }
Anthony Barbier8db83182018-02-27 13:08:00 +0000132 }
133 }
134 else
135 {
136 // Set Local-Workgroup-Size
Manuel Bottinib56c1752020-11-18 17:56:30 +0000137 kernel.set_lws_hint(p->second.get_lws());
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100138 if (_tuning_info.tune_wbsm)
Manuel Bottinibe9f9f92021-01-25 15:07:17 +0000139 {
140 kernel.set_wbsm_hint(p->second.get_wbsm());
141 }
Anthony Barbier8db83182018-02-27 13:08:00 +0000142 }
143 }
144}
SiCong Li0a486cf2022-04-07 17:41:51 +0100145void CLTuner::tune_kernel_dynamic(ICLKernel &kernel, ITensorPack &tensors)
146{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100147 DefaultKernelData data{tensors};
SiCong Li0a486cf2022-04-07 17:41:51 +0100148
149 do_tune_kernel_dynamic(kernel, &data);
150}
151
Manuel Bottinib56c1752020-11-18 17:56:30 +0000152void CLTuner::add_tuning_params(const std::string &kernel_id, CLTuningParams optimal_tuning_params)
153{
154 _tuning_params_table.emplace(kernel_id, optimal_tuning_params);
155}
156
SiCong Li0a486cf2022-04-07 17:41:51 +0100157CLTuningParams CLTuner::find_optimal_tuning_params(ICLKernel &kernel, IKernelData *data)
Anthony Barbier8db83182018-02-27 13:08:00 +0000158{
Georgios Pinitas4632e5e2019-02-06 14:47:59 +0000159 // Profiling queue
160 cl::CommandQueue queue_profiler;
161
162 // Extract real OpenCL function to intercept
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100163 if (real_clEnqueueNDRangeKernel == nullptr)
Anthony Barbier8db83182018-02-27 13:08:00 +0000164 {
165 real_clEnqueueNDRangeKernel = CLSymbols::get().clEnqueueNDRangeKernel_ptr;
Gian Marco85e6f512018-02-01 16:57:48 +0000166 }
Georgios Pinitas4632e5e2019-02-06 14:47:59 +0000167
168 // Get the default queue
169 cl::CommandQueue default_queue = CLScheduler::get().queue();
170
171 // Check if we can use the OpenCL timer with the default queue
172 cl_command_queue_properties props = default_queue.getInfo<CL_QUEUE_PROPERTIES>();
173
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100174 if ((props & CL_QUEUE_PROFILING_ENABLE) == 0)
Georgios Pinitas4632e5e2019-02-06 14:47:59 +0000175 {
176 // Set the queue for profiling
177 queue_profiler = cl::CommandQueue(CLScheduler::get().context(), props | CL_QUEUE_PROFILING_ENABLE);
178 }
179 else
180 {
181 queue_profiler = default_queue;
182 }
183
Gian Marco85e6f512018-02-01 16:57:48 +0000184 // Start intercepting enqueues:
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100185 auto interceptor = [this](cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim, const size_t *gwo,
186 const size_t *gws, const size_t *lws, cl_uint num_events_in_wait_list,
187 const cl_event *event_wait_list, cl_event *event)
Anthony Barbier48c19f12018-04-20 11:31:52 +0100188 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100189 if (this->kernel_event_is_set())
Anthony Barbier48c19f12018-04-20 11:31:52 +0100190 {
191 // If the event is already set it means the kernel enqueue is sliced: given that we only time the first slice we can save time by skipping the other enqueues.
192 return CL_SUCCESS;
193 }
194 cl_event tmp;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100195 cl_int retval = this->real_clEnqueueNDRangeKernel(command_queue, kernel, work_dim, gwo, gws, lws,
196 num_events_in_wait_list, event_wait_list, &tmp);
Anthony Barbier48c19f12018-04-20 11:31:52 +0100197
198 // Set OpenCL event
199 this->set_cl_kernel_event(tmp);
200
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100201 if (event != nullptr)
Vidhya Sudhan Loganathanca65af32019-02-07 11:14:42 +0000202 {
203 //return cl_event from the intercepted call
204 clRetainEvent(tmp);
205 *event = tmp;
206 }
Anthony Barbier48c19f12018-04-20 11:31:52 +0100207 return retval;
208 };
209 CLSymbols::get().clEnqueueNDRangeKernel_ptr = interceptor;
Gian Marcode691f02017-09-08 16:13:11 +0100210
Vidhya Sudhan Loganathan050471e2019-04-25 09:27:24 +0100211 // Run the kernel with default lws to be used as baseline
SiCong Li0a486cf2022-04-07 17:41:51 +0100212 data->do_run(kernel, queue_profiler);
Vidhya Sudhan Loganathan050471e2019-04-25 09:27:24 +0100213
SiCong Li47f177e2023-02-22 17:24:09 +0000214 /// Get the cached gws used by the kernel
215 /// NOTE: The window configured inside configure() is usually changed in run(). Thus we should not calculate gws
216 /// from this static window. Instead we get the real gws used (and cached) by run() in the previous step.
217 /// This is only a temporary workaround. An ideal solution involves decoupling the execution window from run() / run_op()
218 /// Please see COMPMID-5934
219 cl::NDRange gws = kernel.get_cached_gws();
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100220 ARM_COMPUTE_LOG_MSG_WITH_FORMAT_ACL(
221 arm_compute::logging::LogLevel::INFO,
222 "[CLTuner] Kernel with config_id '%s' uses %s as the upper-bound for lws search", kernel.config_id().c_str(),
223 to_string(gws).c_str());
SiCong Li47f177e2023-02-22 17:24:09 +0000224
Vidhya Sudhan Loganathan050471e2019-04-25 09:27:24 +0100225 queue_profiler.finish();
226
227 const cl_ulong start = _kernel_event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
228 const cl_ulong end = _kernel_event.getProfilingInfo<CL_PROFILING_COMMAND_END>();
229 cl_ulong min_exec_time = end - start;
230 _kernel_event = nullptr;
231
Manuel Bottinibe9f9f92021-01-25 15:07:17 +0000232 CLTuningParams opt_tuning_params(cl::NullRange, 0);
Gian Marcode691f02017-09-08 16:13:11 +0100233
Manuel Bottinib56c1752020-11-18 17:56:30 +0000234 // Construct the list of tuning parameters values to be tested based on the tuner mode.
Manuel Bottinibe9f9f92021-01-25 15:07:17 +0000235 auto tuning_list = cl_tuner::get_tuning_parameters_list(_tuning_info, gws);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100236 for (size_t i = 0; i < tuning_list->size(); ++i)
Gian Marco85e6f512018-02-01 16:57:48 +0000237 {
Manuel Bottinibe9f9f92021-01-25 15:07:17 +0000238 CLTuningParams tuning_test = (*tuning_list)[i];
239 // Setting the lws
240 cl::NDRange lws_test = tuning_test.get_lws();
Vidhya Sudhan Loganathan050471e2019-04-25 09:27:24 +0100241 auto x = lws_test[0];
242 auto y = lws_test[1];
243 auto z = lws_test[2];
Gian Marco Iodicedeaed2d2019-05-14 17:11:53 +0100244 const bool invalid_lws = (x * y * z > kernel.get_max_workgroup_size()) || (x == 1 && y == 1 && z == 1);
Vidhya Sudhan Loganathan050471e2019-04-25 09:27:24 +0100245
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100246 if (invalid_lws)
Gian Marcode691f02017-09-08 16:13:11 +0100247 {
Vidhya Sudhan Loganathan050471e2019-04-25 09:27:24 +0100248 continue;
249 }
Gian Marco85e6f512018-02-01 16:57:48 +0000250
Vidhya Sudhan Loganathan050471e2019-04-25 09:27:24 +0100251 kernel.set_lws_hint(lws_test);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100252 if (_tuning_info.tune_wbsm && CLKernelLibrary::get().is_wbsm_supported())
Manuel Bottinibe9f9f92021-01-25 15:07:17 +0000253 {
254 cl_int wbsm_test = tuning_test.get_wbsm();
255 kernel.set_wbsm_hint(wbsm_test);
256 }
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100257 ARM_COMPUTE_LOG_MSG_WITH_FORMAT_ACL(arm_compute::logging::LogLevel::INFO, "[CLTuner] Trying LWS: %s, WBSM: %d",
SiCong Li47f177e2023-02-22 17:24:09 +0000258 to_string(kernel.lws_hint()).c_str(), kernel.wbsm_hint());
Gian Marco Iodicea74923c2019-01-31 17:06:54 +0000259
Vidhya Sudhan Loganathan050471e2019-04-25 09:27:24 +0100260 // Run the kernel
SiCong Li0a486cf2022-04-07 17:41:51 +0100261 data->do_run(kernel, queue_profiler);
Gian Marco85e6f512018-02-01 16:57:48 +0000262
Vidhya Sudhan Loganathan050471e2019-04-25 09:27:24 +0100263 queue_profiler.finish();
Gian Marcoc78d4bc2018-01-25 13:49:44 +0000264
Vidhya Sudhan Loganathan050471e2019-04-25 09:27:24 +0100265 const cl_ulong start = _kernel_event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
266 const cl_ulong end = _kernel_event.getProfilingInfo<CL_PROFILING_COMMAND_END>();
267 const cl_ulong diff = end - start;
268 _kernel_event = nullptr;
Gian Marco Iodicedeaed2d2019-05-14 17:11:53 +0100269
Vidhya Sudhan Loganathan050471e2019-04-25 09:27:24 +0100270 // Check the execution time
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100271 if (diff < min_exec_time)
Vidhya Sudhan Loganathan050471e2019-04-25 09:27:24 +0100272 {
273 min_exec_time = diff;
Manuel Bottinibe9f9f92021-01-25 15:07:17 +0000274 opt_tuning_params.set_lws(tuning_test.get_lws());
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100275 if (_tuning_info.tune_wbsm)
Manuel Bottinibe9f9f92021-01-25 15:07:17 +0000276 {
277 opt_tuning_params.set_wbsm(tuning_test.get_wbsm());
278 }
Gian Marcode691f02017-09-08 16:13:11 +0100279 }
280 }
281
Gian Marco85e6f512018-02-01 16:57:48 +0000282 // Restore real function
Anthony Barbier8db83182018-02-27 13:08:00 +0000283 CLSymbols::get().clEnqueueNDRangeKernel_ptr = real_clEnqueueNDRangeKernel;
Manuel Bottinibe9f9f92021-01-25 15:07:17 +0000284 return opt_tuning_params;
Gian Marcode691f02017-09-08 16:13:11 +0100285}
286
Manuel Bottinib56c1752020-11-18 17:56:30 +0000287const std::unordered_map<std::string, CLTuningParams> &CLTuner::tuning_params_table() const
288{
289 return _tuning_params_table;
290}
291
292void CLTuner::import_tuning_params(const std::unordered_map<std::string, CLTuningParams> &tuning_params_table)
293{
294 _tuning_params_table.clear();
295 _tuning_params_table = tuning_params_table;
296}
297
Anthony Barbier8b811952018-02-28 13:47:58 +0000298void CLTuner::load_from_file(const std::string &filename)
Gian Marco85e6f512018-02-01 16:57:48 +0000299{
Anthony Barbier8b811952018-02-28 13:47:58 +0000300 std::ifstream fs;
Anthony Barbierf5dcf792018-02-28 18:04:45 +0000301 fs.exceptions(std::ifstream::badbit);
Anthony Barbier8b811952018-02-28 13:47:58 +0000302 fs.open(filename, std::ios::in);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100303 if (!fs.is_open())
Anthony Barbier8db83182018-02-27 13:08:00 +0000304 {
Michalis Spyrou7c60c992019-10-10 14:33:47 +0100305 ARM_COMPUTE_ERROR_VAR("Failed to open '%s' (%s [%d])", filename.c_str(), strerror(errno), errno);
Anthony Barbier317fa7f2018-03-01 10:11:22 +0000306 }
307 std::string line;
Manuel Bottinibe9f9f92021-01-25 15:07:17 +0000308 bool header_line = true;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100309 while (!std::getline(fs, line).fail())
Anthony Barbier317fa7f2018-03-01 10:11:22 +0000310 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100311 if (header_line)
Anthony Barbier8db83182018-02-27 13:08:00 +0000312 {
Manuel Bottinibe9f9f92021-01-25 15:07:17 +0000313 header_line = false;
314 size_t pos_lws = line.find("lws");
315 size_t pos_wbsm = line.find("wbsm");
316 _tuning_info.tune_wbsm = false;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100317 if (pos_lws != std::string::npos || pos_wbsm != std::string::npos)
Anthony Barbier8db83182018-02-27 13:08:00 +0000318 {
Manuel Bottinibe9f9f92021-01-25 15:07:17 +0000319 // The file has in the first line the parameters it has been tuned on
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100320 if (pos_wbsm != std::string::npos)
Manuel Bottinibe9f9f92021-01-25 15:07:17 +0000321 {
322 _tuning_info.tune_wbsm = true;
323 }
324 // Once the line with the tuning parameter is read we can
325 // read the next one to start collecting the values
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100326 if (std::getline(fs, line).fail())
Manuel Bottinibe9f9f92021-01-25 15:07:17 +0000327 {
328 break;
329 }
Anthony Barbier8db83182018-02-27 13:08:00 +0000330 }
Anthony Barbier8db83182018-02-27 13:08:00 +0000331 }
Anthony Barbier317fa7f2018-03-01 10:11:22 +0000332
Manuel Bottinibe9f9f92021-01-25 15:07:17 +0000333 CLTuningParams tuning_params;
334 size_t pos = line.find(";");
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100335 if (pos == std::string::npos)
Anthony Barbier317fa7f2018-03-01 10:11:22 +0000336 {
Manuel Bottinibe9f9f92021-01-25 15:07:17 +0000337 ARM_COMPUTE_ERROR_VAR("Malformed row '%s' in %s", line.c_str(), filename.c_str());
Anthony Barbier317fa7f2018-03-01 10:11:22 +0000338 }
Manuel Bottinibe9f9f92021-01-25 15:07:17 +0000339 std::string kernel_id = line.substr(0, pos);
340 line.erase(0, pos + 1);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100341 if (!tuning_params.from_string(_tuning_info, line))
Manuel Bottinibe9f9f92021-01-25 15:07:17 +0000342 {
343 ARM_COMPUTE_ERROR_VAR("Malformed row '%s' in %s", line.c_str(), filename.c_str());
344 }
345 add_tuning_params(kernel_id, tuning_params);
Anthony Barbier8db83182018-02-27 13:08:00 +0000346 }
Anthony Barbier317fa7f2018-03-01 10:11:22 +0000347 fs.close();
Gian Marco85e6f512018-02-01 16:57:48 +0000348}
349
Manuel Bottinib56c1752020-11-18 17:56:30 +0000350bool CLTuner::save_to_file(const std::string &filename) const
Gian Marco85e6f512018-02-01 16:57:48 +0000351{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100352 if (!_tune_new_kernels || _tuning_params_table.empty() || filename.empty())
Manuel Bottinib56c1752020-11-18 17:56:30 +0000353 {
354 return false;
355 }
Anthony Barbier8b811952018-02-28 13:47:58 +0000356 std::ofstream fs;
357 fs.exceptions(std::ifstream::failbit | std::ifstream::badbit);
358 fs.open(filename, std::ios::out);
Manuel Bottinibe9f9f92021-01-25 15:07:17 +0000359 std::string header_string = "";
360 header_string += "lws";
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100361 if (_tuning_info.tune_wbsm)
Manuel Bottinibe9f9f92021-01-25 15:07:17 +0000362 {
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100363 if (!header_string.empty())
Manuel Bottinibe9f9f92021-01-25 15:07:17 +0000364 {
365 header_string += " ";
366 }
367 header_string += "wbsm";
368 }
369 fs << header_string << std::endl;
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100370 for (auto const &kernel_data : _tuning_params_table)
Anthony Barbier8db83182018-02-27 13:08:00 +0000371 {
Manuel Bottinibe9f9f92021-01-25 15:07:17 +0000372 CLTuningParams tun_pams(kernel_data.second);
373 fs << kernel_data.first << tun_pams.to_string(_tuning_info) << std::endl;
Anthony Barbier8db83182018-02-27 13:08:00 +0000374 }
375 fs.close();
Manuel Bottinib56c1752020-11-18 17:56:30 +0000376 return true;
Anthony Barbier8db83182018-02-27 13:08:00 +0000377}
Matthew Bentham758b5ba2020-03-05 23:37:48 +0000378} // namespace arm_compute