blob: c443aade56bf344e3f72ebde99b4c51ad1818f92 [file] [log] [blame]
Anthony Barbier6e433492017-11-09 15:52:00 +00001/*
Anthony Barbier5d9d0192018-01-26 16:38:07 +00002 * Copyright (c) 2017-2018 ARM Limited.
Anthony Barbier6e433492017-11-09 15:52:00 +00003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "OpenCLTimer.h"
25
26#include "../Framework.h"
27#include "../Utils.h"
28
Georgios Pinitas5c2fb3f2018-05-01 15:26:20 +010029#include "arm_compute/graph/INode.h"
Anthony Barbier6e433492017-11-09 15:52:00 +000030#include "arm_compute/runtime/CL/CLScheduler.h"
31
32#ifndef ARM_COMPUTE_CL
33#error "You can't use OpenCLTimer without OpenCL"
34#endif /* ARM_COMPUTE_CL */
35
36namespace arm_compute
37{
38namespace test
39{
40namespace framework
41{
Anthony Barbier72f4ae52018-11-07 17:33:54 +000042template <bool output_timestamps>
43std::string OpenCLClock<output_timestamps>::id() const
Anthony Barbier6e433492017-11-09 15:52:00 +000044{
Anthony Barbier72f4ae52018-11-07 17:33:54 +000045 if(output_timestamps)
46 {
47 return "OpenCLTimestamps";
48 }
49 else
50 {
51 return "OpenCLTimer";
52 }
Anthony Barbier6e433492017-11-09 15:52:00 +000053}
54
Anthony Barbier72f4ae52018-11-07 17:33:54 +000055template <bool output_timestamps>
56OpenCLClock<output_timestamps>::OpenCLClock(ScaleFactor scale_factor)
Anthony Barbierc4835212018-05-16 14:20:04 +010057 : _kernels(), _real_function(nullptr), _real_graph_function(nullptr), _prefix(), _timer_enabled(false)
Anthony Barbier6e433492017-11-09 15:52:00 +000058{
59 auto q = CLScheduler::get().queue();
60 cl_command_queue_properties props = q.getInfo<CL_QUEUE_PROPERTIES>();
61 if((props & CL_QUEUE_PROFILING_ENABLE) == 0)
62 {
63 CLScheduler::get().set_queue(cl::CommandQueue(CLScheduler::get().context(), props | CL_QUEUE_PROFILING_ENABLE));
64 }
Giorgio Arenace58a9f2017-10-31 17:59:17 +000065
66 switch(scale_factor)
67 {
68 case ScaleFactor::NONE:
69 _scale_factor = 1.f;
70 _unit = "ns";
71 break;
72 case ScaleFactor::TIME_US:
73 _scale_factor = 1000.f;
74 _unit = "us";
75 break;
76 case ScaleFactor::TIME_MS:
77 _scale_factor = 1000000.f;
78 _unit = "ms";
79 break;
80 case ScaleFactor::TIME_S:
81 _scale_factor = 1000000000.f;
82 _unit = "s";
83 break;
84 default:
85 ARM_COMPUTE_ERROR("Invalid scale");
86 }
Anthony Barbier6e433492017-11-09 15:52:00 +000087}
88
Anthony Barbier72f4ae52018-11-07 17:33:54 +000089template <bool output_timestamps>
90void OpenCLClock<output_timestamps>::test_start()
Anthony Barbier6e433492017-11-09 15:52:00 +000091{
Anthony Barbier6e433492017-11-09 15:52:00 +000092 // Start intercepting enqueues:
Georgios Pinitas5c2fb3f2018-05-01 15:26:20 +010093 ARM_COMPUTE_ERROR_ON(_real_function != nullptr);
94 ARM_COMPUTE_ERROR_ON(_real_graph_function != nullptr);
95 _real_function = CLSymbols::get().clEnqueueNDRangeKernel_ptr;
96 _real_graph_function = graph::TaskExecutor::get().execute_function;
97 auto interceptor = [this](
98 cl_command_queue command_queue,
99 cl_kernel kernel,
100 cl_uint work_dim,
101 const size_t *gwo,
102 const size_t *gws,
103 const size_t *lws,
104 cl_uint num_events_in_wait_list,
105 const cl_event * event_wait_list,
106 cl_event * event)
Anthony Barbier48c19f12018-04-20 11:31:52 +0100107 {
Anthony Barbierc4835212018-05-16 14:20:04 +0100108 if(this->_timer_enabled)
109 {
110 ARM_COMPUTE_ERROR_ON_MSG(event != nullptr, "Not supported");
111 ARM_COMPUTE_UNUSED(event);
Anthony Barbier48c19f12018-04-20 11:31:52 +0100112
Anthony Barbier72f4ae52018-11-07 17:33:54 +0000113 kernel_info info;
114 cl::Kernel cpp_kernel(kernel, true);
115 std::stringstream ss;
Anthony Barbierc4835212018-05-16 14:20:04 +0100116 ss << this->_prefix << cpp_kernel.getInfo<CL_KERNEL_FUNCTION_NAME>();
117 if(gws != nullptr)
118 {
119 ss << " GWS[" << gws[0] << "," << gws[1] << "," << gws[2] << "]";
120 }
121 if(lws != nullptr)
122 {
123 ss << " LWS[" << lws[0] << "," << lws[1] << "," << lws[2] << "]";
124 }
125 info.name = ss.str();
126 cl_event tmp;
127 cl_int retval = this->_real_function(command_queue, kernel, work_dim, gwo, gws, lws, num_events_in_wait_list, event_wait_list, &tmp);
128 info.event = tmp;
129 this->_kernels.push_back(std::move(info));
130 return retval;
Anthony Barbier48c19f12018-04-20 11:31:52 +0100131 }
Anthony Barbierc4835212018-05-16 14:20:04 +0100132 else
Anthony Barbier48c19f12018-04-20 11:31:52 +0100133 {
Anthony Barbierc4835212018-05-16 14:20:04 +0100134 return this->_real_function(command_queue, kernel, work_dim, gwo, gws, lws, num_events_in_wait_list, event_wait_list, event);
Anthony Barbier48c19f12018-04-20 11:31:52 +0100135 }
Anthony Barbier48c19f12018-04-20 11:31:52 +0100136 };
137
Georgios Pinitas5c2fb3f2018-05-01 15:26:20 +0100138 // Start intercepting tasks:
139 auto task_interceptor = [this](graph::ExecutionTask & task)
140 {
141 if(task.node != nullptr && !task.node->name().empty())
142 {
143 this->_prefix = task.node->name() + "/";
144 }
145 else
146 {
147 this->_prefix = "";
148 }
149 this->_real_graph_function(task);
150 this->_prefix = "";
151 };
152
Anthony Barbier48c19f12018-04-20 11:31:52 +0100153 CLSymbols::get().clEnqueueNDRangeKernel_ptr = interceptor;
Georgios Pinitas5c2fb3f2018-05-01 15:26:20 +0100154 graph::TaskExecutor::get().execute_function = task_interceptor;
Anthony Barbier6e433492017-11-09 15:52:00 +0000155}
156
Anthony Barbier72f4ae52018-11-07 17:33:54 +0000157template <bool output_timestamps>
158void OpenCLClock<output_timestamps>::start()
Georgios Pinitas5c2fb3f2018-05-01 15:26:20 +0100159{
160 _kernels.clear();
Anthony Barbierc4835212018-05-16 14:20:04 +0100161 _timer_enabled = true;
162}
Anthony Barbier72f4ae52018-11-07 17:33:54 +0000163template <bool output_timestamps>
164void OpenCLClock<output_timestamps>::stop()
Anthony Barbierc4835212018-05-16 14:20:04 +0100165{
166 _timer_enabled = false;
Georgios Pinitas5c2fb3f2018-05-01 15:26:20 +0100167}
168
Anthony Barbier72f4ae52018-11-07 17:33:54 +0000169template <bool output_timestamps>
170void OpenCLClock<output_timestamps>::test_stop()
Anthony Barbier6e433492017-11-09 15:52:00 +0000171{
172 // Restore real function
Georgios Pinitas5c2fb3f2018-05-01 15:26:20 +0100173 CLSymbols::get().clEnqueueNDRangeKernel_ptr = _real_function;
174 graph::TaskExecutor::get().execute_function = _real_graph_function;
175 _real_graph_function = nullptr;
176 _real_function = nullptr;
Anthony Barbier6e433492017-11-09 15:52:00 +0000177}
178
Anthony Barbier72f4ae52018-11-07 17:33:54 +0000179template <bool output_timestamps>
180Instrument::MeasurementsMap OpenCLClock<output_timestamps>::measurements() const
Anthony Barbier6e433492017-11-09 15:52:00 +0000181{
182 MeasurementsMap measurements;
183 unsigned int kernel_number = 0;
Georgios Pinitas5c2fb3f2018-05-01 15:26:20 +0100184 for(auto kernel : _kernels)
Anthony Barbier6e433492017-11-09 15:52:00 +0000185 {
Anthony Barbier72f4ae52018-11-07 17:33:54 +0000186 cl_ulong start, end;
187 kernel.event.getProfilingInfo(CL_PROFILING_COMMAND_START, &start);
188 kernel.event.getProfilingInfo(CL_PROFILING_COMMAND_END, &end);
189 std::string name = kernel.name + " #" + support::cpp11::to_string(kernel_number++);
Anthony Barbier6e433492017-11-09 15:52:00 +0000190
Anthony Barbier72f4ae52018-11-07 17:33:54 +0000191 if(output_timestamps)
192 {
193 measurements.emplace("[start]" + name, Measurement(start / static_cast<cl_ulong>(_scale_factor), _unit));
194 measurements.emplace("[end]" + name, Measurement(end / static_cast<cl_ulong>(_scale_factor), _unit));
195 }
196 else
197 {
198 measurements.emplace(name, Measurement((end - start) / _scale_factor, _unit));
199 }
Anthony Barbier6e433492017-11-09 15:52:00 +0000200 }
201
202 return measurements;
203}
Anthony Barbier72f4ae52018-11-07 17:33:54 +0000204
Anthony Barbiere7f4a432018-11-09 10:58:40 +0000205template <bool output_timestamps>
206Instrument::MeasurementsMap OpenCLClock<output_timestamps>::test_measurements() const
207{
208 MeasurementsMap measurements;
209
210 if(output_timestamps)
211 {
212 // The OpenCL clock and the wall clock are not in sync, so we use
213 // this trick to calculate the offset between the two clocks:
214 ::cl::Event event;
215 cl_ulong now_gpu;
216
217 // Enqueue retrieve current CPU clock and enqueue a dummy marker
218 std::chrono::high_resolution_clock::time_point now_cpu = std::chrono::high_resolution_clock::now();
219 CLScheduler::get().queue().enqueueMarker(&event);
220
221 CLScheduler::get().queue().finish();
222 //Access the time at which the marker was enqueued:
223 event.getProfilingInfo(CL_PROFILING_COMMAND_QUEUED, &now_gpu);
224
225 measurements.emplace("Now Wall clock", Measurement(now_cpu.time_since_epoch().count() / 1000, "us"));
226 measurements.emplace("Now OpenCL", Measurement(now_gpu / static_cast<cl_ulong>(_scale_factor), _unit));
227 }
228
229 return measurements;
230}
231
Anthony Barbier72f4ae52018-11-07 17:33:54 +0000232template class OpenCLClock<true>;
233template class OpenCLClock<false>;
Anthony Barbier6e433492017-11-09 15:52:00 +0000234} // namespace framework
235} // namespace test
236} // namespace arm_compute