blob: ca859b6fd9c5b3d785f2adf85ed08264d8c0004e [file] [log] [blame]
Anthony Barbier6e433492017-11-09 15:52:00 +00001/*
Vidhya Sudhan Loganathanca65af32019-02-07 11:14:42 +00002 * Copyright (c) 2017-2019 ARM Limited.
Anthony Barbier6e433492017-11-09 15:52:00 +00003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "OpenCLTimer.h"
25
26#include "../Framework.h"
27#include "../Utils.h"
28
Georgios Pinitas5c2fb3f2018-05-01 15:26:20 +010029#include "arm_compute/graph/INode.h"
Anthony Barbier6e433492017-11-09 15:52:00 +000030#include "arm_compute/runtime/CL/CLScheduler.h"
31
32#ifndef ARM_COMPUTE_CL
33#error "You can't use OpenCLTimer without OpenCL"
34#endif /* ARM_COMPUTE_CL */
35
36namespace arm_compute
37{
38namespace test
39{
40namespace framework
41{
Anthony Barbier72f4ae52018-11-07 17:33:54 +000042template <bool output_timestamps>
43std::string OpenCLClock<output_timestamps>::id() const
Anthony Barbier6e433492017-11-09 15:52:00 +000044{
Anthony Barbier72f4ae52018-11-07 17:33:54 +000045 if(output_timestamps)
46 {
47 return "OpenCLTimestamps";
48 }
49 else
50 {
51 return "OpenCLTimer";
52 }
Anthony Barbier6e433492017-11-09 15:52:00 +000053}
54
Anthony Barbier72f4ae52018-11-07 17:33:54 +000055template <bool output_timestamps>
56OpenCLClock<output_timestamps>::OpenCLClock(ScaleFactor scale_factor)
Anthony Barbierc4835212018-05-16 14:20:04 +010057 : _kernels(), _real_function(nullptr), _real_graph_function(nullptr), _prefix(), _timer_enabled(false)
Anthony Barbier6e433492017-11-09 15:52:00 +000058{
59 auto q = CLScheduler::get().queue();
60 cl_command_queue_properties props = q.getInfo<CL_QUEUE_PROPERTIES>();
61 if((props & CL_QUEUE_PROFILING_ENABLE) == 0)
62 {
63 CLScheduler::get().set_queue(cl::CommandQueue(CLScheduler::get().context(), props | CL_QUEUE_PROFILING_ENABLE));
64 }
Giorgio Arenace58a9f2017-10-31 17:59:17 +000065
66 switch(scale_factor)
67 {
68 case ScaleFactor::NONE:
69 _scale_factor = 1.f;
70 _unit = "ns";
71 break;
72 case ScaleFactor::TIME_US:
73 _scale_factor = 1000.f;
74 _unit = "us";
75 break;
76 case ScaleFactor::TIME_MS:
77 _scale_factor = 1000000.f;
78 _unit = "ms";
79 break;
80 case ScaleFactor::TIME_S:
81 _scale_factor = 1000000000.f;
82 _unit = "s";
83 break;
84 default:
85 ARM_COMPUTE_ERROR("Invalid scale");
86 }
Anthony Barbier6e433492017-11-09 15:52:00 +000087}
88
Anthony Barbier72f4ae52018-11-07 17:33:54 +000089template <bool output_timestamps>
90void OpenCLClock<output_timestamps>::test_start()
Anthony Barbier6e433492017-11-09 15:52:00 +000091{
Anthony Barbier6e433492017-11-09 15:52:00 +000092 // Start intercepting enqueues:
Georgios Pinitas5c2fb3f2018-05-01 15:26:20 +010093 ARM_COMPUTE_ERROR_ON(_real_function != nullptr);
94 ARM_COMPUTE_ERROR_ON(_real_graph_function != nullptr);
95 _real_function = CLSymbols::get().clEnqueueNDRangeKernel_ptr;
96 _real_graph_function = graph::TaskExecutor::get().execute_function;
97 auto interceptor = [this](
98 cl_command_queue command_queue,
99 cl_kernel kernel,
100 cl_uint work_dim,
101 const size_t *gwo,
102 const size_t *gws,
103 const size_t *lws,
104 cl_uint num_events_in_wait_list,
105 const cl_event * event_wait_list,
106 cl_event * event)
Anthony Barbier48c19f12018-04-20 11:31:52 +0100107 {
Anthony Barbierc4835212018-05-16 14:20:04 +0100108 if(this->_timer_enabled)
109 {
Anthony Barbier72f4ae52018-11-07 17:33:54 +0000110 kernel_info info;
111 cl::Kernel cpp_kernel(kernel, true);
112 std::stringstream ss;
Anthony Barbierc4835212018-05-16 14:20:04 +0100113 ss << this->_prefix << cpp_kernel.getInfo<CL_KERNEL_FUNCTION_NAME>();
114 if(gws != nullptr)
115 {
116 ss << " GWS[" << gws[0] << "," << gws[1] << "," << gws[2] << "]";
117 }
118 if(lws != nullptr)
119 {
120 ss << " LWS[" << lws[0] << "," << lws[1] << "," << lws[2] << "]";
121 }
122 info.name = ss.str();
123 cl_event tmp;
124 cl_int retval = this->_real_function(command_queue, kernel, work_dim, gwo, gws, lws, num_events_in_wait_list, event_wait_list, &tmp);
125 info.event = tmp;
126 this->_kernels.push_back(std::move(info));
Vidhya Sudhan Loganathanca65af32019-02-07 11:14:42 +0000127
128 if(event != nullptr)
129 {
130 //return cl_event from the intercepted call
131 clRetainEvent(tmp);
132 *event = tmp;
133 }
Anthony Barbierc4835212018-05-16 14:20:04 +0100134 return retval;
Anthony Barbier48c19f12018-04-20 11:31:52 +0100135 }
Anthony Barbierc4835212018-05-16 14:20:04 +0100136 else
Anthony Barbier48c19f12018-04-20 11:31:52 +0100137 {
Anthony Barbierc4835212018-05-16 14:20:04 +0100138 return this->_real_function(command_queue, kernel, work_dim, gwo, gws, lws, num_events_in_wait_list, event_wait_list, event);
Anthony Barbier48c19f12018-04-20 11:31:52 +0100139 }
Anthony Barbier48c19f12018-04-20 11:31:52 +0100140 };
141
Georgios Pinitas5c2fb3f2018-05-01 15:26:20 +0100142 // Start intercepting tasks:
143 auto task_interceptor = [this](graph::ExecutionTask & task)
144 {
145 if(task.node != nullptr && !task.node->name().empty())
146 {
147 this->_prefix = task.node->name() + "/";
148 }
149 else
150 {
151 this->_prefix = "";
152 }
153 this->_real_graph_function(task);
154 this->_prefix = "";
155 };
156
Anthony Barbier48c19f12018-04-20 11:31:52 +0100157 CLSymbols::get().clEnqueueNDRangeKernel_ptr = interceptor;
Georgios Pinitas5c2fb3f2018-05-01 15:26:20 +0100158 graph::TaskExecutor::get().execute_function = task_interceptor;
Anthony Barbier6e433492017-11-09 15:52:00 +0000159}
160
Anthony Barbier72f4ae52018-11-07 17:33:54 +0000161template <bool output_timestamps>
162void OpenCLClock<output_timestamps>::start()
Georgios Pinitas5c2fb3f2018-05-01 15:26:20 +0100163{
164 _kernels.clear();
Anthony Barbierc4835212018-05-16 14:20:04 +0100165 _timer_enabled = true;
166}
Anthony Barbier72f4ae52018-11-07 17:33:54 +0000167template <bool output_timestamps>
168void OpenCLClock<output_timestamps>::stop()
Anthony Barbierc4835212018-05-16 14:20:04 +0100169{
170 _timer_enabled = false;
Georgios Pinitas5c2fb3f2018-05-01 15:26:20 +0100171}
172
Anthony Barbier72f4ae52018-11-07 17:33:54 +0000173template <bool output_timestamps>
174void OpenCLClock<output_timestamps>::test_stop()
Anthony Barbier6e433492017-11-09 15:52:00 +0000175{
176 // Restore real function
Georgios Pinitas5c2fb3f2018-05-01 15:26:20 +0100177 CLSymbols::get().clEnqueueNDRangeKernel_ptr = _real_function;
178 graph::TaskExecutor::get().execute_function = _real_graph_function;
179 _real_graph_function = nullptr;
180 _real_function = nullptr;
Anthony Barbier6e433492017-11-09 15:52:00 +0000181}
182
Anthony Barbier72f4ae52018-11-07 17:33:54 +0000183template <bool output_timestamps>
184Instrument::MeasurementsMap OpenCLClock<output_timestamps>::measurements() const
Anthony Barbier6e433492017-11-09 15:52:00 +0000185{
186 MeasurementsMap measurements;
187 unsigned int kernel_number = 0;
Georgios Pinitas5c2fb3f2018-05-01 15:26:20 +0100188 for(auto kernel : _kernels)
Anthony Barbier6e433492017-11-09 15:52:00 +0000189 {
Anthony Barbier1e1bef12018-11-20 16:54:42 +0000190 cl_ulong queued, flushed, start, end;
191 kernel.event.getProfilingInfo(CL_PROFILING_COMMAND_QUEUED, &queued);
192 kernel.event.getProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, &flushed);
Anthony Barbier72f4ae52018-11-07 17:33:54 +0000193 kernel.event.getProfilingInfo(CL_PROFILING_COMMAND_START, &start);
194 kernel.event.getProfilingInfo(CL_PROFILING_COMMAND_END, &end);
195 std::string name = kernel.name + " #" + support::cpp11::to_string(kernel_number++);
Anthony Barbier6e433492017-11-09 15:52:00 +0000196
Anthony Barbier72f4ae52018-11-07 17:33:54 +0000197 if(output_timestamps)
198 {
199 measurements.emplace("[start]" + name, Measurement(start / static_cast<cl_ulong>(_scale_factor), _unit));
Anthony Barbier1e1bef12018-11-20 16:54:42 +0000200 measurements.emplace("[queued]" + name, Measurement(queued / static_cast<cl_ulong>(_scale_factor), _unit));
201 measurements.emplace("[flushed]" + name, Measurement(flushed / static_cast<cl_ulong>(_scale_factor), _unit));
Anthony Barbier72f4ae52018-11-07 17:33:54 +0000202 measurements.emplace("[end]" + name, Measurement(end / static_cast<cl_ulong>(_scale_factor), _unit));
203 }
204 else
205 {
206 measurements.emplace(name, Measurement((end - start) / _scale_factor, _unit));
207 }
Anthony Barbier6e433492017-11-09 15:52:00 +0000208 }
209
210 return measurements;
211}
Anthony Barbier72f4ae52018-11-07 17:33:54 +0000212
Anthony Barbiere7f4a432018-11-09 10:58:40 +0000213template <bool output_timestamps>
214Instrument::MeasurementsMap OpenCLClock<output_timestamps>::test_measurements() const
215{
216 MeasurementsMap measurements;
217
218 if(output_timestamps)
219 {
220 // The OpenCL clock and the wall clock are not in sync, so we use
221 // this trick to calculate the offset between the two clocks:
222 ::cl::Event event;
223 cl_ulong now_gpu;
224
225 // Enqueue retrieve current CPU clock and enqueue a dummy marker
226 std::chrono::high_resolution_clock::time_point now_cpu = std::chrono::high_resolution_clock::now();
227 CLScheduler::get().queue().enqueueMarker(&event);
228
229 CLScheduler::get().queue().finish();
230 //Access the time at which the marker was enqueued:
231 event.getProfilingInfo(CL_PROFILING_COMMAND_QUEUED, &now_gpu);
232
233 measurements.emplace("Now Wall clock", Measurement(now_cpu.time_since_epoch().count() / 1000, "us"));
234 measurements.emplace("Now OpenCL", Measurement(now_gpu / static_cast<cl_ulong>(_scale_factor), _unit));
235 }
236
237 return measurements;
238}
239
Anthony Barbier6e433492017-11-09 15:52:00 +0000240} // namespace framework
241} // namespace test
242} // namespace arm_compute
Anthony Barbiercc225be2018-11-09 15:35:20 +0000243
244template class arm_compute::test::framework::OpenCLClock<true>;
245template class arm_compute::test::framework::OpenCLClock<false>;