blob: e9f945bd95f9a0ca9085aa97d26357985a54c407 [file] [log] [blame]
Anthony Barbier6e433492017-11-09 15:52:00 +00001/*
Georgios Pinitasb6af4822021-09-14 12:33:34 +01002 * Copyright (c) 2017-2019, 2021 Arm Limited.
Anthony Barbier6e433492017-11-09 15:52:00 +00003 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "OpenCLTimer.h"
25
26#include "../Framework.h"
27#include "../Utils.h"
28
Georgios Pinitas5c2fb3f2018-05-01 15:26:20 +010029#include "arm_compute/graph/INode.h"
Anthony Barbier6e433492017-11-09 15:52:00 +000030#include "arm_compute/runtime/CL/CLScheduler.h"
31
32#ifndef ARM_COMPUTE_CL
33#error "You can't use OpenCLTimer without OpenCL"
34#endif /* ARM_COMPUTE_CL */
35
36namespace arm_compute
37{
38namespace test
39{
40namespace framework
41{
Anthony Barbier72f4ae52018-11-07 17:33:54 +000042template <bool output_timestamps>
43std::string OpenCLClock<output_timestamps>::id() const
Anthony Barbier6e433492017-11-09 15:52:00 +000044{
Anthony Barbier72f4ae52018-11-07 17:33:54 +000045 if(output_timestamps)
46 {
47 return "OpenCLTimestamps";
48 }
49 else
50 {
51 return "OpenCLTimer";
52 }
Anthony Barbier6e433492017-11-09 15:52:00 +000053}
54
Anthony Barbier72f4ae52018-11-07 17:33:54 +000055template <bool output_timestamps>
56OpenCLClock<output_timestamps>::OpenCLClock(ScaleFactor scale_factor)
Georgios Pinitasb6af4822021-09-14 12:33:34 +010057 : _kernels(),
58 _real_function(nullptr),
59#ifdef ARM_COMPUTE_GRAPH_ENABLED
60 _real_graph_function(nullptr),
61#endif /* ARM_COMPUTE_GRAPH_ENABLED */
62 _prefix(),
63 _timer_enabled(false)
Anthony Barbier6e433492017-11-09 15:52:00 +000064{
65 auto q = CLScheduler::get().queue();
66 cl_command_queue_properties props = q.getInfo<CL_QUEUE_PROPERTIES>();
67 if((props & CL_QUEUE_PROFILING_ENABLE) == 0)
68 {
69 CLScheduler::get().set_queue(cl::CommandQueue(CLScheduler::get().context(), props | CL_QUEUE_PROFILING_ENABLE));
70 }
Giorgio Arenace58a9f2017-10-31 17:59:17 +000071
72 switch(scale_factor)
73 {
74 case ScaleFactor::NONE:
75 _scale_factor = 1.f;
76 _unit = "ns";
77 break;
78 case ScaleFactor::TIME_US:
79 _scale_factor = 1000.f;
80 _unit = "us";
81 break;
82 case ScaleFactor::TIME_MS:
83 _scale_factor = 1000000.f;
84 _unit = "ms";
85 break;
86 case ScaleFactor::TIME_S:
87 _scale_factor = 1000000000.f;
88 _unit = "s";
89 break;
90 default:
91 ARM_COMPUTE_ERROR("Invalid scale");
92 }
Anthony Barbier6e433492017-11-09 15:52:00 +000093}
94
Anthony Barbier72f4ae52018-11-07 17:33:54 +000095template <bool output_timestamps>
96void OpenCLClock<output_timestamps>::test_start()
Anthony Barbier6e433492017-11-09 15:52:00 +000097{
Anthony Barbier6e433492017-11-09 15:52:00 +000098 // Start intercepting enqueues:
Georgios Pinitas5c2fb3f2018-05-01 15:26:20 +010099 ARM_COMPUTE_ERROR_ON(_real_function != nullptr);
Georgios Pinitasb6af4822021-09-14 12:33:34 +0100100 _real_function = CLSymbols::get().clEnqueueNDRangeKernel_ptr;
101 auto interceptor = [this](
102 cl_command_queue command_queue,
103 cl_kernel kernel,
104 cl_uint work_dim,
105 const size_t *gwo,
106 const size_t *gws,
107 const size_t *lws,
108 cl_uint num_events_in_wait_list,
109 const cl_event * event_wait_list,
110 cl_event * event)
Anthony Barbier48c19f12018-04-20 11:31:52 +0100111 {
Anthony Barbierc4835212018-05-16 14:20:04 +0100112 if(this->_timer_enabled)
113 {
Anthony Barbier72f4ae52018-11-07 17:33:54 +0000114 kernel_info info;
115 cl::Kernel cpp_kernel(kernel, true);
116 std::stringstream ss;
Anthony Barbierc4835212018-05-16 14:20:04 +0100117 ss << this->_prefix << cpp_kernel.getInfo<CL_KERNEL_FUNCTION_NAME>();
118 if(gws != nullptr)
119 {
120 ss << " GWS[" << gws[0] << "," << gws[1] << "," << gws[2] << "]";
121 }
122 if(lws != nullptr)
123 {
124 ss << " LWS[" << lws[0] << "," << lws[1] << "," << lws[2] << "]";
125 }
126 info.name = ss.str();
127 cl_event tmp;
128 cl_int retval = this->_real_function(command_queue, kernel, work_dim, gwo, gws, lws, num_events_in_wait_list, event_wait_list, &tmp);
129 info.event = tmp;
130 this->_kernels.push_back(std::move(info));
Vidhya Sudhan Loganathanca65af32019-02-07 11:14:42 +0000131
132 if(event != nullptr)
133 {
134 //return cl_event from the intercepted call
135 clRetainEvent(tmp);
136 *event = tmp;
137 }
Anthony Barbierc4835212018-05-16 14:20:04 +0100138 return retval;
Anthony Barbier48c19f12018-04-20 11:31:52 +0100139 }
Anthony Barbierc4835212018-05-16 14:20:04 +0100140 else
Anthony Barbier48c19f12018-04-20 11:31:52 +0100141 {
Anthony Barbierc4835212018-05-16 14:20:04 +0100142 return this->_real_function(command_queue, kernel, work_dim, gwo, gws, lws, num_events_in_wait_list, event_wait_list, event);
Anthony Barbier48c19f12018-04-20 11:31:52 +0100143 }
Anthony Barbier48c19f12018-04-20 11:31:52 +0100144 };
Georgios Pinitasb6af4822021-09-14 12:33:34 +0100145 CLSymbols::get().clEnqueueNDRangeKernel_ptr = interceptor;
Anthony Barbier48c19f12018-04-20 11:31:52 +0100146
Georgios Pinitasb6af4822021-09-14 12:33:34 +0100147#ifdef ARM_COMPUTE_GRAPH_ENABLED
148 ARM_COMPUTE_ERROR_ON(_real_graph_function != nullptr);
149 _real_graph_function = graph::TaskExecutor::get().execute_function;
Georgios Pinitas5c2fb3f2018-05-01 15:26:20 +0100150 // Start intercepting tasks:
151 auto task_interceptor = [this](graph::ExecutionTask & task)
152 {
153 if(task.node != nullptr && !task.node->name().empty())
154 {
155 this->_prefix = task.node->name() + "/";
156 }
157 else
158 {
159 this->_prefix = "";
160 }
161 this->_real_graph_function(task);
162 this->_prefix = "";
163 };
Georgios Pinitas5c2fb3f2018-05-01 15:26:20 +0100164 graph::TaskExecutor::get().execute_function = task_interceptor;
Georgios Pinitasb6af4822021-09-14 12:33:34 +0100165#endif /* ARM_COMPUTE_GRAPH_ENABLED */
Anthony Barbier6e433492017-11-09 15:52:00 +0000166}
167
Anthony Barbier72f4ae52018-11-07 17:33:54 +0000168template <bool output_timestamps>
169void OpenCLClock<output_timestamps>::start()
Georgios Pinitas5c2fb3f2018-05-01 15:26:20 +0100170{
171 _kernels.clear();
Anthony Barbierc4835212018-05-16 14:20:04 +0100172 _timer_enabled = true;
173}
Anthony Barbier72f4ae52018-11-07 17:33:54 +0000174template <bool output_timestamps>
175void OpenCLClock<output_timestamps>::stop()
Anthony Barbierc4835212018-05-16 14:20:04 +0100176{
177 _timer_enabled = false;
Georgios Pinitas5c2fb3f2018-05-01 15:26:20 +0100178}
179
Anthony Barbier72f4ae52018-11-07 17:33:54 +0000180template <bool output_timestamps>
181void OpenCLClock<output_timestamps>::test_stop()
Anthony Barbier6e433492017-11-09 15:52:00 +0000182{
183 // Restore real function
Georgios Pinitas5c2fb3f2018-05-01 15:26:20 +0100184 CLSymbols::get().clEnqueueNDRangeKernel_ptr = _real_function;
Georgios Pinitasb6af4822021-09-14 12:33:34 +0100185 _real_function = nullptr;
186#ifdef ARM_COMPUTE_GRAPH_ENABLED
Georgios Pinitas5c2fb3f2018-05-01 15:26:20 +0100187 graph::TaskExecutor::get().execute_function = _real_graph_function;
188 _real_graph_function = nullptr;
Georgios Pinitasb6af4822021-09-14 12:33:34 +0100189#endif /* ARM_COMPUTE_GRAPH_ENABLED */
Anthony Barbier6e433492017-11-09 15:52:00 +0000190}
191
Anthony Barbier72f4ae52018-11-07 17:33:54 +0000192template <bool output_timestamps>
193Instrument::MeasurementsMap OpenCLClock<output_timestamps>::measurements() const
Anthony Barbier6e433492017-11-09 15:52:00 +0000194{
195 MeasurementsMap measurements;
196 unsigned int kernel_number = 0;
Michalis Spyroubcfd09a2019-05-01 13:03:59 +0100197 for(auto const &kernel : _kernels)
Anthony Barbier6e433492017-11-09 15:52:00 +0000198 {
Michalis Spyroubcfd09a2019-05-01 13:03:59 +0100199 cl_ulong queued;
200 cl_ulong flushed;
201 cl_ulong start;
202 cl_ulong end;
Anthony Barbier1e1bef12018-11-20 16:54:42 +0000203 kernel.event.getProfilingInfo(CL_PROFILING_COMMAND_QUEUED, &queued);
204 kernel.event.getProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, &flushed);
Anthony Barbier72f4ae52018-11-07 17:33:54 +0000205 kernel.event.getProfilingInfo(CL_PROFILING_COMMAND_START, &start);
206 kernel.event.getProfilingInfo(CL_PROFILING_COMMAND_END, &end);
207 std::string name = kernel.name + " #" + support::cpp11::to_string(kernel_number++);
Anthony Barbier6e433492017-11-09 15:52:00 +0000208
Anthony Barbier72f4ae52018-11-07 17:33:54 +0000209 if(output_timestamps)
210 {
211 measurements.emplace("[start]" + name, Measurement(start / static_cast<cl_ulong>(_scale_factor), _unit));
Anthony Barbier1e1bef12018-11-20 16:54:42 +0000212 measurements.emplace("[queued]" + name, Measurement(queued / static_cast<cl_ulong>(_scale_factor), _unit));
213 measurements.emplace("[flushed]" + name, Measurement(flushed / static_cast<cl_ulong>(_scale_factor), _unit));
Anthony Barbier72f4ae52018-11-07 17:33:54 +0000214 measurements.emplace("[end]" + name, Measurement(end / static_cast<cl_ulong>(_scale_factor), _unit));
215 }
216 else
217 {
218 measurements.emplace(name, Measurement((end - start) / _scale_factor, _unit));
219 }
Anthony Barbier6e433492017-11-09 15:52:00 +0000220 }
221
222 return measurements;
223}
Anthony Barbier72f4ae52018-11-07 17:33:54 +0000224
Anthony Barbiere7f4a432018-11-09 10:58:40 +0000225template <bool output_timestamps>
226Instrument::MeasurementsMap OpenCLClock<output_timestamps>::test_measurements() const
227{
228 MeasurementsMap measurements;
229
230 if(output_timestamps)
231 {
232 // The OpenCL clock and the wall clock are not in sync, so we use
233 // this trick to calculate the offset between the two clocks:
234 ::cl::Event event;
235 cl_ulong now_gpu;
236
237 // Enqueue retrieve current CPU clock and enqueue a dummy marker
238 std::chrono::high_resolution_clock::time_point now_cpu = std::chrono::high_resolution_clock::now();
239 CLScheduler::get().queue().enqueueMarker(&event);
240
241 CLScheduler::get().queue().finish();
242 //Access the time at which the marker was enqueued:
243 event.getProfilingInfo(CL_PROFILING_COMMAND_QUEUED, &now_gpu);
244
245 measurements.emplace("Now Wall clock", Measurement(now_cpu.time_since_epoch().count() / 1000, "us"));
246 measurements.emplace("Now OpenCL", Measurement(now_gpu / static_cast<cl_ulong>(_scale_factor), _unit));
247 }
248
249 return measurements;
250}
251
Anthony Barbier6e433492017-11-09 15:52:00 +0000252} // namespace framework
253} // namespace test
254} // namespace arm_compute
Anthony Barbiercc225be2018-11-09 15:35:20 +0000255
256template class arm_compute::test::framework::OpenCLClock<true>;
257template class arm_compute::test::framework::OpenCLClock<false>;