//
// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "GpuFsaContextControl.hpp"

#include <armnn/Exceptions.hpp>
#include <armnn/utility/Assert.hpp>
#include <LeakChecking.hpp>

#include <arm_compute/core/CL/CLKernelLibrary.h>
#include <arm_compute/runtime/CL/CLScheduler.h>

#include <fmt/format.h>

namespace cl
{
class Context;
class CommandQueue;
class Device;
}

namespace armnn
{

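// Selects the default OpenCL platform and GPU device, removes the global CL context and
// command queue, and then loads the OpenCL runtime.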
GpuFsaContextControl::GpuFsaContextControl(arm_compute::CLTuner *tuner,
                                           arm_compute::CLGEMMHeuristicsHandle* heuristicsHandle,
                                           bool profilingEnabled)
    : m_Tuner(tuner)
    , m_HeuristicsHandle(heuristicsHandle)
    , m_ProfilingEnabled(profilingEnabled)
{
    try
    {
        std::vector<cl::Platform> platforms;
        cl::Platform::get(&platforms);

        // Select the first platform as the default platform.
        cl::Platform::setDefault(platforms[0]);

        std::vector<cl::Device> devices;
        platforms[0].getDevices(CL_DEVICE_TYPE_GPU, &devices);

        // Select the first GPU device as the default device.
        cl::Device::setDefault(devices[0]);
    }
    catch (const cl::Error& clError)
    {
        throw ClRuntimeUnavailableException(fmt::format(
            "Could not initialize the CL runtime. Error description: {0}. CL error code: {1}",
            clError.what(), clError.err()));
    }

    // Removes the use of the global CL context.
    cl::Context::setDefault(cl::Context{});
    if (cl::Context::getDefault()() != NULL)
    {
        throw armnn::Exception("GpuFsaContextControl: Unable to remove the global CL context");
    }

    // Removes the use of the global CL command queue.
    cl::CommandQueue::setDefault(cl::CommandQueue{});
    if (cl::CommandQueue::getDefault()() != NULL)
    {
        throw armnn::Exception("GpuFsaContextControl: Unable to remove the global CL command queue");
    }

    // Always load the OpenCL runtime.
    LoadOpenClRuntime();
}
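// A minimal construction sketch (illustrative only; the CLTuner instance and the argument
// values below are assumptions, not taken from this file):
//
//     arm_compute::CLTuner tuner;
//     armnn::GpuFsaContextControl contextControl(&tuner, nullptr, /*profilingEnabled=*/false);
//     // At this point the default platform/device are selected and the OpenCL runtime is loaded.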

GpuFsaContextControl::~GpuFsaContextControl()
{
    // Load the OpenCL runtime without the tuned parameters to free the memory used by them.
    try
    {
        UnloadOpenClRuntime();
    }
    catch (const cl::Error& clError)
    {
        // This should not happen; if it does, the error is ignored.

        // Coverity fix: BOOST_LOG_TRIVIAL (previously used here to report the error) may throw an
        // exception of type std::length_error.
        // Using stderr instead in this context as there is no point in nesting try-catch blocks here.
        std::cerr << "A CL error occurred unloading the runtime tuner parameters: "
                  << clError.what() << ". CL error code is: " << clError.err() << std::endl;
    }
}

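// Reinitialises the OpenCL runtime, allowing tuned parameters to be updated if a CLTuner is in use.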
void GpuFsaContextControl::LoadOpenClRuntime()
{
    DoLoadOpenClRuntime(true);
}

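// Reinitialises the OpenCL runtime without updating tuned parameters; used by the destructor
// to release the memory held for them.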
void GpuFsaContextControl::UnloadOpenClRuntime()
{
    DoLoadOpenClRuntime(false);
}

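// Syncs any outstanding CL work, clears the cached CL programs, recreates the CL context and
// command queue (enabling profiling when requested or when the CLTuner needs to time kernels),
// and reinitialises the CLKernelLibrary and CLScheduler with them.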
void GpuFsaContextControl::DoLoadOpenClRuntime(bool updateTunedParameters)
{
    cl::Device device = cl::Device::getDefault();
    cl::Context context;
    cl::CommandQueue commandQueue;

    if (arm_compute::CLScheduler::get().is_initialised() && arm_compute::CLScheduler::get().context()() != NULL)
    {
        // Wait for all queued CL requests to finish before reinitialising the scheduler.
        arm_compute::CLScheduler::get().sync();
    }

    try
    {
        arm_compute::CLKernelLibrary::get().clear_programs_cache();
        // Initialise the scheduler with a dummy context to release the LLVM data (which only happens when there are
        // no context references); it is initialised again, with a proper context, later.
        arm_compute::CLScheduler::get().init(context, commandQueue, device);
        arm_compute::CLKernelLibrary::get().init(".", context, device);

        {
            //
            // Here we replace the context with a new one. Because of the scope of the leak checks,
            // they see the new context as an extra allocation but do not see the disposal of the
            // original object, so the creation of this context would be flagged as a memory leak.
            // The following line prevents that.
            //
            ARMNN_DISABLE_LEAK_CHECKING_IN_SCOPE();
            context = cl::Context(device);
        }

        // NOTE: In this specific case profiling has to be enabled on the command queue
        // in order for the CLTuner to work.
        bool profilingNeededForClTuner = updateTunedParameters && m_Tuner &&
                                         m_Tuner->tune_new_kernels();

        if (m_ProfilingEnabled || profilingNeededForClTuner)
        {
            // Create a new queue with profiling enabled.
            commandQueue = cl::CommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE);
        }
        else
        {
            // Use default queue.
            commandQueue = cl::CommandQueue(context, device);
        }
    }
    catch (const cl::Error& clError)
    {
        throw ClRuntimeUnavailableException(fmt::format(
            "Could not initialize the CL runtime. Error description: {0}. CL error code: {1}",
            clError.what(), clError.err()));
    }

    // Note: the first argument (the path to the CL source code) is ignored, as the kernels are
    // expected to be embedded in arm_compute.
    arm_compute::CLKernelLibrary::get().init(".", context, device);
    arm_compute::CLScheduler::get().init(context, commandQueue, device, m_Tuner, m_HeuristicsHandle);
}

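// Clears the cached CL programs by reloading the OpenCL runtime: DoLoadOpenClRuntime() calls
// CLKernelLibrary::clear_programs_cache() before reinitialising the scheduler and kernel library.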
void GpuFsaContextControl::ClearClCache()
{
    DoLoadOpenClRuntime(true);
}

} // namespace armnn