David Monahan | 8a57046 | 2023-11-22 13:24:25 +0000 | [diff] [blame] | 1 | // |
| 2 | // Copyright © 2023 Arm Ltd and Contributors. All rights reserved. |
| 3 | // SPDX-License-Identifier: MIT |
| 4 | // |
| 5 | |
| 6 | #include "GpuFsaContextControl.hpp" |
| 7 | |
| 8 | #include <armnn/Exceptions.hpp> |
| 9 | #include <armnn/utility/Assert.hpp> |
| 10 | #include <LeakChecking.hpp> |
| 11 | |
| 12 | #include <arm_compute/core/CL/CLKernelLibrary.h> |
| 13 | #include <arm_compute/runtime/CL/CLScheduler.h> |
| 14 | |
| 15 | #include <fmt/format.h> |
| 16 | |
// Forward declarations of the OpenCL C++ wrapper types referred to in this file.
namespace cl
{
class CommandQueue;
class Context;
class Device;
} // namespace cl
| 23 | |
| 24 | namespace armnn |
| 25 | { |
| 26 | |
| 27 | GpuFsaContextControl::GpuFsaContextControl(arm_compute::CLTuner *tuner, |
| 28 | arm_compute::CLGEMMHeuristicsHandle* heuristicsHandle, |
| 29 | bool profilingEnabled) |
| 30 | : m_Tuner(tuner) |
| 31 | , m_HeuristicsHandle(heuristicsHandle) |
| 32 | , m_ProfilingEnabled(profilingEnabled) |
| 33 | { |
| 34 | try |
| 35 | { |
| 36 | std::vector<cl::Platform> platforms; |
| 37 | cl::Platform::get(&platforms); |
| 38 | |
| 39 | // Selects default platform for the first element. |
| 40 | cl::Platform::setDefault(platforms[0]); |
| 41 | |
| 42 | std::vector<cl::Device> devices; |
| 43 | platforms[0].getDevices(CL_DEVICE_TYPE_GPU, &devices); |
| 44 | |
| 45 | // Selects default device for the first element. |
| 46 | cl::Device::setDefault(devices[0]); |
| 47 | } |
| 48 | catch (const cl::Error& clError) |
| 49 | { |
| 50 | throw ClRuntimeUnavailableException(fmt::format( |
| 51 | "Could not initialize the CL runtime. Error description: {0}. CL error code: {1}", |
| 52 | clError.what(), clError.err())); |
| 53 | } |
| 54 | |
| 55 | // Removes the use of global CL context. |
| 56 | cl::Context::setDefault(cl::Context{}); |
| 57 | if (cl::Context::getDefault()() != NULL) |
| 58 | { |
| 59 | throw armnn::Exception("GpuFsaContextControl: Unable to remove the global CL context"); |
| 60 | } |
| 61 | |
| 62 | // Removes the use of global CL command queue. |
| 63 | cl::CommandQueue::setDefault(cl::CommandQueue{}); |
| 64 | if (cl::CommandQueue::getDefault()() != NULL) |
| 65 | { |
| 66 | throw armnn::Exception("GpuFsaContextControl: Unable to remove the global CL command queue"); |
| 67 | } |
| 68 | |
| 69 | // Always load the OpenCL runtime. |
| 70 | LoadOpenClRuntime(); |
| 71 | } |
| 72 | |
| 73 | GpuFsaContextControl::~GpuFsaContextControl() |
| 74 | { |
| 75 | // Load the OpencCL runtime without the tuned parameters to free the memory for them. |
| 76 | try |
| 77 | { |
| 78 | UnloadOpenClRuntime(); |
| 79 | } |
| 80 | catch (const cl::Error& clError) |
| 81 | { |
| 82 | // This should not happen, it is ignored if it does. |
| 83 | |
| 84 | // Coverity fix: BOOST_LOG_TRIVIAL (previously used here to report the error) may throw an |
| 85 | // exception of type std::length_error. |
| 86 | // Using stderr instead in this context as there is no point in nesting try-catch blocks here. |
| 87 | std::cerr << "A CL error occurred unloading the runtime tuner parameters: " |
| 88 | << clError.what() << ". CL error code is: " << clError.err() << std::endl; |
| 89 | } |
| 90 | } |
| 91 | |
| 92 | void GpuFsaContextControl::LoadOpenClRuntime() |
| 93 | { |
| 94 | DoLoadOpenClRuntime(true); |
| 95 | } |
| 96 | |
| 97 | void GpuFsaContextControl::UnloadOpenClRuntime() |
| 98 | { |
| 99 | DoLoadOpenClRuntime(false); |
| 100 | } |
| 101 | |
| 102 | void GpuFsaContextControl::DoLoadOpenClRuntime(bool updateTunedParameters) |
| 103 | { |
| 104 | cl::Device device = cl::Device::getDefault(); |
| 105 | cl::Context context; |
| 106 | cl::CommandQueue commandQueue; |
| 107 | |
| 108 | if (arm_compute::CLScheduler::get().is_initialised() && arm_compute::CLScheduler::get().context()() != NULL) |
| 109 | { |
| 110 | // Wait for all queued CL requests to finish before reinitialising it. |
| 111 | arm_compute::CLScheduler::get().sync(); |
| 112 | } |
| 113 | |
| 114 | try |
| 115 | { |
| 116 | arm_compute::CLKernelLibrary::get().clear_programs_cache(); |
| 117 | // Initialise the scheduler with a dummy context to release the LLVM data (which only happens when there are no |
| 118 | // context references); it is initialised again, with a proper context, later. |
| 119 | arm_compute::CLScheduler::get().init(context, commandQueue, device); |
| 120 | arm_compute::CLKernelLibrary::get().init(".", context, device); |
| 121 | |
| 122 | { |
| 123 | // |
| 124 | // Here we replace the context with a new one in which |
| 125 | // the memory leak checks show it as an extra allocation but |
| 126 | // because of the scope of the leak checks, it doesn't count |
| 127 | // the disposal of the original object. On the other hand it |
| 128 | // does count the creation of this context which it flags |
| 129 | // as a memory leak. By adding the following line we prevent |
| 130 | // this to happen. |
| 131 | // |
| 132 | ARMNN_DISABLE_LEAK_CHECKING_IN_SCOPE(); |
| 133 | context = cl::Context(device); |
| 134 | } |
| 135 | |
| 136 | // NOTE: In this specific case profiling has to be enabled on the command queue |
| 137 | // in order for the CLTuner to work. |
| 138 | bool profilingNeededForClTuner = updateTunedParameters && m_Tuner && |
| 139 | m_Tuner->tune_new_kernels(); |
| 140 | |
| 141 | if (m_ProfilingEnabled || profilingNeededForClTuner) |
| 142 | { |
| 143 | // Create a new queue with profiling enabled. |
| 144 | commandQueue = cl::CommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE); |
| 145 | } |
| 146 | else |
| 147 | { |
| 148 | // Use default queue. |
| 149 | commandQueue = cl::CommandQueue(context, device); |
| 150 | } |
| 151 | } |
| 152 | catch (const cl::Error& clError) |
| 153 | { |
| 154 | throw ClRuntimeUnavailableException(fmt::format( |
| 155 | "Could not initialize the CL runtime. Error description: {0}. CL error code: {1}", |
| 156 | clError.what(), clError.err())); |
| 157 | } |
| 158 | |
| 159 | // Note the first argument (path to cl source code) will be ignored as they should be embedded in the armcompute. |
| 160 | arm_compute::CLKernelLibrary::get().init(".", context, device); |
| 161 | arm_compute::CLScheduler::get().init(context, commandQueue, device, m_Tuner, m_HeuristicsHandle); |
| 162 | } |
| 163 | |
| 164 | void GpuFsaContextControl::ClearClCache() |
| 165 | { |
| 166 | DoLoadOpenClRuntime(true); |
| 167 | } |
| 168 | |
| 169 | } // namespace armnn |