//
// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "GpuFsaContextControl.hpp"

#include <armnn/Exceptions.hpp>
#include <armnn/utility/Assert.hpp>
#include <LeakChecking.hpp>

#include <arm_compute/core/CL/CLKernelLibrary.h>
#include <arm_compute/runtime/CL/CLScheduler.h>

#include <fmt/format.h>

namespace cl
{
class Context;
class CommandQueue;
class Device;
}

namespace armnn
{

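// Selects the default OpenCL platform and GPU device, removes the global CL context and
// command queue, and then loads the OpenCL runtime.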
GpuFsaContextControl::GpuFsaContextControl(arm_compute::CLTuner *tuner,
                                           arm_compute::CLGEMMHeuristicsHandle* heuristicsHandle,
                                           bool profilingEnabled)
    : m_Tuner(tuner)
    , m_HeuristicsHandle(heuristicsHandle)
    , m_ProfilingEnabled(profilingEnabled)
{
    try
    {
        std::vector<cl::Platform> platforms;
        cl::Platform::get(&platforms);

        // Select the first platform as the default platform.
        cl::Platform::setDefault(platforms[0]);

        std::vector<cl::Device> devices;
        platforms[0].getDevices(CL_DEVICE_TYPE_GPU, &devices);

        // Select the first GPU device as the default device.
        cl::Device::setDefault(devices[0]);
    }
    catch (const cl::Error& clError)
    {
        throw ClRuntimeUnavailableException(fmt::format(
            "Could not initialize the CL runtime. Error description: {0}. CL error code: {1}",
            clError.what(), clError.err()));
    }

    // Removes the use of the global CL context.
    cl::Context::setDefault(cl::Context{});
    if (cl::Context::getDefault()() != NULL)
    {
        throw armnn::Exception("GpuFsaContextControl: Unable to remove the global CL context");
    }

    // Removes the use of the global CL command queue.
    cl::CommandQueue::setDefault(cl::CommandQueue{});
    if (cl::CommandQueue::getDefault()() != NULL)
    {
        throw armnn::Exception("GpuFsaContextControl: Unable to remove the global CL command queue");
    }

    // Always load the OpenCL runtime.
    LoadOpenClRuntime();
}
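// A minimal construction sketch (illustrative only; the CLTuner instance and the argument
// values below are assumptions, not taken from this file):
//
//     arm_compute::CLTuner tuner;
//     armnn::GpuFsaContextControl contextControl(&tuner, nullptr, /*profilingEnabled=*/false);
//     // At this point the default platform/device are selected and the OpenCL runtime is loaded.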

GpuFsaContextControl::~GpuFsaContextControl()
{
    // Load the OpenCL runtime without the tuned parameters to free the memory used by them.
    try
    {
        UnloadOpenClRuntime();
    }
    catch (const cl::Error& clError)
    {
        // This should not happen; if it does, the error is ignored.

        // Coverity fix: BOOST_LOG_TRIVIAL (previously used here to report the error) may throw an
        // exception of type std::length_error.
        // Using stderr instead in this context as there is no point in nesting try-catch blocks here.
        std::cerr << "A CL error occurred unloading the runtime tuner parameters: "
                  << clError.what() << ". CL error code is: " << clError.err() << std::endl;
    }
}

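// Reinitialises the OpenCL runtime, allowing tuned parameters to be updated if a CLTuner is in use.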
void GpuFsaContextControl::LoadOpenClRuntime()
{
    DoLoadOpenClRuntime(true);
}

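// Reinitialises the OpenCL runtime without updating tuned parameters; used by the destructor
// to release the memory held for them.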
void GpuFsaContextControl::UnloadOpenClRuntime()
{
    DoLoadOpenClRuntime(false);
}

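// Syncs any outstanding CL work, clears the cached CL programs, recreates the CL context and
// command queue (enabling profiling when requested or when the CLTuner needs to time kernels),
// and reinitialises the CLKernelLibrary and CLScheduler with them.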
void GpuFsaContextControl::DoLoadOpenClRuntime(bool updateTunedParameters)
{
    cl::Device device = cl::Device::getDefault();
    cl::Context context;
    cl::CommandQueue commandQueue;

    if (arm_compute::CLScheduler::get().is_initialised() && arm_compute::CLScheduler::get().context()() != NULL)
    {
        // Wait for all queued CL requests to finish before reinitialising the scheduler.
        arm_compute::CLScheduler::get().sync();
    }

    try
    {
        arm_compute::CLKernelLibrary::get().clear_programs_cache();
        // Initialise the scheduler with a dummy context to release the LLVM data (which only happens when there are
        // no context references); it is initialised again, with a proper context, later.
        arm_compute::CLScheduler::get().init(context, commandQueue, device);
        arm_compute::CLKernelLibrary::get().init(".", context, device);

        {
            //
            // Here we replace the context with a new one. Because of the scope of the leak checks,
            // they see the new context as an extra allocation but do not see the disposal of the
            // original object, so the creation of this context would be flagged as a memory leak.
            // The following line prevents that.
            //
            ARMNN_DISABLE_LEAK_CHECKING_IN_SCOPE();
            context = cl::Context(device);
        }

        // NOTE: In this specific case profiling has to be enabled on the command queue
        // in order for the CLTuner to work.
        bool profilingNeededForClTuner = updateTunedParameters && m_Tuner &&
                                         m_Tuner->tune_new_kernels();

        if (m_ProfilingEnabled || profilingNeededForClTuner)
        {
            // Create a new queue with profiling enabled.
            commandQueue = cl::CommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE);
        }
        else
        {
            // Use default queue.
            commandQueue = cl::CommandQueue(context, device);
        }
    }
    catch (const cl::Error& clError)
    {
        throw ClRuntimeUnavailableException(fmt::format(
            "Could not initialize the CL runtime. Error description: {0}. CL error code: {1}",
            clError.what(), clError.err()));
    }

    // Note: the first argument (the path to the CL source code) is ignored, as the kernels are
    // expected to be embedded in arm_compute.
    arm_compute::CLKernelLibrary::get().init(".", context, device);
    arm_compute::CLScheduler::get().init(context, commandQueue, device, m_Tuner, m_HeuristicsHandle);
}

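// Clears the cached CL programs by reloading the OpenCL runtime: DoLoadOpenClRuntime() calls
// CLKernelLibrary::clear_programs_cache() before reinitialising the scheduler and kernel library.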
void GpuFsaContextControl::ClearClCache()
{
    DoLoadOpenClRuntime(true);
}

} // namespace armnn