Blame - src/backends/gpuFsa/GpuFsaBackendContext.cpp - ml/armnn

blob: 84b948303a967d564a98fafb0a4ebeb599f5c1a4 [file] [log] [blame]

David Monahan	8a57046	2023-11-22 13:24:25 +0000	[diff] [blame]	1	//
				2	// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
				3	// SPDX-License-Identifier: MIT
				4	//
				5
				6	#include "GpuFsaBackendContext.hpp"
				7	#include "GpuFsaContextControl.hpp"
				8
				9	#include <armnn/utility/Assert.hpp>
				10	#include <armnn/utility/PolymorphicDowncast.hpp>
				11
				12	#include <arm_compute/core/CL/OpenCL.h>
				13	#include <arm_compute/core/CL/CLKernelLibrary.h>
				14	#include <arm_compute/runtime/CL/CLScheduler.h>
				15	#include <arm_compute/runtime/CL/CLTunerTypes.h>
				16
				17	namespace armnn
				18	{
				19
				20	struct GpuFsaBackendContext::GpuFsaContextControlWrapper
				21	{
				22	GpuFsaContextControlWrapper(arm_compute::CLTuner* tuner,
				23	arm_compute::CLGEMMHeuristicsHandle* heuristicsHandle,
				24	bool profilingEnabled)
				25	: m_GpuFsaContextControl(tuner, heuristicsHandle, profilingEnabled)
				26	{}
				27
				28	bool Sync()
				29	{
				30	if (arm_compute::CLScheduler::get().context()() != NULL)
				31	{
				32	// Waits for all queued CL requests to finish before unloading the network they may be using.
				33	try
				34	{
				35	// Coverity fix: arm_compute::CLScheduler::sync() may throw an exception of type cl::Error.
				36	arm_compute::CLScheduler::get().sync();
				37	}
				38	catch (const cl::Error& err)
				39	{
				40	ARMNN_LOG(warning) << "Runtime::UnloadNetwork(): an error occurred while waiting for "
				41	"the queued CL requests to finish";
				42	throw err;
				43	}
				44	}
				45
				46	return true;
				47	}
				48
				49	void ClearClCache()
				50	{
				51	if (arm_compute::CLScheduler::get().context()() != NULL)
				52	{
				53	// There are no loaded networks left, so clear the CL cache to free up memory
				54	m_GpuFsaContextControl.ClearClCache();
				55	}
				56	}
				57
				58	GpuFsaContextControl m_GpuFsaContextControl;
				59	};
				60
				61	GpuFsaBackendContext::GpuFsaBackendContext(const IRuntime::CreationOptions& options)
				62	: IBackendContext(options)
				63	, m_TuningFile()
				64	{
				65	bool kernelProfiling = options.m_EnableGpuProfiling;
				66
				67	arm_compute::CLTuner* tuner = nullptr;
				68	arm_compute::CLGEMMHeuristicsHandle* mlgoTuner = nullptr;
				69	bool useLegacyTunerAPI = options.m_GpuAccTunedParameters.get() != nullptr;
				70	if (useLegacyTunerAPI)
				71	{
				72	auto clTunerParams = PolymorphicDowncast<ClTunedParameters*>(
				73	options.m_GpuAccTunedParameters.get());
				74	tuner = &clTunerParams->m_Tuner;
				75
				76	if (tuner)
				77	{
				78	auto ConvertTuningLevel = [](IGpuAccTunedParameters::TuningLevel level,
				79	armnn::IGpuAccTunedParameters::Mode mode)
				80	{
				81	if (mode == armnn::IGpuAccTunedParameters::Mode::UseTunedParameters)
				82	{
				83	return TuningLevel::None;
				84	}
				85
				86	switch(level)
				87	{
				88	case IGpuAccTunedParameters::TuningLevel::Rapid:
				89	return TuningLevel::Rapid;
				90	case IGpuAccTunedParameters::TuningLevel::Normal:
				91	return TuningLevel::Normal;
				92	case IGpuAccTunedParameters::TuningLevel::Exhaustive:
				93	return TuningLevel::Exhaustive;
				94	default:
				95	{
				96	ARMNN_LOG(warning) << "Tuning level not recognised.";
				97	return TuningLevel::None;
				98	}
				99	}
				100	};
				101
				102	TuningLevel tuningLevel = ConvertTuningLevel(clTunerParams->m_TuningLevel, clTunerParams->m_Mode);
				103	ConfigureTuner(*tuner, tuningLevel);
				104	}
				105	}
				106	else //New backend options API
				107	{
				108	const TuningLevel defaultTuningLevel = TuningLevel::None;
				109	auto tuningLevel = defaultTuningLevel;
				110
				111	ParseOptions(options.m_BackendOptions, "GpuFsa", [&](std::string name, const BackendOptions::Var& value)
				112	{
				113	if (name == "KernelProfilingEnabled")
				114	{
				115	kernelProfiling \|= ParseBooleanBackendOption(value, false);
				116	} else if (name == "TuningFile")
				117	{
				118	m_TuningFile = ParseStringBackendOption(value, "");
				119	} else if (name == "TuningLevel")
				120	{
				121	tuningLevel = ParseTuningLevel(value, defaultTuningLevel);
				122	}
				123	else if (name == "MLGOTuningFilePath")
				124	{
				125	m_MLGOTuningFile = ParseStringBackendOption(value, "");
				126	}
				127	});
				128
				129	// Create the tuner, in tuning mode initially.
				130	m_Tuner = std::make_unique<arm_compute::CLTuner>(true);
				131
				132	ConfigureTuner(*(m_Tuner.get()), tuningLevel);
				133
				134	if (!m_TuningFile.empty())
				135	{
				136	try
				137	{
				138	ARMNN_LOG(info) << "Loading Gpu tuning data from file: " << m_TuningFile;
				139	m_Tuner->load_from_file(m_TuningFile.c_str());
				140	}
				141	catch (const std::exception& e)
				142	{
				143	// Warn if not tuning, otherwise tuning will generate new params
				144	if (tuningLevel == TuningLevel::None)
				145	{
				146	ARMNN_LOG(warning) << "Could not load GpuFsa tuner data file.";
				147	}
				148	}
				149	}
				150
				151	if (!m_MLGOTuningFile.empty())
				152	{
				153	try
				154	{
				155	ARMNN_LOG(info) << "Loading Gpu MLGO tuning data from file: " << m_TuningFile;
				156	if(m_MLGOTuner.reload_from_file(m_MLGOTuningFile.c_str()))
				157	{
				158	mlgoTuner = &m_MLGOTuner;
				159	}
				160	}
				161	catch (const std::exception& e)
				162	{
				163	ARMNN_LOG(warning) << "Could not load GpuFsa MLGO tuner data file.";
				164	}
				165	}
				166
				167	tuner = m_Tuner.get();
				168	}
				169
				170	m_GpuFsaContextControlWrapper = std::make_unique<GpuFsaContextControlWrapper>(
				171	tuner,
				172	mlgoTuner,
				173	kernelProfiling
				174	);
				175	}
				176
				177	bool GpuFsaBackendContext::BeforeLoadNetwork(NetworkId)
				178	{
				179	return true;
				180	}
				181
				182	bool GpuFsaBackendContext::AfterLoadNetwork(NetworkId networkId)
				183	{
				184	{
				185	std::lock_guard<std::mutex> lockGuard(m_Mutex);
				186	m_NetworkIds.insert(networkId);
				187	}
				188	return true;
				189	}
				190
				191	bool GpuFsaBackendContext::BeforeUnloadNetwork(NetworkId)
				192	{
				193	return m_GpuFsaContextControlWrapper->Sync();
				194	}
				195
				196	bool GpuFsaBackendContext::AfterUnloadNetwork(NetworkId networkId)
				197	{
				198	bool clearCache = false;
				199	{
				200	std::lock_guard<std::mutex> lockGuard(m_Mutex);
				201	m_NetworkIds.erase(networkId);
				202	clearCache = m_NetworkIds.empty();
				203	}
				204
				205	if (clearCache)
				206	{
				207	m_GpuFsaContextControlWrapper->ClearClCache();
				208	}
				209
				210	return true;
				211	}
				212
				213	bool GpuFsaBackendContext::AfterEnqueueWorkload(NetworkId)
				214	{
				215	return m_GpuFsaContextControlWrapper->Sync();
				216	}
				217
				218	GpuFsaBackendContext::~GpuFsaBackendContext()
				219	{
				220	if (m_Tuner && !m_TuningFile.empty())
				221	{
				222	try
				223	{
				224	m_Tuner->save_to_file(m_TuningFile.c_str());
				225	}
				226	catch(const std::exception& e)
				227	{
				228	ARMNN_LOG(warning) << "Could not save GpuFsa tuner data to file " << m_TuningFile;
				229	}
				230	}
				231	}
				232
				233	} // namespace armnn