//
// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
| 5 | #pragma once |
| 6 | |
| 7 | #include <armnn/backends/IBackendInternal.hpp> |
| 8 | #include <aclCommon/BaseMemoryManager.hpp> |
| 9 | |
| 10 | #include <arm_compute/runtime/CL/CLBufferAllocator.h> |
| 11 | #include <arm_compute/runtime/CL/CLMemoryRegion.h> |
| 12 | #include <arm_compute/core/CL/CLKernelLibrary.h> |
| 13 | #include <CL/cl_ext.h> |
| 14 | |
| 15 | // System includes for mapping and unmapping memory |
| 16 | #include <sys/mman.h> |
| 17 | |
| 18 | namespace armnn |
| 19 | { |
| 20 | |
| 21 | // add new capabilities here.. |
| 22 | const BackendCapabilities gpuFsaCapabilities("GpuFsa", |
| 23 | { |
| 24 | {"NonConstWeights", false}, |
| 25 | {"AsyncExecution", false}, |
| 26 | {"ProtectedContentAllocation", false}, |
| 27 | {"ConstantTensorsAsInputs", false}, |
| 28 | {"PreImportIOTensors", false}, |
| 29 | {"ExternallyManagedMemory", false}, |
| 30 | {"MultiAxisPacking", false}, |
| 31 | {"SingleAxisPacking", false} |
| 32 | }); |
| 33 | |
| 34 | class GpuFsaBackend : public IBackendInternal |
| 35 | { |
| 36 | public: |
| 37 | GpuFsaBackend() : m_CustomAllocator(nullptr) {}; |
| 38 | GpuFsaBackend(std::shared_ptr<ICustomAllocator> allocator) |
| 39 | { |
| 40 | UseCustomMemoryAllocator(allocator, armnn::EmptyOptional()); |
| 41 | } |
| 42 | ~GpuFsaBackend() = default; |
| 43 | |
| 44 | static const BackendId& GetIdStatic(); |
| 45 | const BackendId& GetId() const override { return GetIdStatic(); } |
| 46 | |
| 47 | IBackendInternal::IMemoryManagerUniquePtr CreateMemoryManager() const override; |
| 48 | |
| 49 | IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory( |
| 50 | const IBackendInternal::IMemoryManagerSharedPtr& memoryManager = nullptr) const override; |
| 51 | |
| 52 | IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory(TensorHandleFactoryRegistry& registry) const override; |
| 53 | |
| 54 | IWorkloadFactoryPtr CreateWorkloadFactory(class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry, |
| 55 | const ModelOptions& modelOptions, |
| 56 | MemorySourceFlags inputFlags, |
| 57 | MemorySourceFlags outputFlags) const override; |
| 58 | |
| 59 | std::vector<ITensorHandleFactory::FactoryId> GetHandleFactoryPreferences() const override; |
| 60 | |
| 61 | void RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry) override; |
| 62 | |
| 63 | void RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry, |
| 64 | MemorySourceFlags inputFlags, |
| 65 | MemorySourceFlags outputFlags) override; |
| 66 | |
| 67 | IBackendInternal::IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions&) const override; |
| 68 | IBackendInternal::IBackendProfilingContextPtr CreateBackendProfilingContext( |
| 69 | const IRuntime::CreationOptions&, IBackendProfilingPtr& backendProfiling) override; |
| 70 | |
| 71 | IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override; |
| 72 | |
| 73 | OptimizationViews OptimizeSubgraphView(const SubgraphView& subgraph, |
| 74 | const ModelOptions& modelOptions) const override; |
| 75 | |
| 76 | std::unique_ptr<ICustomAllocator> GetDefaultAllocator() const override; |
| 77 | |
| 78 | BackendCapabilities GetCapabilities() const override |
| 79 | { |
| 80 | return gpuFsaCapabilities; |
| 81 | }; |
| 82 | |
| 83 | virtual bool UseCustomMemoryAllocator(std::shared_ptr<ICustomAllocator> allocator, |
| 84 | armnn::Optional<std::string&>) override |
| 85 | { |
| 86 | ARMNN_LOG(info) << "Using Custom Allocator for GpuFsaBackend"; |
| 87 | |
| 88 | // Set flag to signal the backend to use a custom memory allocator |
| 89 | m_CustomAllocator = std::make_shared<GpuFsaBackendCustomAllocatorWrapper>(std::move(allocator)); |
| 90 | m_UsingCustomAllocator = true; |
| 91 | return m_UsingCustomAllocator; |
| 92 | } |
| 93 | |
| 94 | // Cl requires a arm_compute::IAllocator we wrap the Arm NN ICustomAllocator to achieve this |
| 95 | class GpuFsaBackendCustomAllocatorWrapper : public arm_compute::IAllocator |
| 96 | { |
| 97 | public: |
| 98 | GpuFsaBackendCustomAllocatorWrapper(std::shared_ptr<ICustomAllocator> alloc) : m_CustomAllocator(alloc) |
| 99 | {} |
| 100 | // Inherited methods overridden: |
| 101 | void* allocate(size_t size, size_t alignment) override |
| 102 | { |
| 103 | auto alloc = m_CustomAllocator->allocate(size, alignment); |
| 104 | return MapAllocatedMemory(alloc, size, m_CustomAllocator->GetMemorySourceType()); |
| 105 | } |
| 106 | void free(void* ptr) override |
| 107 | { |
| 108 | auto hostMemPtr = m_AllocatedBufferMappings[ptr]; |
| 109 | clReleaseMemObject(static_cast<cl_mem>(ptr)); |
| 110 | m_CustomAllocator->free(hostMemPtr); |
| 111 | } |
| 112 | std::unique_ptr<arm_compute::IMemoryRegion> make_region(size_t size, size_t alignment) override |
| 113 | { |
| 114 | auto hostMemPtr = m_CustomAllocator->allocate(size, alignment); |
| 115 | cl_mem buffer = MapAllocatedMemory(hostMemPtr, size, m_CustomAllocator->GetMemorySourceType()); |
| 116 | |
| 117 | return std::make_unique<ClBackendCustomAllocatorMemoryRegion>(cl::Buffer(buffer), |
| 118 | hostMemPtr, |
| 119 | m_CustomAllocator->GetMemorySourceType()); |
| 120 | } |
| 121 | private: |
| 122 | cl_mem MapAllocatedMemory(void* memory, size_t size, MemorySource source) |
| 123 | { |
| 124 | // Round the size of the buffer to a multiple of the CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE |
| 125 | auto cachelineAlignment = |
| 126 | arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>(); |
| 127 | auto roundedSize = cachelineAlignment + size - (size % cachelineAlignment); |
| 128 | |
| 129 | if (source == MemorySource::Malloc) |
| 130 | { |
| 131 | const cl_import_properties_arm importProperties[] = |
| 132 | { |
| 133 | CL_IMPORT_TYPE_ARM, |
| 134 | CL_IMPORT_TYPE_HOST_ARM, |
| 135 | 0 |
| 136 | }; |
| 137 | cl_int error = CL_SUCCESS; |
| 138 | cl_mem buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(), |
| 139 | CL_MEM_READ_WRITE, |
| 140 | importProperties, |
| 141 | memory, |
| 142 | roundedSize, |
| 143 | &error); |
| 144 | if (error == CL_SUCCESS) |
| 145 | { |
| 146 | m_AllocatedBufferMappings.insert(std::make_pair(static_cast<void *>(buffer), memory)); |
| 147 | return buffer; |
| 148 | } |
| 149 | throw armnn::Exception( |
| 150 | "Mapping allocated memory from CustomMemoryAllocator failed, errcode: " + std::to_string(error)); |
| 151 | } |
| 152 | else if (source == MemorySource::DmaBuf) |
| 153 | { |
| 154 | const cl_import_properties_arm importProperties[] = |
| 155 | { |
| 156 | CL_IMPORT_TYPE_ARM, |
| 157 | CL_IMPORT_TYPE_DMA_BUF_ARM, |
| 158 | CL_IMPORT_DMA_BUF_DATA_CONSISTENCY_WITH_HOST_ARM, |
| 159 | CL_TRUE, |
| 160 | 0 |
| 161 | }; |
| 162 | cl_int error = CL_SUCCESS; |
| 163 | cl_mem buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(), |
| 164 | CL_MEM_READ_WRITE, |
| 165 | importProperties, |
| 166 | memory, |
| 167 | roundedSize, |
| 168 | &error); |
| 169 | if (error == CL_SUCCESS) |
| 170 | { |
| 171 | m_AllocatedBufferMappings.insert(std::make_pair(static_cast<void *>(buffer), memory)); |
| 172 | return buffer; |
| 173 | } |
| 174 | throw armnn::Exception( |
| 175 | "Mapping allocated memory from CustomMemoryAllocator failed, errcode: " |
| 176 | + std::to_string(error)); |
| 177 | } |
| 178 | else if (source == MemorySource::DmaBufProtected) |
| 179 | { |
| 180 | const cl_import_properties_arm importProperties[] = |
| 181 | { |
| 182 | CL_IMPORT_TYPE_ARM, |
| 183 | CL_IMPORT_TYPE_DMA_BUF_ARM, |
| 184 | CL_IMPORT_TYPE_PROTECTED_ARM, |
| 185 | CL_TRUE, |
| 186 | 0 |
| 187 | }; |
| 188 | cl_int error = CL_SUCCESS; |
| 189 | cl_mem buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(), |
| 190 | CL_MEM_READ_WRITE, |
| 191 | importProperties, |
| 192 | memory, |
| 193 | roundedSize, |
| 194 | &error); |
| 195 | if (error == CL_SUCCESS) |
| 196 | { |
| 197 | m_AllocatedBufferMappings.insert(std::make_pair(static_cast<void *>(buffer), memory)); |
| 198 | return buffer; |
| 199 | } |
| 200 | throw armnn::Exception( |
| 201 | "Mapping allocated memory from CustomMemoryAllocator failed, errcode: " |
| 202 | + std::to_string(error)); |
| 203 | } |
| 204 | throw armnn::Exception( |
| 205 | "Attempting to allocate memory with unsupported MemorySource type in CustomAllocator"); |
| 206 | } |
| 207 | std::shared_ptr<ICustomAllocator> m_CustomAllocator; |
| 208 | std::map<void*, void*> m_AllocatedBufferMappings; |
| 209 | }; |
| 210 | |
| 211 | class ClBackendCustomAllocatorMemoryRegion : public arm_compute::ICLMemoryRegion |
| 212 | { |
| 213 | public: |
| 214 | // We need to have a new version of ICLMemoryRegion which holds a hostMemPtr to allow for cpu copy access |
| 215 | ClBackendCustomAllocatorMemoryRegion(const cl::Buffer &buffer, void* hostMemPtr, armnn::MemorySource source) |
| 216 | : ICLMemoryRegion(buffer.getInfo<CL_MEM_SIZE>()) |
| 217 | { |
| 218 | _mem = buffer; |
| 219 | m_HostMemPtr = hostMemPtr; |
| 220 | m_MemorySource = source; |
| 221 | } |
| 222 | |
| 223 | // Inherited methods overridden : |
| 224 | void* ptr() override |
| 225 | { |
| 226 | return nullptr; |
| 227 | } |
| 228 | |
| 229 | void* map(cl::CommandQueue &q, bool blocking) override |
| 230 | { |
| 231 | armnn::IgnoreUnused(q, blocking); |
| 232 | if (m_HostMemPtr == nullptr) |
| 233 | { |
| 234 | throw armnn::Exception("ClBackend: Attempting to map memory with an invalid host ptr"); |
| 235 | } |
| 236 | if (_mapping != nullptr) |
| 237 | { |
| 238 | throw armnn::Exception("ClBackend: Attempting to map memory which has not yet been unmapped"); |
| 239 | } |
| 240 | switch (m_MemorySource) |
| 241 | { |
| 242 | case armnn::MemorySource::Malloc: |
| 243 | _mapping = m_HostMemPtr; |
| 244 | return _mapping; |
| 245 | break; |
| 246 | case armnn::MemorySource::DmaBuf: |
| 247 | case armnn::MemorySource::DmaBufProtected: |
| 248 | // If the source is a Dmabuf then the memory ptr should be pointing to an integer value for the fd |
| 249 | _mapping = mmap(NULL, _size, PROT_WRITE, MAP_SHARED, *(reinterpret_cast<int*>(m_HostMemPtr)), 0); |
| 250 | return _mapping; |
| 251 | break; |
| 252 | default: |
| 253 | throw armnn::Exception("ClBackend: Attempting to map imported memory without a valid source"); |
| 254 | break; |
| 255 | } |
| 256 | } |
| 257 | |
| 258 | void unmap(cl::CommandQueue &q) override |
| 259 | { |
| 260 | armnn::IgnoreUnused(q); |
| 261 | switch (m_MemorySource) |
| 262 | { |
| 263 | case armnn::MemorySource::Malloc: |
| 264 | _mapping = nullptr; |
| 265 | break; |
| 266 | case armnn::MemorySource::DmaBuf: |
| 267 | case armnn::MemorySource::DmaBufProtected: |
| 268 | munmap(_mapping, _size); |
| 269 | _mapping = nullptr; |
| 270 | break; |
| 271 | default: |
| 272 | throw armnn::Exception("ClBackend: Attempting to unmap imported memory without a valid source"); |
| 273 | break; |
| 274 | } |
| 275 | } |
| 276 | private: |
| 277 | void* m_HostMemPtr = nullptr; |
| 278 | armnn::MemorySource m_MemorySource; |
| 279 | }; |
| 280 | |
| 281 | std::shared_ptr<GpuFsaBackendCustomAllocatorWrapper> m_CustomAllocator; |
| 282 | bool m_UsingCustomAllocator = false; |
| 283 | }; |
| 284 | |
| 285 | } // namespace armnn |