//
// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

#include <aclCommon/ArmComputeTensorHandle.hpp>
#include <aclCommon/ArmComputeTensorUtils.hpp>

#include <Half.hpp>

#include <armnn/utility/PolymorphicDowncast.hpp>

#include <arm_compute/runtime/CL/CLTensor.h>
#include <arm_compute/runtime/CL/CLSubTensor.h>
#include <arm_compute/runtime/IMemoryGroup.h>
#include <arm_compute/runtime/MemoryGroup.h>
#include <arm_compute/core/TensorShape.h>
#include <arm_compute/core/Coordinates.h>

#include <cl/IClTensorHandle.hpp>

#include <CL/cl_ext.h>
#include <arm_compute/core/CL/CLKernelLibrary.h>

namespace armnn
{

class ClImportTensorHandle : public IClTensorHandle
{
public:
    ClImportTensorHandle(const TensorInfo& tensorInfo, MemorySourceFlags importFlags)
        : m_ImportFlags(importFlags), m_Imported(false)
    {
        armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo);
    }

    ClImportTensorHandle(const TensorInfo& tensorInfo,
                         DataLayout dataLayout,
                         MemorySourceFlags importFlags)
        : m_ImportFlags(importFlags), m_Imported(false)
    {
        armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo, dataLayout);
    }

    arm_compute::CLTensor& GetTensor() override { return m_Tensor; }
    arm_compute::CLTensor const& GetTensor() const override { return m_Tensor; }
    virtual void Allocate() override {}
    virtual void Manage() override {}

    virtual const void* Map(bool blocking = true) const override
    {
        IgnoreUnused(blocking);
        return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes());
    }

    virtual void Unmap() const override {}

    virtual ITensorHandle* GetParent() const override { return nullptr; }

    virtual arm_compute::DataType GetDataType() const override
    {
        return m_Tensor.info()->data_type();
    }

    virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup) override
    {
        IgnoreUnused(memoryGroup);
    }

    TensorShape GetStrides() const override
    {
        return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes());
    }

    TensorShape GetShape() const override
    {
        return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape());
    }

    void SetImportFlags(MemorySourceFlags importFlags)
    {
        m_ImportFlags = importFlags;
    }

    MemorySourceFlags GetImportFlags() const override
    {
        return m_ImportFlags;
    }

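    // A minimal usage sketch (illustrative only, not part of this header). Assuming a
    // page-aligned host allocation of at least tensorInfo.GetNumBytes() bytes, a caller could
    // alias it into the tensor without copying, roughly like so (all names are hypothetical):
    //
    //     ClImportTensorHandle handle(tensorInfo, static_cast<MemorySourceFlags>(MemorySource::Malloc));
    //     void* hostMem = nullptr;
    //     posix_memalign(&hostMem, 0x1000, tensorInfo.GetNumBytes());
    //     if (handle.CanBeImported(hostMem, MemorySource::Malloc))
    //     {
    //         handle.Import(hostMem, MemorySource::Malloc); // zero-copy: the tensor now aliases hostMem
    //     }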
    virtual bool Import(void* memory, MemorySource source) override
    {
        if (m_ImportFlags & static_cast<MemorySourceFlags>(source))
        {
            if (source == MemorySource::Malloc)
            {
                const cl_import_properties_arm importProperties[] =
                {
                    CL_IMPORT_TYPE_ARM,
                    CL_IMPORT_TYPE_HOST_ARM,
                    0
                };

                return ClImport(importProperties, memory);
            }
            else if (source == MemorySource::DmaBuf)
            {
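                // CL_IMPORT_DMA_BUF_DATA_CONSISTENCY_WITH_HOST_ARM = CL_TRUE asks the driver to keep
                // the dma_buf contents consistent with host-side accesses (see the cl_arm_import_memory
                // extension), so explicit cache maintenance around CPU accesses should not be needed.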
                const cl_import_properties_arm importProperties[] =
                {
                    CL_IMPORT_TYPE_ARM,
                    CL_IMPORT_TYPE_DMA_BUF_ARM,
                    CL_IMPORT_DMA_BUF_DATA_CONSISTENCY_WITH_HOST_ARM,
                    CL_TRUE,
                    0
                };

                return ClImport(importProperties, memory);
            }
            else if (source == MemorySource::DmaBufProtected)
            {
                const cl_import_properties_arm importProperties[] =
                {
                    CL_IMPORT_TYPE_ARM,
                    CL_IMPORT_TYPE_DMA_BUF_ARM,
                    CL_IMPORT_TYPE_PROTECTED_ARM,
                    CL_TRUE,
                    0
                };

                return ClImport(importProperties, memory, true);
            }
            // Import memory that was allocated externally by OpenCL (passed here as a cl_mem)
            // directly into the tensor.
            else if (source == MemorySource::Gralloc)
            {
                // m_Tensor has not yet been allocated or imported.
                if (!m_Imported && !m_Tensor.buffer())
                {
                    // Import the externally allocated OpenCL memory directly into the tensor.
                    arm_compute::Status status =
                        m_Tensor.allocator()->import_memory(cl::Buffer(static_cast<cl_mem>(memory)));
                    m_Imported = bool(status);
                    if (!m_Imported)
                    {
                        throw MemoryImportException(status.error_description());
                    }
                    return m_Imported;
                }

                // m_Tensor.buffer() was initially allocated with Allocate().
                else if (!m_Imported && m_Tensor.buffer())
                {
                    throw MemoryImportException(
                        "ClImportTensorHandle::Import Attempting to import on an already allocated tensor");
                }

                // m_Tensor.buffer() was previously imported.
                else if (m_Imported)
                {
                    // Re-import the externally allocated OpenCL memory directly into the tensor.
                    arm_compute::Status status =
                        m_Tensor.allocator()->import_memory(cl::Buffer(static_cast<cl_mem>(memory)));
                    m_Imported = bool(status);
                    if (!m_Imported)
                    {
                        throw MemoryImportException(status.error_description());
                    }
                    return m_Imported;
                }
                else
                {
                    throw MemoryImportException("ClImportTensorHandle::Failed to Import Gralloc Memory");
                }
            }
            else
            {
                throw MemoryImportException("ClImportTensorHandle::Import flag is not supported");
            }
        }
        else
        {
            throw MemoryImportException("ClImportTensorHandle::Incorrect import flag");
        }
    }

    virtual bool CanBeImported(void* memory, MemorySource source) override
    {
        if (m_ImportFlags & static_cast<MemorySourceFlags>(source))
        {
            if (source == MemorySource::Malloc)
            {
                const cl_import_properties_arm importProperties[] =
                {
                    CL_IMPORT_TYPE_ARM,
                    CL_IMPORT_TYPE_HOST_ARM,
                    0
                };

                size_t totalBytes = m_Tensor.info()->total_size();

                // Round the size of the mapping up to a multiple of CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE.
                // This does not change the size of the buffer, only the size of the mapping the buffer
                // is mapped to. We do this to match the behaviour of the Import function later on.
                auto cachelineAlignment =
                    arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
                auto roundedSize = totalBytes;
                if (totalBytes % cachelineAlignment != 0)
                {
                    roundedSize = cachelineAlignment + totalBytes - (totalBytes % cachelineAlignment);
                }
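                // Worked example: totalBytes = 100, cachelineAlignment = 64
                // -> roundedSize = 64 + 100 - (100 % 64) = 128, the next multiple of 64.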

                cl_int error = CL_SUCCESS;
                cl_mem buffer;
                buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
                                           CL_MEM_READ_WRITE, importProperties, memory, roundedSize, &error);

                // If this trial import fails, the import at workload execution would fail too,
                // so return false. There is no memory to release when error is not CL_SUCCESS.
                if (error != CL_SUCCESS)
                {
                    return false;
                }
                else
                {
                    // The trial import succeeded, so the import at workload execution will succeed
                    // too. Release the trial buffer and return true.
                    error = clReleaseMemObject(buffer);
                    if (error == CL_SUCCESS)
                    {
                        return true;
                    }
                    else
                    {
                        // Failing to release the buffer constitutes a memory leak, so throw an exception.
                        throw MemoryImportException("ClImportTensorHandle::Failed to release cl_mem buffer: "
                                                    + std::to_string(error));
                    }
                }
            }
        }
        else
        {
            throw MemoryImportException("ClImportTensorHandle::Incorrect import flag");
        }
        return false;
    }

private:
    bool ClImport(const cl_import_properties_arm* importProperties, void* memory, bool isProtected = false)
    {
        size_t totalBytes = m_Tensor.info()->total_size();

        // Round the size of the mapping up to a multiple of CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE.
        // This does not change the size of the buffer, only the size of the mapping the buffer is mapped to.
        auto cachelineAlignment =
            arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
        auto roundedSize = totalBytes;
        if (totalBytes % cachelineAlignment != 0)
        {
            roundedSize = cachelineAlignment + totalBytes - (totalBytes % cachelineAlignment);
        }

        cl_int error = CL_SUCCESS;
        cl_mem buffer;
        if (isProtected)
        {
            // Protected content must not be host-accessible.
            buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
                                       CL_MEM_HOST_NO_ACCESS, importProperties, memory, roundedSize, &error);
        }
        else
        {
            buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
                                       CL_MEM_READ_WRITE, importProperties, memory, roundedSize, &error);
        }

        if (error != CL_SUCCESS)
        {
            throw MemoryImportException("ClImportTensorHandle::Invalid imported memory: " + std::to_string(error));
        }

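        // Note: import_memory aliases the imported buffer rather than copying it, so the
        // underlying allocation must stay alive for as long as this tensor is in use.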
        cl::Buffer wrappedBuffer(buffer);
        arm_compute::Status status = m_Tensor.allocator()->import_memory(wrappedBuffer);

        // Check the Status for success; if the import failed, throw an exception carrying
        // the Status error message.
        bool imported = (status.error_code() == arm_compute::ErrorCode::OK);
        if (!imported)
        {
            throw MemoryImportException(status.error_description());
        }

        ARMNN_ASSERT(!m_Tensor.info()->is_resizable());
        return imported;
    }
    // Only used for testing
    void CopyOutTo(void* memory) const override
    {
        const_cast<armnn::ClImportTensorHandle*>(this)->Map(true);
        switch(this->GetDataType())
        {
            case arm_compute::DataType::F32:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<float*>(memory));
                break;
            case arm_compute::DataType::U8:
            case arm_compute::DataType::QASYMM8:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<uint8_t*>(memory));
                break;
            case arm_compute::DataType::QSYMM8_PER_CHANNEL:
            case arm_compute::DataType::QASYMM8_SIGNED:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int8_t*>(memory));
                break;
            case arm_compute::DataType::F16:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<armnn::Half*>(memory));
                break;
            case arm_compute::DataType::S16:
            case arm_compute::DataType::QSYMM16:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int16_t*>(memory));
                break;
            case arm_compute::DataType::S32:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int32_t*>(memory));
                break;
            default:
            {
                throw armnn::UnimplementedException();
            }
        }
        const_cast<armnn::ClImportTensorHandle*>(this)->Unmap();
    }

    // Only used for testing
    void CopyInFrom(const void* memory) override
    {
        this->Map(true);
        switch(this->GetDataType())
        {
            case arm_compute::DataType::F32:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const float*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::U8:
            case arm_compute::DataType::QASYMM8:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const uint8_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::F16:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const armnn::Half*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::QSYMM8_PER_CHANNEL:
            case arm_compute::DataType::QASYMM8_SIGNED:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int8_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::S16:
            case arm_compute::DataType::QSYMM16:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int16_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::S32:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int32_t*>(memory),
                                                                 this->GetTensor());
                break;
            default:
            {
                throw armnn::UnimplementedException();
            }
        }
        this->Unmap();
    }

    arm_compute::CLTensor m_Tensor;
    MemorySourceFlags m_ImportFlags;
    bool m_Imported;
};

class ClImportSubTensorHandle : public IClTensorHandle
{
public:
    ClImportSubTensorHandle(IClTensorHandle* parent,
                            const arm_compute::TensorShape& shape,
                            const arm_compute::Coordinates& coords)
        : m_Tensor(&parent->GetTensor(), shape, coords)
    {
        parentHandle = parent;
    }

    arm_compute::CLSubTensor& GetTensor() override { return m_Tensor; }
    arm_compute::CLSubTensor const& GetTensor() const override { return m_Tensor; }

    virtual void Allocate() override {}
    virtual void Manage() override {}

    virtual const void* Map(bool blocking = true) const override
    {
        IgnoreUnused(blocking);
        return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes());
    }
    virtual void Unmap() const override {}

    virtual ITensorHandle* GetParent() const override { return parentHandle; }

    virtual arm_compute::DataType GetDataType() const override
    {
        return m_Tensor.info()->data_type();
    }

    virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup) override
    {
        IgnoreUnused(memoryGroup);
    }

    TensorShape GetStrides() const override
    {
        return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes());
    }

    TensorShape GetShape() const override
    {
        return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape());
    }

private:
    // Only used for testing
    void CopyOutTo(void* memory) const override
    {
        const_cast<ClImportSubTensorHandle*>(this)->Map(true);
        switch(this->GetDataType())
        {
            case arm_compute::DataType::F32:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<float*>(memory));
                break;
            case arm_compute::DataType::U8:
            case arm_compute::DataType::QASYMM8:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<uint8_t*>(memory));
                break;
            case arm_compute::DataType::F16:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<armnn::Half*>(memory));
                break;
            case arm_compute::DataType::QSYMM8_PER_CHANNEL:
            case arm_compute::DataType::QASYMM8_SIGNED:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int8_t*>(memory));
                break;
            case arm_compute::DataType::S16:
            case arm_compute::DataType::QSYMM16:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int16_t*>(memory));
                break;
            case arm_compute::DataType::S32:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int32_t*>(memory));
                break;
            default:
            {
                throw armnn::UnimplementedException();
            }
        }
        const_cast<ClImportSubTensorHandle*>(this)->Unmap();
    }

    // Only used for testing
    void CopyInFrom(const void* memory) override
    {
        this->Map(true);
        switch(this->GetDataType())
        {
            case arm_compute::DataType::F32:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const float*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::U8:
            case arm_compute::DataType::QASYMM8:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const uint8_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::F16:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const armnn::Half*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::QSYMM8_PER_CHANNEL:
            case arm_compute::DataType::QASYMM8_SIGNED:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int8_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::S16:
            case arm_compute::DataType::QSYMM16:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int16_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::S32:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int32_t*>(memory),
                                                                 this->GetTensor());
                break;
            default:
            {
                throw armnn::UnimplementedException();
            }
        }
        this->Unmap();
    }

    mutable arm_compute::CLSubTensor m_Tensor;
    ITensorHandle* parentHandle = nullptr;
};
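
// A minimal usage sketch (illustrative only, not part of this header). A sub-tensor handle is a
// zero-copy window into its parent handle's buffer, e.g. for writing one input of a concatenation
// in place. All names below are hypothetical:
//
//     arm_compute::TensorShape subShape(16, 16);  // shape of the window
//     arm_compute::Coordinates coords(0, 0);      // offset of the window within the parent
//     ClImportSubTensorHandle subHandle(&parentHandle, subShape, coords);
//     const void* data = subHandle.Map();         // points into the parent's storage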

} // namespace armnn