Blame - src/backends/cl/ClImportTensorHandle.hpp - ml/armnn

blob: b863f08758ae461f10dd5103e8c0e69d8aa36841 [file] [log] [blame]

David Monahan	e4a41dc	2021-04-14 16:55:36 +0100	[diff] [blame]	1	//
Colm Donelan	b4ef163	2024-02-01 15:00:43 +0000	[diff] [blame]	2	// Copyright © 2022-2024 Arm Ltd and Contributors. All rights reserved.
David Monahan	e4a41dc	2021-04-14 16:55:36 +0100	[diff] [blame]	3	// SPDX-License-Identifier: MIT
				4	//
				5
				6	#pragma once
				7
				8	#include <aclCommon/ArmComputeTensorHandle.hpp>
				9	#include <aclCommon/ArmComputeTensorUtils.hpp>
				10
				11	#include <Half.hpp>
				12
				13	#include <armnn/utility/PolymorphicDowncast.hpp>
				14
				15	#include <arm_compute/runtime/CL/CLTensor.h>
				16	#include <arm_compute/runtime/CL/CLSubTensor.h>
				17	#include <arm_compute/runtime/IMemoryGroup.h>
				18	#include <arm_compute/runtime/MemoryGroup.h>
				19	#include <arm_compute/core/TensorShape.h>
				20	#include <arm_compute/core/Coordinates.h>
				21
Cathal Corbett	d9e55f0	2023-01-11 13:03:21 +0000	[diff] [blame]	22	#include <aclCommon/IClTensorHandle.hpp>
Narumol Prangnawarat	9ef3614	2022-01-25 15:15:34 +0000	[diff] [blame]	23
Francis Murtagh	e73eda9	2021-05-21 13:36:54 +0100	[diff] [blame]	24	#include <CL/cl_ext.h>
David Monahan	e4a41dc	2021-04-14 16:55:36 +0100	[diff] [blame]	25	#include <arm_compute/core/CL/CLKernelLibrary.h>
				26
				27	namespace armnn
				28	{
				29
Narumol Prangnawarat	9ef3614	2022-01-25 15:15:34 +0000	[diff] [blame]	30	class ClImportTensorHandle : public IClTensorHandle
David Monahan	e4a41dc	2021-04-14 16:55:36 +0100	[diff] [blame]	31	{
				32	public:
				33	ClImportTensorHandle(const TensorInfo& tensorInfo, MemorySourceFlags importFlags)
				34	: m_ImportFlags(importFlags)
				35	{
				36	armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo);
				37	}
				38
				39	ClImportTensorHandle(const TensorInfo& tensorInfo,
				40	DataLayout dataLayout,
				41	MemorySourceFlags importFlags)
David Monahan	6642b8a	2021-11-04 16:31:46 +0000	[diff] [blame]	42	: m_ImportFlags(importFlags), m_Imported(false)
David Monahan	e4a41dc	2021-04-14 16:55:36 +0100	[diff] [blame]	43	{
				44	armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo, dataLayout);
				45	}
				46
				47	arm_compute::CLTensor& GetTensor() override { return m_Tensor; }
				48	arm_compute::CLTensor const& GetTensor() const override { return m_Tensor; }
				49	virtual void Allocate() override {}
				50	virtual void Manage() override {}
				51
				52	virtual const void* Map(bool blocking = true) const override
				53	{
				54	IgnoreUnused(blocking);
				55	return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes());
				56	}
				57
				58	virtual void Unmap() const override {}
				59
				60	virtual ITensorHandle* GetParent() const override { return nullptr; }
				61
				62	virtual arm_compute::DataType GetDataType() const override
				63	{
				64	return m_Tensor.info()->data_type();
				65	}
				66
				67	virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup) override
				68	{
				69	IgnoreUnused(memoryGroup);
				70	}
				71
				72	TensorShape GetStrides() const override
				73	{
				74	return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes());
				75	}
				76
				77	TensorShape GetShape() const override
				78	{
				79	return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape());
				80	}
				81
				82	void SetImportFlags(MemorySourceFlags importFlags)
				83	{
				84	m_ImportFlags = importFlags;
				85	}
				86
				87	MemorySourceFlags GetImportFlags() const override
				88	{
				89	return m_ImportFlags;
				90	}
				91
				92	virtual bool Import(void* memory, MemorySource source) override
				93	{
				94	if (m_ImportFlags & static_cast<MemorySourceFlags>(source))
				95	{
				96	if (source == MemorySource::Malloc)
				97	{
David Monahan	e4a41dc	2021-04-14 16:55:36 +0100	[diff] [blame]	98	const cl_import_properties_arm importProperties[] =
				99	{
Narumol Prangnawarat	ff9a29d	2021-05-10 11:02:58 +0100	[diff] [blame]	100	CL_IMPORT_TYPE_ARM,
				101	CL_IMPORT_TYPE_HOST_ARM,
				102	0
David Monahan	e4a41dc	2021-04-14 16:55:36 +0100	[diff] [blame]	103	};
Narumol Prangnawarat	ff9a29d	2021-05-10 11:02:58 +0100	[diff] [blame]	104	return ClImport(importProperties, memory);
				105	}
				106	if (source == MemorySource::DmaBuf)
				107	{
				108	const cl_import_properties_arm importProperties[] =
David Monahan	e4a41dc	2021-04-14 16:55:36 +0100	[diff] [blame]	109	{
Narumol Prangnawarat	ff9a29d	2021-05-10 11:02:58 +0100	[diff] [blame]	110	CL_IMPORT_TYPE_ARM,
				111	CL_IMPORT_TYPE_DMA_BUF_ARM,
Francis Murtagh	f5d5e6c	2021-07-26 13:19:33 +0100	[diff] [blame]	112	CL_IMPORT_DMA_BUF_DATA_CONSISTENCY_WITH_HOST_ARM,
				113	CL_TRUE,
Narumol Prangnawarat	ff9a29d	2021-05-10 11:02:58 +0100	[diff] [blame]	114	0
				115	};
David Monahan	e4a41dc	2021-04-14 16:55:36 +0100	[diff] [blame]	116
Narumol Prangnawarat	ff9a29d	2021-05-10 11:02:58 +0100	[diff] [blame]	117	return ClImport(importProperties, memory);
David Monahan	e4a41dc	2021-04-14 16:55:36 +0100	[diff] [blame]	118
David Monahan	e4a41dc	2021-04-14 16:55:36 +0100	[diff] [blame]	119	}
Francis Murtagh	9db96e0	2021-08-13 16:15:09 +0100	[diff] [blame]	120	if (source == MemorySource::DmaBufProtected)
				121	{
				122	const cl_import_properties_arm importProperties[] =
				123	{
				124	CL_IMPORT_TYPE_ARM,
				125	CL_IMPORT_TYPE_DMA_BUF_ARM,
				126	CL_IMPORT_TYPE_PROTECTED_ARM,
				127	CL_TRUE,
				128	0
				129	};
				130
				131	return ClImport(importProperties, memory, true);
				132
				133	}
David Monahan	6642b8a	2021-11-04 16:31:46 +0000	[diff] [blame]	134	// Case for importing memory allocated by OpenCl externally directly into the tensor
				135	else if (source == MemorySource::Gralloc)
				136	{
				137	// m_Tensor not yet Allocated
				138	if (!m_Imported && !m_Tensor.buffer())
				139	{
				140	// Importing memory allocated by OpenCl into the tensor directly.
				141	arm_compute::Status status =
				142	m_Tensor.allocator()->import_memory(cl::Buffer(static_cast<cl_mem>(memory)));
				143	m_Imported = bool(status);
				144	if (!m_Imported)
				145	{
				146	throw MemoryImportException(status.error_description());
				147	}
				148	return m_Imported;
				149	}
				150
				151	// m_Tensor.buffer() initially allocated with Allocate().
				152	else if (!m_Imported && m_Tensor.buffer())
				153	{
				154	throw MemoryImportException(
				155	"ClImportTensorHandle::Import Attempting to import on an already allocated tensor");
				156	}
				157
				158	// m_Tensor.buffer() previously imported.
				159	else if (m_Imported)
				160	{
				161	// Importing memory allocated by OpenCl into the tensor directly.
				162	arm_compute::Status status =
				163	m_Tensor.allocator()->import_memory(cl::Buffer(static_cast<cl_mem>(memory)));
				164	m_Imported = bool(status);
				165	if (!m_Imported)
				166	{
				167	throw MemoryImportException(status.error_description());
				168	}
				169	return m_Imported;
				170	}
				171	else
				172	{
				173	throw MemoryImportException("ClImportTensorHandle::Failed to Import Gralloc Memory");
				174	}
				175	}
David Monahan	e4a41dc	2021-04-14 16:55:36 +0100	[diff] [blame]	176	else
				177	{
				178	throw MemoryImportException("ClImportTensorHandle::Import flag is not supported");
				179	}
				180	}
				181	else
				182	{
				183	throw MemoryImportException("ClImportTensorHandle::Incorrect import flag");
				184	}
David Monahan	e4a41dc	2021-04-14 16:55:36 +0100	[diff] [blame]	185	}
				186
Sadik Armagan	a045ac0	2022-07-01 14:32:05 +0100	[diff] [blame]	187	virtual bool CanBeImported(void* /memory/, MemorySource source) override
Nikhil Raj	60ab976	2022-01-13 09:34:44 +0000	[diff] [blame]	188	{
				189	if (m_ImportFlags & static_cast<MemorySourceFlags>(source))
				190	{
				191	if (source == MemorySource::Malloc)
				192	{
Sadik Armagan	a045ac0	2022-07-01 14:32:05 +0100	[diff] [blame]	193	// Returning true as ClImport() function will decide if memory can be imported or not
				194	return true;
Nikhil Raj	60ab976	2022-01-13 09:34:44 +0000	[diff] [blame]	195	}
				196	}
				197	else
				198	{
				199	throw MemoryImportException("ClImportTensorHandle::Incorrect import flag");
				200	}
				201	return false;
				202	}
				203
David Monahan	e4a41dc	2021-04-14 16:55:36 +0100	[diff] [blame]	204	private:
Francis Murtagh	9db96e0	2021-08-13 16:15:09 +0100	[diff] [blame]	205	bool ClImport(const cl_import_properties_arm* importProperties, void* memory, bool isProtected = false)
Narumol Prangnawarat	ff9a29d	2021-05-10 11:02:58 +0100	[diff] [blame]	206	{
Jan Eilers	c1c872f	2021-07-22 13:17:04 +0100	[diff] [blame]	207	size_t totalBytes = m_Tensor.info()->total_size();
				208
Nikhil Raj	60ab976	2022-01-13 09:34:44 +0000	[diff] [blame]	209	// Round the size of the mapping to match the CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE
				210	// This does not change the size of the buffer, only the size of the mapping the buffer is mapped to
Jan Eilers	c1c872f	2021-07-22 13:17:04 +0100	[diff] [blame]	211	auto cachelineAlignment =
				212	arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
Narumol Prangnawarat	e2af6f4	2022-01-28 17:59:18 +0000	[diff] [blame]	213	auto roundedSize = totalBytes;
				214	if (totalBytes % cachelineAlignment != 0)
				215	{
				216	roundedSize = cachelineAlignment + totalBytes - (totalBytes % cachelineAlignment);
				217	}
Jan Eilers	c1c872f	2021-07-22 13:17:04 +0100	[diff] [blame]	218
Narumol Prangnawarat	ff9a29d	2021-05-10 11:02:58 +0100	[diff] [blame]	219	cl_int error = CL_SUCCESS;
Francis Murtagh	9db96e0	2021-08-13 16:15:09 +0100	[diff] [blame]	220	cl_mem buffer;
				221	if (isProtected)
				222	{
				223	buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
				224	CL_MEM_HOST_NO_ACCESS, importProperties, memory, roundedSize, &error);
				225	}
				226	else
				227	{
				228	buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
				229	CL_MEM_READ_WRITE, importProperties, memory, roundedSize, &error);
				230	}
				231
Narumol Prangnawarat	ff9a29d	2021-05-10 11:02:58 +0100	[diff] [blame]	232	if (error != CL_SUCCESS)
				233	{
Colm Donelan	194086f	2022-11-14 17:23:07 +0000	[diff] [blame]	234	throw MemoryImportException("ClImportTensorHandle::Invalid imported memory: " + std::to_string(error));
Narumol Prangnawarat	ff9a29d	2021-05-10 11:02:58 +0100	[diff] [blame]	235	}
				236
				237	cl::Buffer wrappedBuffer(buffer);
				238	arm_compute::Status status = m_Tensor.allocator()->import_memory(wrappedBuffer);
				239
				240	// Use the overloaded bool operator of Status to check if it is success, if not throw an exception
				241	// with the Status error message
				242	bool imported = (status.error_code() == arm_compute::ErrorCode::OK);
				243	if (!imported)
				244	{
				245	throw MemoryImportException(status.error_description());
				246	}
Narumol Prangnawarat	ff9a29d	2021-05-10 11:02:58 +0100	[diff] [blame]	247	return imported;
				248	}
David Monahan	e4a41dc	2021-04-14 16:55:36 +0100	[diff] [blame]	249	// Only used for testing
				250	void CopyOutTo(void* memory) const override
				251	{
				252	const_cast<armnn::ClImportTensorHandle*>(this)->Map(true);
				253	switch(this->GetDataType())
				254	{
				255	case arm_compute::DataType::F32:
				256	armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
				257	static_cast<float*>(memory));
				258	break;
				259	case arm_compute::DataType::U8:
				260	case arm_compute::DataType::QASYMM8:
				261	armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
				262	static_cast<uint8_t*>(memory));
				263	break;
				264	case arm_compute::DataType::QSYMM8_PER_CHANNEL:
				265	case arm_compute::DataType::QASYMM8_SIGNED:
				266	armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
				267	static_cast<int8_t*>(memory));
				268	break;
				269	case arm_compute::DataType::F16:
				270	armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
				271	static_cast<armnn::Half*>(memory));
				272	break;
				273	case arm_compute::DataType::S16:
				274	case arm_compute::DataType::QSYMM16:
				275	armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
				276	static_cast<int16_t*>(memory));
				277	break;
				278	case arm_compute::DataType::S32:
				279	armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
				280	static_cast<int32_t*>(memory));
				281	break;
				282	default:
				283	{
				284	throw armnn::UnimplementedException();
				285	}
				286	}
				287	const_cast<armnn::ClImportTensorHandle*>(this)->Unmap();
				288	}
				289
				290	// Only used for testing
				291	void CopyInFrom(const void* memory) override
				292	{
				293	this->Map(true);
				294	switch(this->GetDataType())
				295	{
				296	case arm_compute::DataType::F32:
				297	armcomputetensorutils::CopyArmComputeITensorData(static_cast<const float*>(memory),
				298	this->GetTensor());
				299	break;
				300	case arm_compute::DataType::U8:
				301	case arm_compute::DataType::QASYMM8:
				302	armcomputetensorutils::CopyArmComputeITensorData(static_cast<const uint8_t*>(memory),
				303	this->GetTensor());
				304	break;
				305	case arm_compute::DataType::F16:
				306	armcomputetensorutils::CopyArmComputeITensorData(static_cast<const armnn::Half*>(memory),
				307	this->GetTensor());
				308	break;
				309	case arm_compute::DataType::S16:
				310	case arm_compute::DataType::QSYMM8_PER_CHANNEL:
				311	case arm_compute::DataType::QASYMM8_SIGNED:
				312	armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int8_t*>(memory),
				313	this->GetTensor());
				314	break;
				315	case arm_compute::DataType::QSYMM16:
				316	armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int16_t*>(memory),
				317	this->GetTensor());
				318	break;
				319	case arm_compute::DataType::S32:
				320	armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int32_t*>(memory),
				321	this->GetTensor());
				322	break;
				323	default:
				324	{
				325	throw armnn::UnimplementedException();
				326	}
				327	}
				328	this->Unmap();
				329	}
				330
				331	arm_compute::CLTensor m_Tensor;
				332	MemorySourceFlags m_ImportFlags;
David Monahan	6642b8a	2021-11-04 16:31:46 +0000	[diff] [blame]	333	bool m_Imported;
David Monahan	e4a41dc	2021-04-14 16:55:36 +0100	[diff] [blame]	334	};
				335
Narumol Prangnawarat	9ef3614	2022-01-25 15:15:34 +0000	[diff] [blame]	336	class ClImportSubTensorHandle : public IClTensorHandle
David Monahan	e4a41dc	2021-04-14 16:55:36 +0100	[diff] [blame]	337	{
				338	public:
Narumol Prangnawarat	9ef3614	2022-01-25 15:15:34 +0000	[diff] [blame]	339	ClImportSubTensorHandle(IClTensorHandle* parent,
				340	const arm_compute::TensorShape& shape,
				341	const arm_compute::Coordinates& coords)
David Monahan	e4a41dc	2021-04-14 16:55:36 +0100	[diff] [blame]	342	: m_Tensor(&parent->GetTensor(), shape, coords)
				343	{
				344	parentHandle = parent;
				345	}
				346
				347	arm_compute::CLSubTensor& GetTensor() override { return m_Tensor; }
				348	arm_compute::CLSubTensor const& GetTensor() const override { return m_Tensor; }
				349
				350	virtual void Allocate() override {}
				351	virtual void Manage() override {}
				352
				353	virtual const void* Map(bool blocking = true) const override
				354	{
				355	IgnoreUnused(blocking);
				356	return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes());
				357	}
				358	virtual void Unmap() const override {}
				359
				360	virtual ITensorHandle* GetParent() const override { return parentHandle; }
				361
				362	virtual arm_compute::DataType GetDataType() const override
				363	{
				364	return m_Tensor.info()->data_type();
				365	}
				366
				367	virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup) override
				368	{
				369	IgnoreUnused(memoryGroup);
				370	}
				371
				372	TensorShape GetStrides() const override
				373	{
				374	return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes());
				375	}
				376
				377	TensorShape GetShape() const override
				378	{
				379	return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape());
				380	}
				381
				382	private:
				383	// Only used for testing
				384	void CopyOutTo(void* memory) const override
				385	{
				386	const_cast<ClImportSubTensorHandle*>(this)->Map(true);
				387	switch(this->GetDataType())
				388	{
				389	case arm_compute::DataType::F32:
				390	armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
				391	static_cast<float*>(memory));
				392	break;
				393	case arm_compute::DataType::U8:
				394	case arm_compute::DataType::QASYMM8:
				395	armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
				396	static_cast<uint8_t*>(memory));
				397	break;
				398	case arm_compute::DataType::F16:
				399	armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
				400	static_cast<armnn::Half*>(memory));
				401	break;
				402	case arm_compute::DataType::QSYMM8_PER_CHANNEL:
				403	case arm_compute::DataType::QASYMM8_SIGNED:
				404	armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
				405	static_cast<int8_t*>(memory));
				406	break;
				407	case arm_compute::DataType::S16:
				408	case arm_compute::DataType::QSYMM16:
				409	armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
				410	static_cast<int16_t*>(memory));
				411	break;
				412	case arm_compute::DataType::S32:
				413	armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
				414	static_cast<int32_t*>(memory));
				415	break;
				416	default:
				417	{
				418	throw armnn::UnimplementedException();
				419	}
				420	}
				421	const_cast<ClImportSubTensorHandle*>(this)->Unmap();
				422	}
				423
				424	// Only used for testing
				425	void CopyInFrom(const void* memory) override
				426	{
				427	this->Map(true);
				428	switch(this->GetDataType())
				429	{
				430	case arm_compute::DataType::F32:
				431	armcomputetensorutils::CopyArmComputeITensorData(static_cast<const float*>(memory),
				432	this->GetTensor());
				433	break;
				434	case arm_compute::DataType::U8:
				435	case arm_compute::DataType::QASYMM8:
				436	armcomputetensorutils::CopyArmComputeITensorData(static_cast<const uint8_t*>(memory),
				437	this->GetTensor());
				438	break;
				439	case arm_compute::DataType::F16:
				440	armcomputetensorutils::CopyArmComputeITensorData(static_cast<const armnn::Half*>(memory),
				441	this->GetTensor());
				442	break;
				443	case arm_compute::DataType::QSYMM8_PER_CHANNEL:
				444	case arm_compute::DataType::QASYMM8_SIGNED:
				445	armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int8_t*>(memory),
				446	this->GetTensor());
				447	break;
				448	case arm_compute::DataType::S16:
				449	case arm_compute::DataType::QSYMM16:
				450	armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int16_t*>(memory),
				451	this->GetTensor());
				452	break;
				453	case arm_compute::DataType::S32:
				454	armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int32_t*>(memory),
				455	this->GetTensor());
				456	break;
				457	default:
				458	{
				459	throw armnn::UnimplementedException();
				460	}
				461	}
				462	this->Unmap();
				463	}
				464
				465	mutable arm_compute::CLSubTensor m_Tensor;
				466	ITensorHandle* parentHandle = nullptr;
				467	};
				468
				469	} // namespace armnn