blob: b863f08758ae461f10dd5103e8c0e69d8aa36841 [file] [log] [blame]
//
// Copyright © 2022-2024 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
5
6#pragma once
7
8#include <aclCommon/ArmComputeTensorHandle.hpp>
9#include <aclCommon/ArmComputeTensorUtils.hpp>
10
11#include <Half.hpp>
12
13#include <armnn/utility/PolymorphicDowncast.hpp>
14
15#include <arm_compute/runtime/CL/CLTensor.h>
16#include <arm_compute/runtime/CL/CLSubTensor.h>
17#include <arm_compute/runtime/IMemoryGroup.h>
18#include <arm_compute/runtime/MemoryGroup.h>
19#include <arm_compute/core/TensorShape.h>
20#include <arm_compute/core/Coordinates.h>
21
Cathal Corbettd9e55f02023-01-11 13:03:21 +000022#include <aclCommon/IClTensorHandle.hpp>
Narumol Prangnawarat9ef36142022-01-25 15:15:34 +000023
Francis Murtaghe73eda92021-05-21 13:36:54 +010024#include <CL/cl_ext.h>
David Monahane4a41dc2021-04-14 16:55:36 +010025#include <arm_compute/core/CL/CLKernelLibrary.h>
26
27namespace armnn
28{
29
Narumol Prangnawarat9ef36142022-01-25 15:15:34 +000030class ClImportTensorHandle : public IClTensorHandle
David Monahane4a41dc2021-04-14 16:55:36 +010031{
32public:
33 ClImportTensorHandle(const TensorInfo& tensorInfo, MemorySourceFlags importFlags)
34 : m_ImportFlags(importFlags)
35 {
36 armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo);
37 }
38
39 ClImportTensorHandle(const TensorInfo& tensorInfo,
40 DataLayout dataLayout,
41 MemorySourceFlags importFlags)
David Monahan6642b8a2021-11-04 16:31:46 +000042 : m_ImportFlags(importFlags), m_Imported(false)
David Monahane4a41dc2021-04-14 16:55:36 +010043 {
44 armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo, dataLayout);
45 }
46
47 arm_compute::CLTensor& GetTensor() override { return m_Tensor; }
48 arm_compute::CLTensor const& GetTensor() const override { return m_Tensor; }
49 virtual void Allocate() override {}
50 virtual void Manage() override {}
51
52 virtual const void* Map(bool blocking = true) const override
53 {
54 IgnoreUnused(blocking);
55 return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes());
56 }
57
58 virtual void Unmap() const override {}
59
60 virtual ITensorHandle* GetParent() const override { return nullptr; }
61
62 virtual arm_compute::DataType GetDataType() const override
63 {
64 return m_Tensor.info()->data_type();
65 }
66
67 virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup) override
68 {
69 IgnoreUnused(memoryGroup);
70 }
71
72 TensorShape GetStrides() const override
73 {
74 return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes());
75 }
76
77 TensorShape GetShape() const override
78 {
79 return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape());
80 }
81
82 void SetImportFlags(MemorySourceFlags importFlags)
83 {
84 m_ImportFlags = importFlags;
85 }
86
87 MemorySourceFlags GetImportFlags() const override
88 {
89 return m_ImportFlags;
90 }
91
92 virtual bool Import(void* memory, MemorySource source) override
93 {
94 if (m_ImportFlags & static_cast<MemorySourceFlags>(source))
95 {
96 if (source == MemorySource::Malloc)
97 {
David Monahane4a41dc2021-04-14 16:55:36 +010098 const cl_import_properties_arm importProperties[] =
99 {
Narumol Prangnawaratff9a29d2021-05-10 11:02:58 +0100100 CL_IMPORT_TYPE_ARM,
101 CL_IMPORT_TYPE_HOST_ARM,
102 0
David Monahane4a41dc2021-04-14 16:55:36 +0100103 };
Narumol Prangnawaratff9a29d2021-05-10 11:02:58 +0100104 return ClImport(importProperties, memory);
105 }
106 if (source == MemorySource::DmaBuf)
107 {
108 const cl_import_properties_arm importProperties[] =
David Monahane4a41dc2021-04-14 16:55:36 +0100109 {
Narumol Prangnawaratff9a29d2021-05-10 11:02:58 +0100110 CL_IMPORT_TYPE_ARM,
111 CL_IMPORT_TYPE_DMA_BUF_ARM,
Francis Murtaghf5d5e6c2021-07-26 13:19:33 +0100112 CL_IMPORT_DMA_BUF_DATA_CONSISTENCY_WITH_HOST_ARM,
113 CL_TRUE,
Narumol Prangnawaratff9a29d2021-05-10 11:02:58 +0100114 0
115 };
David Monahane4a41dc2021-04-14 16:55:36 +0100116
Narumol Prangnawaratff9a29d2021-05-10 11:02:58 +0100117 return ClImport(importProperties, memory);
David Monahane4a41dc2021-04-14 16:55:36 +0100118
David Monahane4a41dc2021-04-14 16:55:36 +0100119 }
Francis Murtagh9db96e02021-08-13 16:15:09 +0100120 if (source == MemorySource::DmaBufProtected)
121 {
122 const cl_import_properties_arm importProperties[] =
123 {
124 CL_IMPORT_TYPE_ARM,
125 CL_IMPORT_TYPE_DMA_BUF_ARM,
126 CL_IMPORT_TYPE_PROTECTED_ARM,
127 CL_TRUE,
128 0
129 };
130
131 return ClImport(importProperties, memory, true);
132
133 }
David Monahan6642b8a2021-11-04 16:31:46 +0000134 // Case for importing memory allocated by OpenCl externally directly into the tensor
135 else if (source == MemorySource::Gralloc)
136 {
137 // m_Tensor not yet Allocated
138 if (!m_Imported && !m_Tensor.buffer())
139 {
140 // Importing memory allocated by OpenCl into the tensor directly.
141 arm_compute::Status status =
142 m_Tensor.allocator()->import_memory(cl::Buffer(static_cast<cl_mem>(memory)));
143 m_Imported = bool(status);
144 if (!m_Imported)
145 {
146 throw MemoryImportException(status.error_description());
147 }
148 return m_Imported;
149 }
150
151 // m_Tensor.buffer() initially allocated with Allocate().
152 else if (!m_Imported && m_Tensor.buffer())
153 {
154 throw MemoryImportException(
155 "ClImportTensorHandle::Import Attempting to import on an already allocated tensor");
156 }
157
158 // m_Tensor.buffer() previously imported.
159 else if (m_Imported)
160 {
161 // Importing memory allocated by OpenCl into the tensor directly.
162 arm_compute::Status status =
163 m_Tensor.allocator()->import_memory(cl::Buffer(static_cast<cl_mem>(memory)));
164 m_Imported = bool(status);
165 if (!m_Imported)
166 {
167 throw MemoryImportException(status.error_description());
168 }
169 return m_Imported;
170 }
171 else
172 {
173 throw MemoryImportException("ClImportTensorHandle::Failed to Import Gralloc Memory");
174 }
175 }
David Monahane4a41dc2021-04-14 16:55:36 +0100176 else
177 {
178 throw MemoryImportException("ClImportTensorHandle::Import flag is not supported");
179 }
180 }
181 else
182 {
183 throw MemoryImportException("ClImportTensorHandle::Incorrect import flag");
184 }
David Monahane4a41dc2021-04-14 16:55:36 +0100185 }
186
Sadik Armagana045ac02022-07-01 14:32:05 +0100187 virtual bool CanBeImported(void* /*memory*/, MemorySource source) override
Nikhil Raj60ab9762022-01-13 09:34:44 +0000188 {
189 if (m_ImportFlags & static_cast<MemorySourceFlags>(source))
190 {
191 if (source == MemorySource::Malloc)
192 {
Sadik Armagana045ac02022-07-01 14:32:05 +0100193 // Returning true as ClImport() function will decide if memory can be imported or not
194 return true;
Nikhil Raj60ab9762022-01-13 09:34:44 +0000195 }
196 }
197 else
198 {
199 throw MemoryImportException("ClImportTensorHandle::Incorrect import flag");
200 }
201 return false;
202 }
203
David Monahane4a41dc2021-04-14 16:55:36 +0100204private:
Francis Murtagh9db96e02021-08-13 16:15:09 +0100205 bool ClImport(const cl_import_properties_arm* importProperties, void* memory, bool isProtected = false)
Narumol Prangnawaratff9a29d2021-05-10 11:02:58 +0100206 {
Jan Eilersc1c872f2021-07-22 13:17:04 +0100207 size_t totalBytes = m_Tensor.info()->total_size();
208
Nikhil Raj60ab9762022-01-13 09:34:44 +0000209 // Round the size of the mapping to match the CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE
210 // This does not change the size of the buffer, only the size of the mapping the buffer is mapped to
Jan Eilersc1c872f2021-07-22 13:17:04 +0100211 auto cachelineAlignment =
212 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
Narumol Prangnawarate2af6f42022-01-28 17:59:18 +0000213 auto roundedSize = totalBytes;
214 if (totalBytes % cachelineAlignment != 0)
215 {
216 roundedSize = cachelineAlignment + totalBytes - (totalBytes % cachelineAlignment);
217 }
Jan Eilersc1c872f2021-07-22 13:17:04 +0100218
Narumol Prangnawaratff9a29d2021-05-10 11:02:58 +0100219 cl_int error = CL_SUCCESS;
Francis Murtagh9db96e02021-08-13 16:15:09 +0100220 cl_mem buffer;
221 if (isProtected)
222 {
223 buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
224 CL_MEM_HOST_NO_ACCESS, importProperties, memory, roundedSize, &error);
225 }
226 else
227 {
228 buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
229 CL_MEM_READ_WRITE, importProperties, memory, roundedSize, &error);
230 }
231
Narumol Prangnawaratff9a29d2021-05-10 11:02:58 +0100232 if (error != CL_SUCCESS)
233 {
Colm Donelan194086f2022-11-14 17:23:07 +0000234 throw MemoryImportException("ClImportTensorHandle::Invalid imported memory: " + std::to_string(error));
Narumol Prangnawaratff9a29d2021-05-10 11:02:58 +0100235 }
236
237 cl::Buffer wrappedBuffer(buffer);
238 arm_compute::Status status = m_Tensor.allocator()->import_memory(wrappedBuffer);
239
240 // Use the overloaded bool operator of Status to check if it is success, if not throw an exception
241 // with the Status error message
242 bool imported = (status.error_code() == arm_compute::ErrorCode::OK);
243 if (!imported)
244 {
245 throw MemoryImportException(status.error_description());
246 }
Narumol Prangnawaratff9a29d2021-05-10 11:02:58 +0100247 return imported;
248 }
David Monahane4a41dc2021-04-14 16:55:36 +0100249 // Only used for testing
250 void CopyOutTo(void* memory) const override
251 {
252 const_cast<armnn::ClImportTensorHandle*>(this)->Map(true);
253 switch(this->GetDataType())
254 {
255 case arm_compute::DataType::F32:
256 armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
257 static_cast<float*>(memory));
258 break;
259 case arm_compute::DataType::U8:
260 case arm_compute::DataType::QASYMM8:
261 armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
262 static_cast<uint8_t*>(memory));
263 break;
264 case arm_compute::DataType::QSYMM8_PER_CHANNEL:
265 case arm_compute::DataType::QASYMM8_SIGNED:
266 armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
267 static_cast<int8_t*>(memory));
268 break;
269 case arm_compute::DataType::F16:
270 armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
271 static_cast<armnn::Half*>(memory));
272 break;
273 case arm_compute::DataType::S16:
274 case arm_compute::DataType::QSYMM16:
275 armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
276 static_cast<int16_t*>(memory));
277 break;
278 case arm_compute::DataType::S32:
279 armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
280 static_cast<int32_t*>(memory));
281 break;
282 default:
283 {
284 throw armnn::UnimplementedException();
285 }
286 }
287 const_cast<armnn::ClImportTensorHandle*>(this)->Unmap();
288 }
289
290 // Only used for testing
291 void CopyInFrom(const void* memory) override
292 {
293 this->Map(true);
294 switch(this->GetDataType())
295 {
296 case arm_compute::DataType::F32:
297 armcomputetensorutils::CopyArmComputeITensorData(static_cast<const float*>(memory),
298 this->GetTensor());
299 break;
300 case arm_compute::DataType::U8:
301 case arm_compute::DataType::QASYMM8:
302 armcomputetensorutils::CopyArmComputeITensorData(static_cast<const uint8_t*>(memory),
303 this->GetTensor());
304 break;
305 case arm_compute::DataType::F16:
306 armcomputetensorutils::CopyArmComputeITensorData(static_cast<const armnn::Half*>(memory),
307 this->GetTensor());
308 break;
309 case arm_compute::DataType::S16:
310 case arm_compute::DataType::QSYMM8_PER_CHANNEL:
311 case arm_compute::DataType::QASYMM8_SIGNED:
312 armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int8_t*>(memory),
313 this->GetTensor());
314 break;
315 case arm_compute::DataType::QSYMM16:
316 armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int16_t*>(memory),
317 this->GetTensor());
318 break;
319 case arm_compute::DataType::S32:
320 armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int32_t*>(memory),
321 this->GetTensor());
322 break;
323 default:
324 {
325 throw armnn::UnimplementedException();
326 }
327 }
328 this->Unmap();
329 }
330
331 arm_compute::CLTensor m_Tensor;
332 MemorySourceFlags m_ImportFlags;
David Monahan6642b8a2021-11-04 16:31:46 +0000333 bool m_Imported;
David Monahane4a41dc2021-04-14 16:55:36 +0100334};
335
Narumol Prangnawarat9ef36142022-01-25 15:15:34 +0000336class ClImportSubTensorHandle : public IClTensorHandle
David Monahane4a41dc2021-04-14 16:55:36 +0100337{
338public:
Narumol Prangnawarat9ef36142022-01-25 15:15:34 +0000339 ClImportSubTensorHandle(IClTensorHandle* parent,
340 const arm_compute::TensorShape& shape,
341 const arm_compute::Coordinates& coords)
David Monahane4a41dc2021-04-14 16:55:36 +0100342 : m_Tensor(&parent->GetTensor(), shape, coords)
343 {
344 parentHandle = parent;
345 }
346
347 arm_compute::CLSubTensor& GetTensor() override { return m_Tensor; }
348 arm_compute::CLSubTensor const& GetTensor() const override { return m_Tensor; }
349
350 virtual void Allocate() override {}
351 virtual void Manage() override {}
352
353 virtual const void* Map(bool blocking = true) const override
354 {
355 IgnoreUnused(blocking);
356 return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes());
357 }
358 virtual void Unmap() const override {}
359
360 virtual ITensorHandle* GetParent() const override { return parentHandle; }
361
362 virtual arm_compute::DataType GetDataType() const override
363 {
364 return m_Tensor.info()->data_type();
365 }
366
367 virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup) override
368 {
369 IgnoreUnused(memoryGroup);
370 }
371
372 TensorShape GetStrides() const override
373 {
374 return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes());
375 }
376
377 TensorShape GetShape() const override
378 {
379 return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape());
380 }
381
382private:
383 // Only used for testing
384 void CopyOutTo(void* memory) const override
385 {
386 const_cast<ClImportSubTensorHandle*>(this)->Map(true);
387 switch(this->GetDataType())
388 {
389 case arm_compute::DataType::F32:
390 armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
391 static_cast<float*>(memory));
392 break;
393 case arm_compute::DataType::U8:
394 case arm_compute::DataType::QASYMM8:
395 armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
396 static_cast<uint8_t*>(memory));
397 break;
398 case arm_compute::DataType::F16:
399 armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
400 static_cast<armnn::Half*>(memory));
401 break;
402 case arm_compute::DataType::QSYMM8_PER_CHANNEL:
403 case arm_compute::DataType::QASYMM8_SIGNED:
404 armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
405 static_cast<int8_t*>(memory));
406 break;
407 case arm_compute::DataType::S16:
408 case arm_compute::DataType::QSYMM16:
409 armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
410 static_cast<int16_t*>(memory));
411 break;
412 case arm_compute::DataType::S32:
413 armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
414 static_cast<int32_t*>(memory));
415 break;
416 default:
417 {
418 throw armnn::UnimplementedException();
419 }
420 }
421 const_cast<ClImportSubTensorHandle*>(this)->Unmap();
422 }
423
424 // Only used for testing
425 void CopyInFrom(const void* memory) override
426 {
427 this->Map(true);
428 switch(this->GetDataType())
429 {
430 case arm_compute::DataType::F32:
431 armcomputetensorutils::CopyArmComputeITensorData(static_cast<const float*>(memory),
432 this->GetTensor());
433 break;
434 case arm_compute::DataType::U8:
435 case arm_compute::DataType::QASYMM8:
436 armcomputetensorutils::CopyArmComputeITensorData(static_cast<const uint8_t*>(memory),
437 this->GetTensor());
438 break;
439 case arm_compute::DataType::F16:
440 armcomputetensorutils::CopyArmComputeITensorData(static_cast<const armnn::Half*>(memory),
441 this->GetTensor());
442 break;
443 case arm_compute::DataType::QSYMM8_PER_CHANNEL:
444 case arm_compute::DataType::QASYMM8_SIGNED:
445 armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int8_t*>(memory),
446 this->GetTensor());
447 break;
448 case arm_compute::DataType::S16:
449 case arm_compute::DataType::QSYMM16:
450 armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int16_t*>(memory),
451 this->GetTensor());
452 break;
453 case arm_compute::DataType::S32:
454 armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int32_t*>(memory),
455 this->GetTensor());
456 break;
457 default:
458 {
459 throw armnn::UnimplementedException();
460 }
461 }
462 this->Unmap();
463 }
464
465 mutable arm_compute::CLSubTensor m_Tensor;
466 ITensorHandle* parentHandle = nullptr;
467};
468
469} // namespace armnn