//
// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

#include <aclCommon/ArmComputeTensorHandle.hpp>
#include <aclCommon/ArmComputeTensorUtils.hpp>

#include <Half.hpp>

#include <armnn/utility/PolymorphicDowncast.hpp>

#include <arm_compute/runtime/CL/CLTensor.h>
#include <arm_compute/runtime/CL/CLSubTensor.h>
#include <arm_compute/runtime/IMemoryGroup.h>
#include <arm_compute/runtime/MemoryGroup.h>
#include <arm_compute/core/TensorShape.h>
#include <arm_compute/core/Coordinates.h>

#include <cl/IClTensorHandle.hpp>

#include <CL/cl_ext.h>
#include <arm_compute/core/CL/CLKernelLibrary.h>

namespace armnn
{

class ClImportTensorHandle : public IClTensorHandle
{
public:
    ClImportTensorHandle(const TensorInfo& tensorInfo, MemorySourceFlags importFlags)
        : m_ImportFlags(importFlags), m_Imported(false)
    {
        armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo);
    }

    ClImportTensorHandle(const TensorInfo& tensorInfo,
                         DataLayout dataLayout,
                         MemorySourceFlags importFlags)
        : m_ImportFlags(importFlags), m_Imported(false)
    {
        armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo, dataLayout);
    }

    arm_compute::CLTensor& GetTensor() override { return m_Tensor; }
    arm_compute::CLTensor const& GetTensor() const override { return m_Tensor; }
    virtual void Allocate() override {}
    virtual void Manage() override {}

    virtual const void* Map(bool blocking = true) const override
    {
        IgnoreUnused(blocking);
        return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes());
    }

    virtual void Unmap() const override {}

    virtual ITensorHandle* GetParent() const override { return nullptr; }

    virtual arm_compute::DataType GetDataType() const override
    {
        return m_Tensor.info()->data_type();
    }

    virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup) override
    {
        IgnoreUnused(memoryGroup);
    }

    TensorShape GetStrides() const override
    {
        return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes());
    }

    TensorShape GetShape() const override
    {
        return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape());
    }

    void SetImportFlags(MemorySourceFlags importFlags)
    {
        m_ImportFlags = importFlags;
    }

    MemorySourceFlags GetImportFlags() const override
    {
        return m_ImportFlags;
    }

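    // A minimal usage sketch (illustrative only, not part of this header). Assuming a
    // page-aligned host allocation of at least tensorInfo.GetNumBytes() bytes, a caller could
    // alias it into the tensor without copying, roughly like so (all names are hypothetical):
    //
    //     ClImportTensorHandle handle(tensorInfo, static_cast<MemorySourceFlags>(MemorySource::Malloc));
    //     void* hostMem = nullptr;
    //     posix_memalign(&hostMem, 0x1000, tensorInfo.GetNumBytes());
    //     if (handle.CanBeImported(hostMem, MemorySource::Malloc))
    //     {
    //         handle.Import(hostMem, MemorySource::Malloc); // zero-copy: the tensor now aliases hostMem
    //     }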
    virtual bool Import(void* memory, MemorySource source) override
    {
        if (m_ImportFlags & static_cast<MemorySourceFlags>(source))
        {
            if (source == MemorySource::Malloc)
            {
                const cl_import_properties_arm importProperties[] =
                {
                    CL_IMPORT_TYPE_ARM,
                    CL_IMPORT_TYPE_HOST_ARM,
                    0
                };

                return ClImport(importProperties, memory);
            }
            else if (source == MemorySource::DmaBuf)
            {
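                // CL_IMPORT_DMA_BUF_DATA_CONSISTENCY_WITH_HOST_ARM = CL_TRUE asks the driver to keep
                // the dma_buf contents consistent with host-side accesses (see the cl_arm_import_memory
                // extension), so explicit cache maintenance around CPU accesses should not be needed.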
                const cl_import_properties_arm importProperties[] =
                {
                    CL_IMPORT_TYPE_ARM,
                    CL_IMPORT_TYPE_DMA_BUF_ARM,
                    CL_IMPORT_DMA_BUF_DATA_CONSISTENCY_WITH_HOST_ARM,
                    CL_TRUE,
                    0
                };

                return ClImport(importProperties, memory);
            }
            else if (source == MemorySource::DmaBufProtected)
            {
                const cl_import_properties_arm importProperties[] =
                {
                    CL_IMPORT_TYPE_ARM,
                    CL_IMPORT_TYPE_DMA_BUF_ARM,
                    CL_IMPORT_TYPE_PROTECTED_ARM,
                    CL_TRUE,
                    0
                };

                return ClImport(importProperties, memory, true);
            }
            // Import memory that was allocated externally by OpenCL (passed here as a cl_mem)
            // directly into the tensor.
            else if (source == MemorySource::Gralloc)
            {
                // m_Tensor has not yet been allocated or imported.
                if (!m_Imported && !m_Tensor.buffer())
                {
                    // Import the externally allocated OpenCL memory directly into the tensor.
                    arm_compute::Status status =
                        m_Tensor.allocator()->import_memory(cl::Buffer(static_cast<cl_mem>(memory)));
                    m_Imported = bool(status);
                    if (!m_Imported)
                    {
                        throw MemoryImportException(status.error_description());
                    }
                    return m_Imported;
                }

                // m_Tensor.buffer() was initially allocated with Allocate().
                else if (!m_Imported && m_Tensor.buffer())
                {
                    throw MemoryImportException(
                        "ClImportTensorHandle::Import Attempting to import on an already allocated tensor");
                }

                // m_Tensor.buffer() was previously imported.
                else if (m_Imported)
                {
                    // Re-import the externally allocated OpenCL memory directly into the tensor.
                    arm_compute::Status status =
                        m_Tensor.allocator()->import_memory(cl::Buffer(static_cast<cl_mem>(memory)));
                    m_Imported = bool(status);
                    if (!m_Imported)
                    {
                        throw MemoryImportException(status.error_description());
                    }
                    return m_Imported;
                }
                else
                {
                    throw MemoryImportException("ClImportTensorHandle::Failed to Import Gralloc Memory");
                }
            }
            else
            {
                throw MemoryImportException("ClImportTensorHandle::Import flag is not supported");
            }
        }
        else
        {
            throw MemoryImportException("ClImportTensorHandle::Incorrect import flag");
        }
    }

    virtual bool CanBeImported(void* memory, MemorySource source) override
    {
        if (m_ImportFlags & static_cast<MemorySourceFlags>(source))
        {
            if (source == MemorySource::Malloc)
            {
                const cl_import_properties_arm importProperties[] =
                {
                    CL_IMPORT_TYPE_ARM,
                    CL_IMPORT_TYPE_HOST_ARM,
                    0
                };

                size_t totalBytes = m_Tensor.info()->total_size();

                // Round the size of the mapping up to a multiple of CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE.
                // This does not change the size of the buffer, only the size of the mapping the buffer
                // is mapped to. We do this to match the behaviour of the Import function later on.
                auto cachelineAlignment =
                    arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
                auto roundedSize = totalBytes;
                if (totalBytes % cachelineAlignment != 0)
                {
                    roundedSize = cachelineAlignment + totalBytes - (totalBytes % cachelineAlignment);
                }
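                // Worked example: totalBytes = 100, cachelineAlignment = 64
                // -> roundedSize = 64 + 100 - (100 % 64) = 128, the next multiple of 64.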

                cl_int error = CL_SUCCESS;
                cl_mem buffer;
                buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
                                           CL_MEM_READ_WRITE, importProperties, memory, roundedSize, &error);

                // If this trial import fails, the import at workload execution would fail too,
                // so return false. There is no memory to release when error is not CL_SUCCESS.
                if (error != CL_SUCCESS)
                {
                    return false;
                }
                else
                {
                    // The trial import succeeded, so the import at workload execution will succeed
                    // too. Release the trial buffer and return true.
                    error = clReleaseMemObject(buffer);
                    if (error == CL_SUCCESS)
                    {
                        return true;
                    }
                    else
                    {
                        // Failing to release the buffer constitutes a memory leak, so throw an exception.
                        throw MemoryImportException("ClImportTensorHandle::Failed to release cl_mem buffer: "
                                                    + std::to_string(error));
                    }
                }
            }
        }
        else
        {
            throw MemoryImportException("ClImportTensorHandle::Incorrect import flag");
        }
        return false;
    }

private:
    bool ClImport(const cl_import_properties_arm* importProperties, void* memory, bool isProtected = false)
    {
        size_t totalBytes = m_Tensor.info()->total_size();

        // Round the size of the mapping up to a multiple of CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE.
        // This does not change the size of the buffer, only the size of the mapping the buffer is mapped to.
        auto cachelineAlignment =
            arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
        auto roundedSize = totalBytes;
        if (totalBytes % cachelineAlignment != 0)
        {
            roundedSize = cachelineAlignment + totalBytes - (totalBytes % cachelineAlignment);
        }

        cl_int error = CL_SUCCESS;
        cl_mem buffer;
        if (isProtected)
        {
            // Protected content must not be host-accessible.
            buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
                                       CL_MEM_HOST_NO_ACCESS, importProperties, memory, roundedSize, &error);
        }
        else
        {
            buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
                                       CL_MEM_READ_WRITE, importProperties, memory, roundedSize, &error);
        }

        if (error != CL_SUCCESS)
        {
            throw MemoryImportException("ClImportTensorHandle::Invalid imported memory: " + std::to_string(error));
        }

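        // Note: import_memory aliases the imported buffer rather than copying it, so the
        // underlying allocation must stay alive for as long as this tensor is in use.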
        cl::Buffer wrappedBuffer(buffer);
        arm_compute::Status status = m_Tensor.allocator()->import_memory(wrappedBuffer);

        // Check the Status for success; if the import failed, throw an exception carrying
        // the Status error message.
        bool imported = (status.error_code() == arm_compute::ErrorCode::OK);
        if (!imported)
        {
            throw MemoryImportException(status.error_description());
        }

        ARMNN_ASSERT(!m_Tensor.info()->is_resizable());
        return imported;
    }
    // Only used for testing
    void CopyOutTo(void* memory) const override
    {
        const_cast<armnn::ClImportTensorHandle*>(this)->Map(true);
        switch(this->GetDataType())
        {
            case arm_compute::DataType::F32:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<float*>(memory));
                break;
            case arm_compute::DataType::U8:
            case arm_compute::DataType::QASYMM8:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<uint8_t*>(memory));
                break;
            case arm_compute::DataType::QSYMM8_PER_CHANNEL:
            case arm_compute::DataType::QASYMM8_SIGNED:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int8_t*>(memory));
                break;
            case arm_compute::DataType::F16:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<armnn::Half*>(memory));
                break;
            case arm_compute::DataType::S16:
            case arm_compute::DataType::QSYMM16:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int16_t*>(memory));
                break;
            case arm_compute::DataType::S32:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int32_t*>(memory));
                break;
            default:
            {
                throw armnn::UnimplementedException();
            }
        }
        const_cast<armnn::ClImportTensorHandle*>(this)->Unmap();
    }

    // Only used for testing
    void CopyInFrom(const void* memory) override
    {
        this->Map(true);
        switch(this->GetDataType())
        {
            case arm_compute::DataType::F32:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const float*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::U8:
            case arm_compute::DataType::QASYMM8:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const uint8_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::F16:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const armnn::Half*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::QSYMM8_PER_CHANNEL:
            case arm_compute::DataType::QASYMM8_SIGNED:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int8_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::S16:
            case arm_compute::DataType::QSYMM16:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int16_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::S32:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int32_t*>(memory),
                                                                 this->GetTensor());
                break;
            default:
            {
                throw armnn::UnimplementedException();
            }
        }
        this->Unmap();
    }

    arm_compute::CLTensor m_Tensor;
    MemorySourceFlags m_ImportFlags;
    bool m_Imported;
};

class ClImportSubTensorHandle : public IClTensorHandle
{
public:
    ClImportSubTensorHandle(IClTensorHandle* parent,
                            const arm_compute::TensorShape& shape,
                            const arm_compute::Coordinates& coords)
        : m_Tensor(&parent->GetTensor(), shape, coords)
    {
        parentHandle = parent;
    }

    arm_compute::CLSubTensor& GetTensor() override { return m_Tensor; }
    arm_compute::CLSubTensor const& GetTensor() const override { return m_Tensor; }

    virtual void Allocate() override {}
    virtual void Manage() override {}

    virtual const void* Map(bool blocking = true) const override
    {
        IgnoreUnused(blocking);
        return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes());
    }
    virtual void Unmap() const override {}

    virtual ITensorHandle* GetParent() const override { return parentHandle; }

    virtual arm_compute::DataType GetDataType() const override
    {
        return m_Tensor.info()->data_type();
    }

    virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup) override
    {
        IgnoreUnused(memoryGroup);
    }

    TensorShape GetStrides() const override
    {
        return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes());
    }

    TensorShape GetShape() const override
    {
        return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape());
    }

private:
    // Only used for testing
    void CopyOutTo(void* memory) const override
    {
        const_cast<ClImportSubTensorHandle*>(this)->Map(true);
        switch(this->GetDataType())
        {
            case arm_compute::DataType::F32:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<float*>(memory));
                break;
            case arm_compute::DataType::U8:
            case arm_compute::DataType::QASYMM8:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<uint8_t*>(memory));
                break;
            case arm_compute::DataType::F16:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<armnn::Half*>(memory));
                break;
            case arm_compute::DataType::QSYMM8_PER_CHANNEL:
            case arm_compute::DataType::QASYMM8_SIGNED:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int8_t*>(memory));
                break;
            case arm_compute::DataType::S16:
            case arm_compute::DataType::QSYMM16:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int16_t*>(memory));
                break;
            case arm_compute::DataType::S32:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int32_t*>(memory));
                break;
            default:
            {
                throw armnn::UnimplementedException();
            }
        }
        const_cast<ClImportSubTensorHandle*>(this)->Unmap();
    }

    // Only used for testing
    void CopyInFrom(const void* memory) override
    {
        this->Map(true);
        switch(this->GetDataType())
        {
            case arm_compute::DataType::F32:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const float*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::U8:
            case arm_compute::DataType::QASYMM8:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const uint8_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::F16:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const armnn::Half*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::QSYMM8_PER_CHANNEL:
            case arm_compute::DataType::QASYMM8_SIGNED:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int8_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::S16:
            case arm_compute::DataType::QSYMM16:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int16_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::S32:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int32_t*>(memory),
                                                                 this->GetTensor());
                break;
            default:
            {
                throw armnn::UnimplementedException();
            }
        }
        this->Unmap();
    }

    mutable arm_compute::CLSubTensor m_Tensor;
    ITensorHandle* parentHandle = nullptr;
};
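
// A minimal usage sketch (illustrative only, not part of this header). A sub-tensor handle is a
// zero-copy window into its parent handle's buffer, e.g. for writing one input of a concatenation
// in place. All names below are hypothetical:
//
//     arm_compute::TensorShape subShape(16, 16);  // shape of the window
//     arm_compute::Coordinates coords(0, 0);      // offset of the window within the parent
//     ClImportSubTensorHandle subHandle(&parentHandle, subShape, coords);
//     const void* data = subHandle.Map();         // points into the parent's storage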

} // namespace armnn