// blob: b863f08758ae461f10dd5103e8c0e69d8aa36841 [file] [log] [blame]
//
// Copyright © 2022-2024 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once
#include <aclCommon/ArmComputeTensorHandle.hpp>
#include <aclCommon/ArmComputeTensorUtils.hpp>
#include <Half.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <arm_compute/runtime/CL/CLTensor.h>
#include <arm_compute/runtime/CL/CLSubTensor.h>
#include <arm_compute/runtime/IMemoryGroup.h>
#include <arm_compute/runtime/MemoryGroup.h>
#include <arm_compute/core/TensorShape.h>
#include <arm_compute/core/Coordinates.h>
#include <aclCommon/IClTensorHandle.hpp>
#include <CL/cl_ext.h>
#include <arm_compute/core/CL/CLKernelLibrary.h>
namespace armnn
{
/// CL tensor handle whose backing storage is imported from memory supplied by the caller.
/// Allocate(), Manage(), Unmap() and SetMemoryGroup() are intentionally no-ops in this class:
/// the only way storage is attached to the tensor is through Import().
class ClImportTensorHandle : public IClTensorHandle
{
public:
    /// Builds the underlying CLTensor description from @p tensorInfo.
    /// @param tensorInfo  Tensor description forwarded to BuildArmComputeTensor.
    /// @param importFlags Bitmask of MemorySource values that Import() will accept.
    ClImportTensorHandle(const TensorInfo& tensorInfo, MemorySourceFlags importFlags)
        : m_ImportFlags(importFlags)
    {
        armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo);
    }

    /// Same as the two-argument constructor, additionally forwarding @p dataLayout
    /// to BuildArmComputeTensor.
    ClImportTensorHandle(const TensorInfo& tensorInfo,
                         DataLayout dataLayout,
                         MemorySourceFlags importFlags)
        : m_ImportFlags(importFlags)
    {
        armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo, dataLayout);
    }

    arm_compute::CLTensor& GetTensor() override { return m_Tensor; }
    arm_compute::CLTensor const& GetTensor() const override { return m_Tensor; }

    // Storage is only ever imported, so there is nothing to allocate or manage.
    virtual void Allocate() override {}
    virtual void Manage() override {}

    /// Returns the tensor buffer pointer advanced by the offset of the first element.
    /// @param blocking Ignored.
    virtual const void* Map(bool blocking = true) const override
    {
        IgnoreUnused(blocking);
        return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes());
    }

    virtual void Unmap() const override {}

    /// This handle never has a parent.
    virtual ITensorHandle* GetParent() const override { return nullptr; }

    virtual arm_compute::DataType GetDataType() const override
    {
        return m_Tensor.info()->data_type();
    }

    /// The memory group is ignored by this handle.
    virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup) override
    {
        IgnoreUnused(memoryGroup);
    }

    TensorShape GetStrides() const override
    {
        return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes());
    }

    TensorShape GetShape() const override
    {
        return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape());
    }

    void SetImportFlags(MemorySourceFlags importFlags)
    {
        m_ImportFlags = importFlags;
    }

    MemorySourceFlags GetImportFlags() const override
    {
        return m_ImportFlags;
    }

    /// Imports @p memory into the tensor.
    /// @param memory For Malloc/DmaBuf/DmaBufProtected the value handed to clImportMemoryARM;
    ///               for Gralloc a cl_mem that is wrapped in a cl::Buffer directly.
    /// @param source Kind of memory being imported; must be enabled in the import flags.
    /// @return true on success (every failure is reported by exception).
    /// @throws MemoryImportException if @p source is not enabled in the import flags, is not one
    ///         of the supported kinds, or the underlying import fails.
    virtual bool Import(void* memory, MemorySource source) override
    {
        if (!(m_ImportFlags & static_cast<MemorySourceFlags>(source)))
        {
            throw MemoryImportException("ClImportTensorHandle::Incorrect import flag");
        }

        switch (source)
        {
            case MemorySource::Malloc:
            {
                const cl_import_properties_arm importProperties[] =
                {
                    CL_IMPORT_TYPE_ARM,
                    CL_IMPORT_TYPE_HOST_ARM,
                    0
                };
                return ClImport(importProperties, memory);
            }
            case MemorySource::DmaBuf:
            {
                const cl_import_properties_arm importProperties[] =
                {
                    CL_IMPORT_TYPE_ARM,
                    CL_IMPORT_TYPE_DMA_BUF_ARM,
                    CL_IMPORT_DMA_BUF_DATA_CONSISTENCY_WITH_HOST_ARM,
                    CL_TRUE,
                    0
                };
                return ClImport(importProperties, memory);
            }
            case MemorySource::DmaBufProtected:
            {
                const cl_import_properties_arm importProperties[] =
                {
                    CL_IMPORT_TYPE_ARM,
                    CL_IMPORT_TYPE_DMA_BUF_ARM,
                    CL_IMPORT_TYPE_PROTECTED_ARM,
                    CL_TRUE,
                    0
                };
                return ClImport(importProperties, memory, true);
            }
            case MemorySource::Gralloc:
            {
                // Case for importing memory allocated by OpenCL externally directly into the tensor.
                // A buffer that is present without a previous Gralloc import was not put there by
                // this path, so refuse to replace it.
                if (!m_Imported && m_Tensor.buffer())
                {
                    throw MemoryImportException(
                        "ClImportTensorHandle::Import Attempting to import on an already allocated tensor");
                }

                // First import and re-import are handled identically.
                // NOTE(review): the cl::Buffer wrapper is built straight from the raw cl_mem; confirm
                // whether this is expected to take over the caller's reference.
                arm_compute::Status status =
                    m_Tensor.allocator()->import_memory(cl::Buffer(static_cast<cl_mem>(memory)));
                m_Imported = bool(status);
                if (!m_Imported)
                {
                    throw MemoryImportException(status.error_description());
                }
                return m_Imported;
            }
            default:
            {
                throw MemoryImportException("ClImportTensorHandle::Import flag is not supported");
            }
        }
    }

    /// Reports whether an import of the given kind may be attempted.
    /// @return true only for MemorySource::Malloc; false for any other enabled source.
    /// @throws MemoryImportException if @p source is not enabled in the import flags.
    virtual bool CanBeImported(void* /*memory*/, MemorySource source) override
    {
        if (!(m_ImportFlags & static_cast<MemorySourceFlags>(source)))
        {
            throw MemoryImportException("ClImportTensorHandle::Incorrect import flag");
        }
        // Returning true for Malloc as ClImport() function will decide if memory can be imported or not
        return source == MemorySource::Malloc;
    }

private:
    /// Imports @p memory through clImportMemoryARM and attaches the resulting buffer to the tensor.
    /// @param importProperties Zero-terminated property list passed to clImportMemoryARM.
    /// @param memory           Value passed to clImportMemoryARM as the memory to import.
    /// @param isProtected      When true the buffer is created with CL_MEM_HOST_NO_ACCESS,
    ///                         otherwise with CL_MEM_READ_WRITE.
    /// @return true on success.
    /// @throws MemoryImportException if clImportMemoryARM or import_memory reports an error.
    bool ClImport(const cl_import_properties_arm* importProperties, void* memory, bool isProtected = false)
    {
        const size_t totalBytes = m_Tensor.info()->total_size();

        // Round the size of the mapping to match the CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE
        // This does not change the size of the buffer, only the size of the mapping the buffer is mapped to
        const size_t cachelineAlignment =
            arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
        size_t roundedSize = totalBytes;
        // A reported cache line size of zero would make the modulo below divide by zero;
        // in that case the size is left unrounded.
        if (cachelineAlignment != 0)
        {
            const size_t remainder = totalBytes % cachelineAlignment;
            if (remainder != 0)
            {
                roundedSize += cachelineAlignment - remainder;
            }
        }

        const cl_mem_flags memoryFlags = isProtected ? CL_MEM_HOST_NO_ACCESS : CL_MEM_READ_WRITE;

        cl_int error = CL_SUCCESS;
        cl_mem buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
                                          memoryFlags, importProperties, memory, roundedSize, &error);
        if (error != CL_SUCCESS)
        {
            throw MemoryImportException("ClImportTensorHandle::Invalid imported memory: " + std::to_string(error));
        }

        cl::Buffer wrappedBuffer(buffer);
        arm_compute::Status status = m_Tensor.allocator()->import_memory(wrappedBuffer);

        // Compare the Status error code against OK; on failure throw an exception
        // carrying the Status error message
        const bool imported = (status.error_code() == arm_compute::ErrorCode::OK);
        if (!imported)
        {
            throw MemoryImportException(status.error_description());
        }
        return imported;
    }

    // Copies the tensor contents into memory, interpreting the destination as an array of T.
    template <typename T>
    void CopyOutAs(void* memory) const
    {
        armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), static_cast<T*>(memory));
    }

    // Copies memory, interpreted as an array of T, into the tensor.
    template <typename T>
    void CopyInAs(const void* memory)
    {
        armcomputetensorutils::CopyArmComputeITensorData(static_cast<const T*>(memory), this->GetTensor());
    }

    // Only used for testing
    // Throws UnimplementedException for data types not listed below.
    void CopyOutTo(void* memory) const override
    {
        this->Map(true);
        switch (this->GetDataType())
        {
            case arm_compute::DataType::F32:
                CopyOutAs<float>(memory);
                break;
            case arm_compute::DataType::U8:
            case arm_compute::DataType::QASYMM8:
                CopyOutAs<uint8_t>(memory);
                break;
            case arm_compute::DataType::QSYMM8_PER_CHANNEL:
            case arm_compute::DataType::QASYMM8_SIGNED:
                CopyOutAs<int8_t>(memory);
                break;
            case arm_compute::DataType::F16:
                CopyOutAs<armnn::Half>(memory);
                break;
            case arm_compute::DataType::S16:
            case arm_compute::DataType::QSYMM16:
                CopyOutAs<int16_t>(memory);
                break;
            case arm_compute::DataType::S32:
                CopyOutAs<int32_t>(memory);
                break;
            default:
            {
                throw armnn::UnimplementedException();
            }
        }
        this->Unmap();
    }

    // Only used for testing
    // Throws UnimplementedException for data types not listed below.
    void CopyInFrom(const void* memory) override
    {
        this->Map(true);
        switch (this->GetDataType())
        {
            case arm_compute::DataType::F32:
                CopyInAs<float>(memory);
                break;
            case arm_compute::DataType::U8:
            case arm_compute::DataType::QASYMM8:
                CopyInAs<uint8_t>(memory);
                break;
            case arm_compute::DataType::F16:
                CopyInAs<armnn::Half>(memory);
                break;
            case arm_compute::DataType::QSYMM8_PER_CHANNEL:
            case arm_compute::DataType::QASYMM8_SIGNED:
                CopyInAs<int8_t>(memory);
                break;
            // S16 is a 16-bit type and must be copied as int16_t, matching CopyOutTo.
            case arm_compute::DataType::S16:
            case arm_compute::DataType::QSYMM16:
                CopyInAs<int16_t>(memory);
                break;
            case arm_compute::DataType::S32:
                CopyInAs<int32_t>(memory);
                break;
            default:
            {
                throw armnn::UnimplementedException();
            }
        }
        this->Unmap();
    }

    arm_compute::CLTensor m_Tensor;
    MemorySourceFlags m_ImportFlags;
    // Set by the Gralloc path of Import(); default-initialised so that every constructor
    // leaves it in a defined state before Import() reads it.
    bool m_Imported = false;
};
class ClImportSubTensorHandle : public IClTensorHandle
{
public:
ClImportSubTensorHandle(IClTensorHandle* parent,
const arm_compute::TensorShape& shape,
const arm_compute::Coordinates& coords)
: m_Tensor(&parent->GetTensor(), shape, coords)
{
parentHandle = parent;
}
arm_compute::CLSubTensor& GetTensor() override { return m_Tensor; }
arm_compute::CLSubTensor const& GetTensor() const override { return m_Tensor; }
virtual void Allocate() override {}
virtual void Manage() override {}
virtual const void* Map(bool blocking = true) const override
{
IgnoreUnused(blocking);
return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes());
}
virtual void Unmap() const override {}
virtual ITensorHandle* GetParent() const override { return parentHandle; }
virtual arm_compute::DataType GetDataType() const override
{
return m_Tensor.info()->data_type();
}
virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup) override
{
IgnoreUnused(memoryGroup);
}
TensorShape GetStrides() const override
{
return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes());
}
TensorShape GetShape() const override
{
return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape());
}
private:
// Only used for testing
void CopyOutTo(void* memory) const override
{
const_cast<ClImportSubTensorHandle*>(this)->Map(true);
switch(this->GetDataType())
{
case arm_compute::DataType::F32:
armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
static_cast<float*>(memory));
break;
case arm_compute::DataType::U8:
case arm_compute::DataType::QASYMM8:
armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
static_cast<uint8_t*>(memory));
break;
case arm_compute::DataType::F16:
armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
static_cast<armnn::Half*>(memory));
break;
case arm_compute::DataType::QSYMM8_PER_CHANNEL:
case arm_compute::DataType::QASYMM8_SIGNED:
armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
static_cast<int8_t*>(memory));
break;
case arm_compute::DataType::S16:
case arm_compute::DataType::QSYMM16:
armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
static_cast<int16_t*>(memory));
break;
case arm_compute::DataType::S32:
armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
static_cast<int32_t*>(memory));
break;
default:
{
throw armnn::UnimplementedException();
}
}
const_cast<ClImportSubTensorHandle*>(this)->Unmap();
}
// Only used for testing
void CopyInFrom(const void* memory) override
{
this->Map(true);
switch(this->GetDataType())
{
case arm_compute::DataType::F32:
armcomputetensorutils::CopyArmComputeITensorData(static_cast<const float*>(memory),
this->GetTensor());
break;
case arm_compute::DataType::U8:
case arm_compute::DataType::QASYMM8:
armcomputetensorutils::CopyArmComputeITensorData(static_cast<const uint8_t*>(memory),
this->GetTensor());
break;
case arm_compute::DataType::F16:
armcomputetensorutils::CopyArmComputeITensorData(static_cast<const armnn::Half*>(memory),
this->GetTensor());
break;
case arm_compute::DataType::QSYMM8_PER_CHANNEL:
case arm_compute::DataType::QASYMM8_SIGNED:
armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int8_t*>(memory),
this->GetTensor());
break;
case arm_compute::DataType::S16:
case arm_compute::DataType::QSYMM16:
armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int16_t*>(memory),
this->GetTensor());
break;
case arm_compute::DataType::S32:
armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int32_t*>(memory),
this->GetTensor());
break;
default:
{
throw armnn::UnimplementedException();
}
}
this->Unmap();
}
mutable arm_compute::CLSubTensor m_Tensor;
ITensorHandle* parentHandle = nullptr;
};
} // namespace armnn