// blob: 303676043e0037ca6251e367ac5b178e97c24736 [file] [log] [blame]
//
// Copyright © 2017-2024 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once
#include <BFloat16.hpp>
#include <Half.hpp>
#include <aclCommon/ArmComputeTensorHandle.hpp>
#include <aclCommon/ArmComputeTensorUtils.hpp>
#include <armnn/Exceptions.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <arm_compute/runtime/MemoryGroup.h>
#include <arm_compute/runtime/IMemoryGroup.h>
#include <arm_compute/runtime/Tensor.h>
#include <arm_compute/runtime/SubTensor.h>
#include <arm_compute/core/TensorShape.h>
#include <arm_compute/core/Coordinates.h>
#include "armnn/TypesUtils.hpp"
namespace armnn
{
class NeonTensorHandleDecorator;
/// Tensor handle backed by an owned arm_compute::Tensor running on Neon (CPU).
/// Supports either ACL-managed allocation (via Manage()/Allocate()) or zero-copy
/// import of caller-owned, suitably aligned Malloc memory (via Import()).
class NeonTensorHandle : public IAclTensorHandle
{
public:
    /// Builds the underlying arm_compute tensor from @p tensorInfo.
    /// Import flags default to Malloc, but importing stays disabled until
    /// SetImportEnabledFlag(true) is called.
    NeonTensorHandle(const TensorInfo& tensorInfo)
        : m_ImportFlags(static_cast<MemorySourceFlags>(MemorySource::Malloc)),
          m_Imported(false),
          m_IsImportEnabled(false),
          m_TypeAlignment(GetDataTypeSize(tensorInfo.GetDataType()))
    {
        armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo);
    }

    /// Builds the underlying arm_compute tensor with an explicit data layout.
    /// @param importFlags memory sources this handle is willing to import from.
    NeonTensorHandle(const TensorInfo& tensorInfo,
                     DataLayout dataLayout,
                     MemorySourceFlags importFlags = static_cast<MemorySourceFlags>(MemorySource::Malloc))
        : m_ImportFlags(importFlags),
          m_Imported(false),
          m_IsImportEnabled(false),
          m_TypeAlignment(GetDataTypeSize(tensorInfo.GetDataType()))
    {
        armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo, dataLayout);
    }

    arm_compute::ITensor& GetTensor() override { return m_Tensor; }
    arm_compute::ITensor const& GetTensor() const override { return m_Tensor; }

    /// Allocates backing memory for the tensor.
    /// No-op when importing is enabled: the buffer is expected via Import() instead.
    virtual void Allocate() override
    {
        if (!m_IsImportEnabled)
        {
            armnn::armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_Tensor);
        }
    }

    /// Hands the tensor to the memory group for lifetime management.
    /// No-op when importing is enabled (imported memory is caller-owned).
    /// @throws InvalidArgumentException if no memory group has been set.
    virtual void Manage() override
    {
        if (!m_IsImportEnabled)
        {
            ARMNN_THROW_INVALIDARG_MSG_IF_FALSE(m_MemoryGroup, "arm_compute::MemoryGroup is null.");
            m_MemoryGroup->manage(&m_Tensor);
        }
    }

    /// Top-level handle: never has a parent.
    virtual ITensorHandle* GetParent() const override { return nullptr; }

    virtual arm_compute::DataType GetDataType() const override
    {
        return m_Tensor.info()->data_type();
    }

    virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup) override
    {
        m_MemoryGroup = PolymorphicPointerDowncast<arm_compute::MemoryGroup>(memoryGroup);
    }

    /// Returns a pointer to the first element; mapping is a no-op for CPU memory.
    virtual const void* Map(bool /* blocking = true */) const override
    {
        return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes());
    }

    virtual void Unmap() const override {}

    TensorShape GetStrides() const override
    {
        return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes());
    }

    TensorShape GetShape() const override
    {
        return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape());
    }

    void SetImportFlags(MemorySourceFlags importFlags)
    {
        m_ImportFlags = importFlags;
    }

    MemorySourceFlags GetImportFlags() const override
    {
        return m_ImportFlags;
    }

    void SetImportEnabledFlag(bool importEnabledFlag)
    {
        m_IsImportEnabled = importEnabledFlag;
    }

    /// Memory can be imported only from Malloc and only when it satisfies the
    /// element-type alignment recorded at construction.
    bool CanBeImported(void* memory, MemorySource source) override
    {
        if (source != MemorySource::Malloc || reinterpret_cast<uintptr_t>(memory) % m_TypeAlignment)
        {
            return false;
        }
        return true;
    }

    /// Imports caller-owned @p memory as the tensor's backing buffer.
    /// Valid either before any allocation or as a re-import over a previous import;
    /// importing over ACL-allocated memory is an error.
    /// @return true on success (also recorded in m_Imported).
    /// @throws MemoryImportException on wrong source, disabled import, misalignment,
    ///         already-allocated tensor, or ACL import failure.
    virtual bool Import(void* memory, MemorySource source) override
    {
        if (!(m_ImportFlags & static_cast<MemorySourceFlags>(source)))
        {
            throw MemoryImportException("NeonTensorHandle::Incorrect import flag");
        }
        if (source != MemorySource::Malloc || !m_IsImportEnabled)
        {
            throw MemoryImportException("NeonTensorHandle::Import is disabled");
        }
        if (!CanBeImported(memory, source))
        {
            throw MemoryImportException("NeonTensorHandle::Import Attempting to import unaligned memory");
        }
        // A live buffer that was not itself imported came from Allocate();
        // importing over it would leak/clobber ACL-managed memory.
        if (!m_Imported && m_Tensor.buffer())
        {
            throw MemoryImportException(
                "NeonTensorHandle::Import Attempting to import on an already allocated tensor");
        }
        // First import (tensor not yet allocated) or re-import over a prior import:
        // both delegate to ACL and surface any failure via the Status message.
        arm_compute::Status status = m_Tensor.allocator()->import_memory(memory);
        // Use the overloaded bool operator of Status to check if it worked, if not throw an exception
        // with the Status error message
        m_Imported = bool(status);
        if (!m_Imported)
        {
            throw MemoryImportException(status.error_description());
        }
        return m_Imported;
    }

    virtual std::shared_ptr<ITensorHandle> DecorateTensorHandle(const TensorInfo& tensorInfo) override;

private:
    // Only used for testing: copies the whole tensor out to @p memory,
    // dispatching on the ACL element type.
    void CopyOutTo(void* memory) const override
    {
        switch (this->GetDataType())
        {
            case arm_compute::DataType::F32:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<float*>(memory));
                break;
            case arm_compute::DataType::U8:
            case arm_compute::DataType::QASYMM8:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<uint8_t*>(memory));
                break;
            case arm_compute::DataType::QSYMM8:
            case arm_compute::DataType::QASYMM8_SIGNED:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int8_t*>(memory));
                break;
            case arm_compute::DataType::BFLOAT16:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<armnn::BFloat16*>(memory));
                break;
            case arm_compute::DataType::F16:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<armnn::Half*>(memory));
                break;
            case arm_compute::DataType::S16:
            case arm_compute::DataType::QSYMM16:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int16_t*>(memory));
                break;
            case arm_compute::DataType::S32:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int32_t*>(memory));
                break;
            default:
            {
                throw armnn::UnimplementedException();
            }
        }
    }

    // Only used for testing: fills the tensor from @p memory,
    // dispatching on the ACL element type.
    void CopyInFrom(const void* memory) override
    {
        switch (this->GetDataType())
        {
            case arm_compute::DataType::F32:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const float*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::U8:
            case arm_compute::DataType::QASYMM8:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const uint8_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::QSYMM8:
            case arm_compute::DataType::QASYMM8_SIGNED:
            case arm_compute::DataType::QSYMM8_PER_CHANNEL:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int8_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::BFLOAT16:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const armnn::BFloat16*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::F16:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const armnn::Half*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::S16:
            case arm_compute::DataType::QSYMM16:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int16_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::S32:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int32_t*>(memory),
                                                                 this->GetTensor());
                break;
            default:
            {
                throw armnn::UnimplementedException();
            }
        }
    }

    arm_compute::Tensor m_Tensor;                            // owned ACL tensor
    std::shared_ptr<arm_compute::MemoryGroup> m_MemoryGroup; // optional; required by Manage() when not importing
    MemorySourceFlags m_ImportFlags;                         // sources Import() accepts
    bool m_Imported;                                         // true once a buffer has been imported
    bool m_IsImportEnabled;                                  // gates Allocate()/Manage() vs Import()
    const uintptr_t m_TypeAlignment;                         // element-size alignment required for import
    std::vector<std::shared_ptr<NeonTensorHandleDecorator>> m_Decorated;
};
class NeonSubTensorHandle : public IAclTensorHandle
{
public:
NeonSubTensorHandle(IAclTensorHandle* parent,
const arm_compute::TensorShape& shape,
const arm_compute::Coordinates& coords)
: m_Tensor(&parent->GetTensor(), shape, coords, true)
{
parentHandle = parent;
}
arm_compute::ITensor& GetTensor() override { return m_Tensor; }
arm_compute::ITensor const& GetTensor() const override { return m_Tensor; }
virtual void Allocate() override {}
virtual void Manage() override {}
virtual ITensorHandle* GetParent() const override { return parentHandle; }
virtual arm_compute::DataType GetDataType() const override
{
return m_Tensor.info()->data_type();
}
virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>&) override {}
virtual const void* Map(bool /* blocking = true */) const override
{
return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes());
}
virtual void Unmap() const override {}
TensorShape GetStrides() const override
{
return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes());
}
TensorShape GetShape() const override
{
return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape());
}
virtual std::shared_ptr<ITensorHandle> DecorateTensorHandle(const TensorInfo&) override
{
return nullptr;
};
private:
// Only used for testing
void CopyOutTo(void* memory) const override
{
switch (this->GetDataType())
{
case arm_compute::DataType::F32:
armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
static_cast<float*>(memory));
break;
case arm_compute::DataType::U8:
case arm_compute::DataType::QASYMM8:
armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
static_cast<uint8_t*>(memory));
break;
case arm_compute::DataType::QSYMM8:
case arm_compute::DataType::QASYMM8_SIGNED:
armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
static_cast<int8_t*>(memory));
break;
case arm_compute::DataType::S16:
case arm_compute::DataType::QSYMM16:
armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
static_cast<int16_t*>(memory));
break;
case arm_compute::DataType::S32:
armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
static_cast<int32_t*>(memory));
break;
default:
{
throw armnn::UnimplementedException();
}
}
}
// Only used for testing
void CopyInFrom(const void* memory) override
{
switch (this->GetDataType())
{
case arm_compute::DataType::F32:
armcomputetensorutils::CopyArmComputeITensorData(static_cast<const float*>(memory),
this->GetTensor());
break;
case arm_compute::DataType::U8:
case arm_compute::DataType::QASYMM8:
armcomputetensorutils::CopyArmComputeITensorData(static_cast<const uint8_t*>(memory),
this->GetTensor());
break;
case arm_compute::DataType::QSYMM8:
case arm_compute::DataType::QASYMM8_SIGNED:
armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int8_t*>(memory),
this->GetTensor());
break;
case arm_compute::DataType::S16:
case arm_compute::DataType::QSYMM16:
armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int16_t*>(memory),
this->GetTensor());
break;
case arm_compute::DataType::S32:
armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int32_t*>(memory),
this->GetTensor());
break;
default:
{
throw armnn::UnimplementedException();
}
}
}
arm_compute::SubTensor m_Tensor;
ITensorHandle* parentHandle = nullptr;
};
/// NeonTensorDecorator wraps an existing Neon tensor allowing us to override the TensorInfo for it
// Declaration only: member functions are defined in the corresponding .cpp file.
class NeonTensorDecorator : public arm_compute::ITensor
{
public:
    NeonTensorDecorator();
    // Wraps @p original, presenting @p info instead of the original's TensorInfo.
    // Non-owning: @p original must outlive this decorator.
    NeonTensorDecorator(arm_compute::ITensor* original, const TensorInfo& info);
    ~NeonTensorDecorator() = default;
    // Non-copyable (holds a raw pointer to a tensor it does not own);
    // movable so it can live in containers.
    NeonTensorDecorator(const NeonTensorDecorator&) = delete;
    NeonTensorDecorator& operator=(const NeonTensorDecorator&) = delete;
    NeonTensorDecorator(NeonTensorDecorator&&) = default;
    NeonTensorDecorator& operator=(NeonTensorDecorator&&) = default;
    // Inherited methods overridden:
    arm_compute::ITensorInfo* info() const override;
    arm_compute::ITensorInfo* info() override;
    uint8_t* buffer() const override;
private:
    arm_compute::ITensor* m_Original;            // wrapped tensor; not owned
    mutable arm_compute::TensorInfo m_TensorInfo; // overriding info returned by info()
};
/// Handle that wraps a parent handle's tensor through a NeonTensorDecorator,
/// exposing the same buffer under an overridden TensorInfo.
/// Allocation and lifetime management are no-ops: the parent owns the memory.
class NeonTensorHandleDecorator : public IAclTensorHandle
{
public:
    /// @param parent handle whose tensor is decorated (must outlive this handle).
    /// @param info   TensorInfo to present instead of the parent's.
    NeonTensorHandleDecorator(IAclTensorHandle* parent, const TensorInfo& info)
        : m_Tensor(&parent->GetTensor(), info)
    {
        parentHandle = parent;
    }

    arm_compute::ITensor& GetTensor() override { return m_Tensor; }
    arm_compute::ITensor const& GetTensor() const override { return m_Tensor; }

    // Memory belongs to the parent; nothing to allocate or manage here.
    virtual void Allocate() override {}
    virtual void Manage() override {}

    // NOTE(review): returns nullptr even though parentHandle is stored —
    // presumably deliberate so decorators are not treated as sub-tensors; confirm.
    virtual ITensorHandle* GetParent() const override { return nullptr; }

    virtual arm_compute::DataType GetDataType() const override
    {
        return m_Tensor.info()->data_type();
    }

    virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>&) override {}

    /// Returns a pointer to the first element; mapping is a no-op for CPU memory.
    virtual const void* Map(bool /* blocking = true */) const override
    {
        return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes());
    }
    virtual void Unmap() const override {}

    TensorShape GetStrides() const override
    {
        return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes());
    }

    TensorShape GetShape() const override
    {
        return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape());
    }

    virtual std::shared_ptr<ITensorHandle> DecorateTensorHandle(const TensorInfo&) override
    {
        return nullptr;
    };

private:
    // Only used for testing: copies the decorated tensor out to @p memory.
    // Type cases mirror NeonTensorHandle::CopyOutTo.
    void CopyOutTo(void* memory) const override
    {
        switch (this->GetDataType())
        {
            case arm_compute::DataType::F32:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<float*>(memory));
                break;
            case arm_compute::DataType::U8:
            case arm_compute::DataType::QASYMM8:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<uint8_t*>(memory));
                break;
            case arm_compute::DataType::QSYMM8:
            case arm_compute::DataType::QASYMM8_SIGNED:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int8_t*>(memory));
                break;
            case arm_compute::DataType::BFLOAT16:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<armnn::BFloat16*>(memory));
                break;
            case arm_compute::DataType::F16:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<armnn::Half*>(memory));
                break;
            case arm_compute::DataType::S16:
            case arm_compute::DataType::QSYMM16:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int16_t*>(memory));
                break;
            case arm_compute::DataType::S32:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int32_t*>(memory));
                break;
            default:
            {
                throw armnn::UnimplementedException();
            }
        }
    }

    // Only used for testing: fills the decorated tensor from @p memory.
    // Type cases mirror NeonTensorHandle::CopyInFrom.
    void CopyInFrom(const void* memory) override
    {
        switch (this->GetDataType())
        {
            case arm_compute::DataType::F32:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const float*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::U8:
            case arm_compute::DataType::QASYMM8:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const uint8_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::QSYMM8:
            case arm_compute::DataType::QASYMM8_SIGNED:
            case arm_compute::DataType::QSYMM8_PER_CHANNEL:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int8_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::BFLOAT16:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const armnn::BFloat16*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::F16:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const armnn::Half*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::S16:
            case arm_compute::DataType::QSYMM16:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int16_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::S32:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int32_t*>(memory),
                                                                 this->GetTensor());
                break;
            default:
            {
                throw armnn::UnimplementedException();
            }
        }
    }

    NeonTensorDecorator m_Tensor;          // decorator viewing the parent's buffer; non-owning
    ITensorHandle* parentHandle = nullptr; // non-owning back-pointer to the parent handle
};
} // namespace armnn