//
// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include <armnn/backends/ICustomAllocator.hpp>
#include <armnn/Descriptors.hpp>
#include <armnn/Exceptions.hpp>
#include <armnn/INetwork.hpp>
#include <armnn/IRuntime.hpp>
#include <armnn/Utils.hpp>
#include <armnn/BackendRegistry.hpp>
#include <cl/ClBackend.hpp>
#if defined(ARMCOMPUTENEON_ENABLED)
#include <neon/NeonBackend.hpp>
#endif
#include <doctest/doctest.h>
#include <armnn/utility/IgnoreUnused.hpp>

#include <algorithm> // std::fill_n
#include <cstdlib>   // std::malloc, std::free
#include <memory>    // std::align, std::make_shared
// Contains the OpenCL interfaces for mapping memory in the GPU page tables
// Requires the OpenCL backend to be included (GpuAcc)
#include <arm_compute/core/CL/CLKernelLibrary.h>
#include <CL/cl_ext.h>
#include <arm_compute/runtime/CL/CLScheduler.h>
/** Sample implementation of ICustomAllocator for use with the ClBackend.
 * Note: any memory allocated must be host accessible with write access to allow for weights and biases
 * to be passed in. Read access is not required. */
class SampleClBackendCustomAllocator : public armnn::ICustomAllocator
{
public:
    SampleClBackendCustomAllocator() = default;

    void* allocate(size_t size, size_t alignment) override
    {
        // If alignment is 0, fall back to CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE for the alignment
        if (alignment == 0)
        {
            alignment =
                arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
        }
        // Over-allocate so that an aligned pointer, plus room to stash the base pointer
        // in front of it, is guaranteed to fit inside the block
        size_t space = size + alignment + sizeof(void*);
        void* basePtr = std::malloc(space);
        if (basePtr == nullptr)
        {
            throw armnn::Exception("SampleClBackendCustomAllocator::Allocation failed");
        }
        void* alignedPtr = static_cast<char*>(basePtr) + sizeof(void*);
        space -= sizeof(void*);
        if (std::align(alignment, size, alignedPtr, space) == nullptr)
        {
            std::free(basePtr);
            throw armnn::Exception("SampleClBackendCustomAllocator::Alignment failed");
        }
        // Stash the base pointer just before the aligned region so free() can recover it;
        // freeing the aligned pointer directly would be undefined behaviour
        static_cast<void**>(alignedPtr)[-1] = basePtr;
        return alignedPtr;
    }

    /** Frees the allocation by releasing the base pointer stashed by allocate() */
    void free(void* ptr) override
    {
        if (ptr != nullptr)
        {
            std::free(static_cast<void**>(ptr)[-1]);
        }
    }

    armnn::MemorySource GetMemorySourceType() override
    {
        return armnn::MemorySource::Malloc;
    }
};
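/** Builds the small test network used by the cases below: a single FullyConnected
 *  layer with a constant 1x1 identity weight, so the network passes its input
 *  value straight through to its output.
 *
 *      Input(0) ----> FullyConnected ----> Output(0)
 *      Weights ------^
 */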
armnn::INetworkPtr CreateTestNetwork(armnn::TensorInfo& inputTensorInfo)
{
    using namespace armnn;

    FullyConnectedDescriptor fullyConnectedDesc;
    float weightsData[] = {1.0f}; // Identity
    TensorInfo weightsInfo(TensorShape({1, 1}), DataType::Float32, 0.0f, 0, true);
    weightsInfo.SetConstant(true);
    ConstTensor weights(weightsInfo, weightsData);

    INetworkPtr network = INetwork::Create();
    IConnectableLayer* const inputLayer = network->AddInputLayer(0);
    IConnectableLayer* const weightsLayer = network->AddConstantLayer(weights, "Weights");
    IConnectableLayer* const fullyConnectedLayer =
        network->AddFullyConnectedLayer(fullyConnectedDesc, "fully connected");
    IConnectableLayer* const outputLayer = network->AddOutputLayer(0);

    inputLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(0));
    weightsLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(1));
    fullyConnectedLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));

    // Set the tensor infos in the network
    inputLayer->GetOutputSlot(0).SetTensorInfo(inputTensorInfo);
    weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo);
    TensorInfo outputTensorInfo(TensorShape({1, 1}), DataType::Float32);
    fullyConnectedLayer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);

    return network;
}
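// For reference, the allocator registration pattern exercised by the tests below:
// the allocator is handed to the runtime at creation time, keyed by the id of the
// backend it should serve.
//
//     auto allocator = std::make_shared<SampleClBackendCustomAllocator>();
//     armnn::IRuntime::CreationOptions options;
//     options.m_CustomAllocatorMap = {{"GpuAcc", std::move(allocator)}};
//     armnn::IRuntimePtr runtime = armnn::IRuntime::Create(options);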
TEST_SUITE("ClCustomAllocatorTests")
{
// This is a copy of the SimpleSample app, modified to use a custom allocator for the
// ClBackend. It creates a FullyConnected network with a single layer that takes a
// single number as its input.
TEST_CASE("ClCustomAllocatorTest")
{
    using namespace armnn;

    float number = 3; // value fed through the identity network

    // Construct ArmNN network
    armnn::NetworkId networkIdentifier;
    TensorInfo inputTensorInfo(TensorShape({1, 1}), DataType::Float32);
    INetworkPtr myNetwork = CreateTestNetwork(inputTensorInfo);

    // Create ArmNN runtime, registering the custom allocator against the GpuAcc backend
    IRuntime::CreationOptions options; // default options
    auto customAllocator = std::make_shared<SampleClBackendCustomAllocator>();
    options.m_CustomAllocatorMap = {{"GpuAcc", std::move(customAllocator)}};
    IRuntimePtr run = IRuntime::Create(options);

    // Optimise ArmNN network, enabling import and export so the runtime can use
    // the buffers allocated below directly
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true;
    optOptions.m_ExportEnabled = true;
    armnn::IOptimizedNetworkPtr optNet = Optimize(*myNetwork, {"GpuAcc"}, run->GetDeviceSpec(), optOptions);
    CHECK(optNet);

    // Load graph into runtime
    std::string ignoredErrorMessage;
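    // Import inputs and export outputs as MemorySource::Malloc, matching the memory
    // source reported by SampleClBackendCustomAllocator::GetMemorySourceType()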
    INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
    run->LoadNetwork(networkIdentifier, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Create structures for input & output
    unsigned int numElements = inputTensorInfo.GetNumElements();
    size_t totalBytes = numElements * sizeof(float);
    const size_t alignment =
        arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();

    // Fill the input buffer with the test value
    void* alignedInputPtr = options.m_CustomAllocatorMap["GpuAcc"]->allocate(totalBytes, alignment);
    auto* inputPtr = reinterpret_cast<float*>(alignedInputPtr);
    std::fill_n(inputPtr, numElements, number);

    // Fill the output buffer with a sentinel value so we can tell it was overwritten
    void* alignedOutputPtr = options.m_CustomAllocatorMap["GpuAcc"]->allocate(totalBytes, alignment);
    auto* outputPtr = reinterpret_cast<float*>(alignedOutputPtr);
    std::fill_n(outputPtr, numElements, -10.0f);

    armnn::TensorInfo inputTensorInfo2 = run->GetInputTensorInfo(networkIdentifier, 0);
    inputTensorInfo2.SetConstant(true);
    armnn::InputTensors inputTensors
    {
        {0, armnn::ConstTensor(inputTensorInfo2, alignedInputPtr)},
    };
    armnn::OutputTensors outputTensors
    {
        {0, armnn::Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), alignedOutputPtr)}
    };

    // Execute network
    run->EnqueueWorkload(networkIdentifier, inputTensors, outputTensors);

    // Tell the CLBackend to sync memory so we can read the output
    arm_compute::CLScheduler::get().sync();
    auto* outputResult = reinterpret_cast<float*>(alignedOutputPtr);
    CHECK(outputResult[0] == number);

    run->UnloadNetwork(networkIdentifier);

    // Release the buffers through the allocator that created them
    options.m_CustomAllocatorMap["GpuAcc"]->free(alignedInputPtr);
    options.m_CustomAllocatorMap["GpuAcc"]->free(alignedOutputPtr);

    auto& backendRegistry = armnn::BackendRegistryInstance();
    backendRegistry.DeregisterAllocator(ClBackend::GetIdStatic());
}
// Only run this test if NEON is enabled
#if defined(ARMCOMPUTENEON_ENABLED)
TEST_CASE("ClCustomAllocatorCpuAccNegativeTest")
{
    using namespace armnn;

    // Create ArmNN runtime, mapping the custom allocator to CpuAcc. The CL-specific
    // allocator is not valid for CpuAcc, so the backend is expected to be excluded
    // and Optimize() to throw because no preferred backend remains available.
    IRuntime::CreationOptions options; // default options
    auto customAllocator = std::make_shared<SampleClBackendCustomAllocator>();
    options.m_CustomAllocatorMap = {{"CpuAcc", std::move(customAllocator)}};
    IRuntimePtr run = IRuntime::Create(options);
    TensorInfo inputTensorInfo(TensorShape({1, 1}), DataType::Float32);
    INetworkPtr myNetwork = CreateTestNetwork(inputTensorInfo);

    // Optimise ArmNN network
    OptimizerOptions optOptions;
    optOptions.m_ImportEnabled = true;
    IOptimizedNetworkPtr optNet(nullptr, nullptr);
    std::vector<std::string> errMessages;

    CHECK_THROWS_AS_MESSAGE(Optimize(*myNetwork, {"CpuAcc"}, run->GetDeviceSpec(), optOptions, errMessages),
                            armnn::InvalidArgumentException,
                            "Expected an exception as GetAvailablePreferredBackends() should be empty in Optimize().");

    auto& backendRegistry = armnn::BackendRegistryInstance();
    backendRegistry.DeregisterAllocator(NeonBackend::GetIdStatic());
}
#endif
TEST_CASE("ClCustomAllocatorGpuAccNullptrTest")
{
    using namespace armnn;

    // Map a null allocator to GpuAcc; runtime creation should throw
    IRuntime::CreationOptions options; // default options
    options.m_CustomAllocatorMap = {{"GpuAcc", nullptr}};

    CHECK_THROWS_AS_MESSAGE(IRuntimePtr run = IRuntime::Create(options),
                            armnn::Exception,
                            "Expected exception in RuntimeImpl::RuntimeImpl() as allocator was nullptr.");
}
} // test suite ClCustomAllocatorTests