Blame - src/backends/cl/test/ClCustomAllocatorTests.cpp - ml/armnn

blob: 4d1a0e1cfbce0875faf3d935f1ddbfe732467086 [file] [log] [blame]

Jan Eilers	c1c872f	2021-07-22 13:17:04 +0100	[diff] [blame^]	1	//
				2	// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
				3	// SPDX-License-Identifier: MIT
				4	//
				5
				6	#include <armnn/backends/ICustomAllocator.hpp>
				7	#include <armnn/Descriptors.hpp>
				8	#include <armnn/Exceptions.hpp>
				9	#include <armnn/INetwork.hpp>
				10	#include <armnn/IRuntime.hpp>
				11	#include <armnn/Utils.hpp>
				12	#include <armnn/BackendRegistry.hpp>
				13	#include <cl/ClBackend.hpp>
				14
				15	#include <doctest/doctest.h>
				16
				17	// Contains the OpenCl interfaces for mapping memory in the Gpu Page Tables
				18	// Requires the OpenCl backend to be included (GpuAcc)
				19	#include <arm_compute/core/CL/CLKernelLibrary.h>
				20	#include <CL/cl_ext.h>
				21	#include <arm_compute/runtime/CL/CLScheduler.h>
				22
				23
				24	/** Sample implementation of ICustomAllocator for use with the ClBackend.
				25	* Note: any memory allocated must be host accessible with write access to allow for weights and biases
				26	* to be passed in. Read access is not required.. */
				27	class SampleClBackendCustomAllocator : public armnn::ICustomAllocator
				28	{
				29	public:
				30	SampleClBackendCustomAllocator() = default;
				31
				32	void* allocate(size_t size, size_t alignment)
				33	{
				34	// If alignment is 0 just use the CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE for alignment
				35	if (alignment == 0)
				36	{
				37	alignment = arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
				38	}
				39	size_t space = size + alignment + alignment;
				40	auto allocatedMemPtr = std::malloc(space * sizeof(size_t));
				41
				42	if (std::align(alignment, size, allocatedMemPtr, space) == nullptr)
				43	{
				44	throw armnn::Exception("SampleClBackendCustomAllocator::Alignment failed");
				45	}
				46	return allocatedMemPtr;
				47	}
				48
				49	/** Interface to be implemented by the child class to free the allocated tensor */
				50	void free(void* ptr)
				51	{
				52	std::free(ptr);
				53	}
				54
				55	armnn::MemorySource GetMemorySourceType()
				56	{
				57	return armnn::MemorySource::Malloc;
				58	}
				59	};
				60
				61	TEST_SUITE("ClCustomAllocatorTests")
				62	{
				63
				64	// This is a copy of the SimpleSample app modified to use a custom
				65	// allocator for the clbackend. It creates a FullyConnected network with a single layer
				66	// taking a single number as an input
				67	TEST_CASE("ClCustomAllocatorTest")
				68	{
				69	using namespace armnn;
				70
				71	float number = 3;
				72
				73	// Construct ArmNN network
				74	armnn::NetworkId networkIdentifier;
				75	INetworkPtr myNetwork = INetwork::Create();
				76
				77	armnn::FullyConnectedDescriptor fullyConnectedDesc;
				78	float weightsData[] = {1.0f}; // Identity
				79	TensorInfo weightsInfo(TensorShape({1, 1}), DataType::Float32);
				80	weightsInfo.SetConstant(true);
				81	armnn::ConstTensor weights(weightsInfo, weightsData);
				82
				83	ARMNN_NO_DEPRECATE_WARN_BEGIN
				84	IConnectableLayer* fullyConnected = myNetwork->AddFullyConnectedLayer(fullyConnectedDesc,
				85	weights,
				86	EmptyOptional(),
				87	"fully connected");
				88	ARMNN_NO_DEPRECATE_WARN_END
				89	IConnectableLayer* InputLayer = myNetwork->AddInputLayer(0);
				90	IConnectableLayer* OutputLayer = myNetwork->AddOutputLayer(0);
				91	InputLayer->GetOutputSlot(0).Connect(fullyConnected->GetInputSlot(0));
				92	fullyConnected->GetOutputSlot(0).Connect(OutputLayer->GetInputSlot(0));
				93
				94
				95	// Create ArmNN runtime
				96	IRuntime::CreationOptions options; // default options
				97	auto customAllocator = std::make_shared<SampleClBackendCustomAllocator>();
				98	options.m_CustomAllocatorMap = {{"GpuAcc", std::move(customAllocator)}};
				99	IRuntimePtr run = IRuntime::Create(options);
				100
				101	//Set the tensors in the network.
				102	TensorInfo inputTensorInfo(TensorShape({1, 1}), DataType::Float32);
				103	InputLayer->GetOutputSlot(0).SetTensorInfo(inputTensorInfo);
				104
				105	TensorInfo outputTensorInfo(TensorShape({1, 1}), DataType::Float32);
				106	fullyConnected->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
				107
				108	// Optimise ArmNN network
				109	OptimizerOptions optOptions;
				110	optOptions.m_ImportEnabled = true;
				111	armnn::IOptimizedNetworkPtr optNet = Optimize(*myNetwork, {"GpuAcc"}, run->GetDeviceSpec(), optOptions);
				112	CHECK(optNet);
				113
				114	// Load graph into runtime
				115	std::string ignoredErrorMessage;
				116	INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
				117	run->LoadNetwork(networkIdentifier, std::move(optNet), ignoredErrorMessage, networkProperties);
				118
				119	// Creates structures for input & output
				120	unsigned int numElements = inputTensorInfo.GetNumElements();
				121	size_t totalBytes = numElements * sizeof(float);
				122
				123	const size_t alignment =
				124	arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
				125
				126	void* alignedInputPtr = options.m_CustomAllocatorMap["GpuAcc"]->allocate(totalBytes, alignment);
				127
				128	// Input with negative values
				129	auto* inputPtr = reinterpret_cast<float*>(alignedInputPtr);
				130	std::fill_n(inputPtr, numElements, number);
				131
				132	void* alignedOutputPtr = options.m_CustomAllocatorMap["GpuAcc"]->allocate(totalBytes, alignment);
				133	auto* outputPtr = reinterpret_cast<float*>(alignedOutputPtr);
				134	std::fill_n(outputPtr, numElements, -10.0f);
				135
				136	armnn::InputTensors inputTensors
				137	{
				138	{0, armnn::ConstTensor(run->GetInputTensorInfo(networkIdentifier, 0), alignedInputPtr)},
				139	};
				140	armnn::OutputTensors outputTensors
				141	{
				142	{0, armnn::Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), alignedOutputPtr)}
				143	};
				144
				145	// Execute network
				146	run->EnqueueWorkload(networkIdentifier, inputTensors, outputTensors);
				147	run->UnloadNetwork(networkIdentifier);
				148
				149
				150	// Tell the CLBackend to sync memory so we can read the output.
				151	arm_compute::CLScheduler::get().sync();
				152	auto* outputResult = reinterpret_cast<float*>(alignedOutputPtr);
				153
				154	run->UnloadNetwork(networkIdentifier);
				155	CHECK(outputResult[0] == number);
				156	auto& backendRegistry = armnn::BackendRegistryInstance();
				157	backendRegistry.DeregisterAllocator(ClBackend::GetIdStatic());
				158	}
				159
				160	} // test suite ClCustomAllocatorTests