Blame - samples/CustomMemoryAllocatorSample.cpp - ml/armnn

blob: da249e0f4dcf2156da65da36655c9c8caa0badb9 [file] [log] [blame]

Jan Eilers	c1c872f	2021-07-22 13:17:04 +0100	[diff] [blame]	1	//
				2	// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
				3	// SPDX-License-Identifier: MIT
				4	//
				5
#include <armnn/ArmNN.hpp>
#include <armnn/backends/ICustomAllocator.hpp>

#include <arm_compute/core/CL/CLKernelLibrary.h>
#include <arm_compute/runtime/CL/CLScheduler.h>

#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <memory>
#include <string>
				13
				14	/** Sample implementation of ICustomAllocator for use with the ClBackend.
				15	* Note: any memory allocated must be host addressable with write access
				16	* in order for ArmNN to be able to properly use it. */
				17	class SampleClBackendCustomAllocator : public armnn::ICustomAllocator
				18	{
				19	public:
				20	SampleClBackendCustomAllocator() = default;
				21
Francis Murtagh	e8d7ccb	2021-10-14 17:30:24 +0100	[diff] [blame]	22	void* allocate(size_t size, size_t alignment) override
Jan Eilers	c1c872f	2021-07-22 13:17:04 +0100	[diff] [blame]	23	{
				24	// If alignment is 0 just use the CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE for alignment
				25	if (alignment == 0)
				26	{
				27	alignment = arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
				28	}
				29	size_t space = size + alignment + alignment;
				30	auto allocatedMemPtr = std::malloc(space * sizeof(size_t));
				31
				32	if (std::align(alignment, size, allocatedMemPtr, space) == nullptr)
				33	{
				34	throw armnn::Exception("SampleClBackendCustomAllocator::Alignment failed");
				35	}
				36	return allocatedMemPtr;
				37	}
David Monahan	6642b8a	2021-11-04 16:31:46 +0000	[diff] [blame]	38
				39	void free(void* ptr) override
				40	{
				41	std::free(ptr);
				42	}
				43
				44	armnn::MemorySource GetMemorySourceType() override
				45	{
				46	return armnn::MemorySource::Malloc;
				47	}
Jan Eilers	c1c872f	2021-07-22 13:17:04 +0100	[diff] [blame]	48	};
				49
				50
// A simple example application to show the usage of a custom memory allocator. In this sample, the user's single
				52	// input number is multiplied by 1.0f using a fully connected layer with a single neuron to produce an output
				53	// number that is the same as the input. All memory required to execute this mini network is allocated with
				54	// the provided custom allocator.
				55	//
				56	// Using a Custom Allocator is required for use with Protected Mode and Protected Memory.
				57	// This example is provided using only unprotected malloc as Protected Memory is platform
				58	// and implementation specific.
				59	//
				60	// Note: This example is similar to the SimpleSample application that can also be found in armnn/samples.
				61	// The differences are in the use of a custom allocator, the backend is GpuAcc, and the inputs/outputs
				62	// are being imported instead of copied. (Import must be enabled when using a Custom Allocator)
				63	// You might find this useful for comparison.
				64	int main()
				65	{
				66	using namespace armnn;
				67
				68	float number;
				69	std::cout << "Please enter a number: " << std::endl;
				70	std::cin >> number;
				71
				72	// Turn on logging to standard output
				73	// This is useful in this sample so that users can learn more about what is going on
Francis Murtagh	bb6c649	2022-02-09 15:13:38 +0000	[diff] [blame]	74	ConfigureLogging(true, false, LogSeverity::Info);
Jan Eilers	c1c872f	2021-07-22 13:17:04 +0100	[diff] [blame]	75
				76	// Construct ArmNN network
Francis Murtagh	bb6c649	2022-02-09 15:13:38 +0000	[diff] [blame]	77	NetworkId networkIdentifier;
				78	INetworkPtr network = INetwork::Create();
				79	FullyConnectedDescriptor fullyConnectedDesc;
Jan Eilers	c1c872f	2021-07-22 13:17:04 +0100	[diff] [blame]	80	float weightsData[] = {1.0f}; // Identity
Cathal Corbett	5b8093c	2021-10-22 11:12:07 +0100	[diff] [blame]	81	TensorInfo weightsInfo(TensorShape({1, 1}), DataType::Float32, 0.0f, 0, true);
Jan Eilers	c1c872f	2021-07-22 13:17:04 +0100	[diff] [blame]	82	weightsInfo.SetConstant(true);
Francis Murtagh	bb6c649	2022-02-09 15:13:38 +0000	[diff] [blame]	83	ConstTensor weights(weightsInfo, weightsData);
				84
				85	IConnectableLayer* inputLayer = network->AddInputLayer(0);
				86	IConnectableLayer* weightsLayer = network->AddConstantLayer(weights, "Weights");
				87	IConnectableLayer* fullyConnectedLayer =
				88	network->AddFullyConnectedLayer(fullyConnectedDesc, "fully connected");
				89	IConnectableLayer* outputLayer = network->AddOutputLayer(0);
				90
				91	inputLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(0));
				92	weightsLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(1));
				93	fullyConnectedLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
				94	weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo);
Jan Eilers	c1c872f	2021-07-22 13:17:04 +0100	[diff] [blame]	95
				96	// Create ArmNN runtime:
				97	//
				98	// This is the interesting bit when executing a model with a custom allocator.
				99	// You can have different allocators for different backends. To support this
				100	// the runtime creation option has a map that takes a BackendId and the corresponding
				101	// allocator that should be used for that backend.
				102	// Only GpuAcc supports a Custom Allocator for now
				103	//
				104	// Note: This is not covered in this example but if you want to run a model on
				105	// protected memory a custom allocator needs to be provided that supports
				106	// protected memory allocations and the MemorySource of that allocator is
				107	// set to MemorySource::DmaBufProtected
				108	IRuntime::CreationOptions options;
				109	auto customAllocator = std::make_shared<SampleClBackendCustomAllocator>();
				110	options.m_CustomAllocatorMap = {{"GpuAcc", std::move(customAllocator)}};
				111	IRuntimePtr runtime = IRuntime::Create(options);
				112
				113	//Set the tensors in the network.
				114	TensorInfo inputTensorInfo(TensorShape({1, 1}), DataType::Float32);
Francis Murtagh	bb6c649	2022-02-09 15:13:38 +0000	[diff] [blame]	115	inputLayer->GetOutputSlot(0).SetTensorInfo(inputTensorInfo);
Jan Eilers	c1c872f	2021-07-22 13:17:04 +0100	[diff] [blame]	116
				117	unsigned int numElements = inputTensorInfo.GetNumElements();
				118	size_t totalBytes = numElements * sizeof(float);
				119
				120	TensorInfo outputTensorInfo(TensorShape({1, 1}), DataType::Float32);
Francis Murtagh	bb6c649	2022-02-09 15:13:38 +0000	[diff] [blame]	121	fullyConnectedLayer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
Jan Eilers	c1c872f	2021-07-22 13:17:04 +0100	[diff] [blame]	122
				123	// Optimise ArmNN network
				124	OptimizerOptions optOptions;
				125	optOptions.m_ImportEnabled = true;
Francis Murtagh	bb6c649	2022-02-09 15:13:38 +0000	[diff] [blame]	126	IOptimizedNetworkPtr optNet =
				127	Optimize(*network, {"GpuAcc"}, runtime->GetDeviceSpec(), optOptions);
Jan Eilers	c1c872f	2021-07-22 13:17:04 +0100	[diff] [blame]	128	if (!optNet)
				129	{
				130	// This shouldn't happen for this simple sample, with GpuAcc backend.
				131	// But in general usage Optimize could fail if the backend at runtime cannot
				132	// support the model that has been provided.
				133	std::cerr << "Error: Failed to optimise the input network." << std::endl;
				134	return 1;
				135	}
				136
				137	// Load graph into runtime
				138	std::string ignoredErrorMessage;
				139	INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
				140	runtime->LoadNetwork(networkIdentifier, std::move(optNet), ignoredErrorMessage, networkProperties);
				141
				142	// Creates structures for input & output
				143	const size_t alignment =
				144	arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
				145
				146	void* alignedInputPtr = options.m_CustomAllocatorMap["GpuAcc"]->allocate(totalBytes, alignment);
				147
				148	// Input with negative values
				149	auto* inputPtr = reinterpret_cast<float*>(alignedInputPtr);
				150	std::fill_n(inputPtr, numElements, number);
				151
				152	void* alignedOutputPtr = options.m_CustomAllocatorMap["GpuAcc"]->allocate(totalBytes, alignment);
				153	auto* outputPtr = reinterpret_cast<float*>(alignedOutputPtr);
				154	std::fill_n(outputPtr, numElements, -10.0f);
				155
Cathal Corbett	5b8093c	2021-10-22 11:12:07 +0100	[diff] [blame]	156	inputTensorInfo = runtime->GetInputTensorInfo(networkIdentifier, 0);
				157	inputTensorInfo.SetConstant(true);
Francis Murtagh	bb6c649	2022-02-09 15:13:38 +0000	[diff] [blame]	158	InputTensors inputTensors
Jan Eilers	c1c872f	2021-07-22 13:17:04 +0100	[diff] [blame]	159	{
Francis Murtagh	bb6c649	2022-02-09 15:13:38 +0000	[diff] [blame]	160	{0, ConstTensor(inputTensorInfo, alignedInputPtr)},
Jan Eilers	c1c872f	2021-07-22 13:17:04 +0100	[diff] [blame]	161	};
Francis Murtagh	bb6c649	2022-02-09 15:13:38 +0000	[diff] [blame]	162	OutputTensors outputTensors
Jan Eilers	c1c872f	2021-07-22 13:17:04 +0100	[diff] [blame]	163	{
Francis Murtagh	bb6c649	2022-02-09 15:13:38 +0000	[diff] [blame]	164	{0, Tensor(runtime->GetOutputTensorInfo(networkIdentifier, 0), alignedOutputPtr)}
Jan Eilers	c1c872f	2021-07-22 13:17:04 +0100	[diff] [blame]	165	};
				166
				167	// Execute network
				168	runtime->EnqueueWorkload(networkIdentifier, inputTensors, outputTensors);
				169
				170	// Tell the CLBackend to sync memory so we can read the output.
				171	arm_compute::CLScheduler::get().sync();
				172	auto* outputResult = reinterpret_cast<float*>(alignedOutputPtr);
				173	std::cout << "Your number was " << outputResult[0] << std::endl;
				174	runtime->UnloadNetwork(networkIdentifier);
				175	return 0;
				176
				177	}