//
// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
5
#include <armnn/ArmNN.hpp>
#include <armnn/backends/ICustomAllocator.hpp>

#include <arm_compute/core/CL/CLKernelLibrary.h>
#include <arm_compute/runtime/CL/CLScheduler.h>

#include <algorithm>
#include <cstddef>
#include <cstdlib>
#include <iostream>
#include <memory>
#include <string>
13
14/** Sample implementation of ICustomAllocator for use with the ClBackend.
15 * Note: any memory allocated must be host addressable with write access
16 * in order for ArmNN to be able to properly use it. */
17class SampleClBackendCustomAllocator : public armnn::ICustomAllocator
18{
19public:
20 SampleClBackendCustomAllocator() = default;
21
Francis Murtaghe8d7ccb2021-10-14 17:30:24 +010022 void* allocate(size_t size, size_t alignment) override
Jan Eilersc1c872f2021-07-22 13:17:04 +010023 {
24 // If alignment is 0 just use the CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE for alignment
25 if (alignment == 0)
26 {
27 alignment = arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
28 }
29 size_t space = size + alignment + alignment;
30 auto allocatedMemPtr = std::malloc(space * sizeof(size_t));
31
32 if (std::align(alignment, size, allocatedMemPtr, space) == nullptr)
33 {
34 throw armnn::Exception("SampleClBackendCustomAllocator::Alignment failed");
35 }
36 return allocatedMemPtr;
37 }
David Monahan6642b8a2021-11-04 16:31:46 +000038
39 void free(void* ptr) override
40 {
41 std::free(ptr);
42 }
43
44 armnn::MemorySource GetMemorySourceType() override
45 {
46 return armnn::MemorySource::Malloc;
47 }
Jan Eilersc1c872f2021-07-22 13:17:04 +010048};
49
50
51// A simple example application to show the usage of a custom memory allocator. In this sample, the users single
52// input number is multiplied by 1.0f using a fully connected layer with a single neuron to produce an output
53// number that is the same as the input. All memory required to execute this mini network is allocated with
54// the provided custom allocator.
55//
56// Using a Custom Allocator is required for use with Protected Mode and Protected Memory.
57// This example is provided using only unprotected malloc as Protected Memory is platform
58// and implementation specific.
59//
60// Note: This example is similar to the SimpleSample application that can also be found in armnn/samples.
61// The differences are in the use of a custom allocator, the backend is GpuAcc, and the inputs/outputs
62// are being imported instead of copied. (Import must be enabled when using a Custom Allocator)
63// You might find this useful for comparison.
64int main()
65{
66 using namespace armnn;
67
68 float number;
69 std::cout << "Please enter a number: " << std::endl;
70 std::cin >> number;
71
72 // Turn on logging to standard output
73 // This is useful in this sample so that users can learn more about what is going on
Francis Murtaghbb6c6492022-02-09 15:13:38 +000074 ConfigureLogging(true, false, LogSeverity::Info);
Jan Eilersc1c872f2021-07-22 13:17:04 +010075
76 // Construct ArmNN network
Francis Murtaghbb6c6492022-02-09 15:13:38 +000077 NetworkId networkIdentifier;
78 INetworkPtr network = INetwork::Create();
79 FullyConnectedDescriptor fullyConnectedDesc;
Jan Eilersc1c872f2021-07-22 13:17:04 +010080 float weightsData[] = {1.0f}; // Identity
Cathal Corbett5b8093c2021-10-22 11:12:07 +010081 TensorInfo weightsInfo(TensorShape({1, 1}), DataType::Float32, 0.0f, 0, true);
Jan Eilersc1c872f2021-07-22 13:17:04 +010082 weightsInfo.SetConstant(true);
Francis Murtaghbb6c6492022-02-09 15:13:38 +000083 ConstTensor weights(weightsInfo, weightsData);
84
85 IConnectableLayer* inputLayer = network->AddInputLayer(0);
86 IConnectableLayer* weightsLayer = network->AddConstantLayer(weights, "Weights");
87 IConnectableLayer* fullyConnectedLayer =
88 network->AddFullyConnectedLayer(fullyConnectedDesc, "fully connected");
89 IConnectableLayer* outputLayer = network->AddOutputLayer(0);
90
91 inputLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(0));
92 weightsLayer->GetOutputSlot(0).Connect(fullyConnectedLayer->GetInputSlot(1));
93 fullyConnectedLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
94 weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo);
Jan Eilersc1c872f2021-07-22 13:17:04 +010095
96 // Create ArmNN runtime:
97 //
98 // This is the interesting bit when executing a model with a custom allocator.
99 // You can have different allocators for different backends. To support this
100 // the runtime creation option has a map that takes a BackendId and the corresponding
101 // allocator that should be used for that backend.
102 // Only GpuAcc supports a Custom Allocator for now
103 //
104 // Note: This is not covered in this example but if you want to run a model on
105 // protected memory a custom allocator needs to be provided that supports
106 // protected memory allocations and the MemorySource of that allocator is
107 // set to MemorySource::DmaBufProtected
108 IRuntime::CreationOptions options;
109 auto customAllocator = std::make_shared<SampleClBackendCustomAllocator>();
110 options.m_CustomAllocatorMap = {{"GpuAcc", std::move(customAllocator)}};
111 IRuntimePtr runtime = IRuntime::Create(options);
112
113 //Set the tensors in the network.
114 TensorInfo inputTensorInfo(TensorShape({1, 1}), DataType::Float32);
Francis Murtaghbb6c6492022-02-09 15:13:38 +0000115 inputLayer->GetOutputSlot(0).SetTensorInfo(inputTensorInfo);
Jan Eilersc1c872f2021-07-22 13:17:04 +0100116
117 unsigned int numElements = inputTensorInfo.GetNumElements();
118 size_t totalBytes = numElements * sizeof(float);
119
120 TensorInfo outputTensorInfo(TensorShape({1, 1}), DataType::Float32);
Francis Murtaghbb6c6492022-02-09 15:13:38 +0000121 fullyConnectedLayer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
Jan Eilersc1c872f2021-07-22 13:17:04 +0100122
123 // Optimise ArmNN network
124 OptimizerOptions optOptions;
125 optOptions.m_ImportEnabled = true;
Francis Murtaghbb6c6492022-02-09 15:13:38 +0000126 IOptimizedNetworkPtr optNet =
127 Optimize(*network, {"GpuAcc"}, runtime->GetDeviceSpec(), optOptions);
Jan Eilersc1c872f2021-07-22 13:17:04 +0100128 if (!optNet)
129 {
130 // This shouldn't happen for this simple sample, with GpuAcc backend.
131 // But in general usage Optimize could fail if the backend at runtime cannot
132 // support the model that has been provided.
133 std::cerr << "Error: Failed to optimise the input network." << std::endl;
134 return 1;
135 }
136
137 // Load graph into runtime
138 std::string ignoredErrorMessage;
139 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
140 runtime->LoadNetwork(networkIdentifier, std::move(optNet), ignoredErrorMessage, networkProperties);
141
142 // Creates structures for input & output
143 const size_t alignment =
144 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
145
146 void* alignedInputPtr = options.m_CustomAllocatorMap["GpuAcc"]->allocate(totalBytes, alignment);
147
148 // Input with negative values
149 auto* inputPtr = reinterpret_cast<float*>(alignedInputPtr);
150 std::fill_n(inputPtr, numElements, number);
151
152 void* alignedOutputPtr = options.m_CustomAllocatorMap["GpuAcc"]->allocate(totalBytes, alignment);
153 auto* outputPtr = reinterpret_cast<float*>(alignedOutputPtr);
154 std::fill_n(outputPtr, numElements, -10.0f);
155
Cathal Corbett5b8093c2021-10-22 11:12:07 +0100156 inputTensorInfo = runtime->GetInputTensorInfo(networkIdentifier, 0);
157 inputTensorInfo.SetConstant(true);
Francis Murtaghbb6c6492022-02-09 15:13:38 +0000158 InputTensors inputTensors
Jan Eilersc1c872f2021-07-22 13:17:04 +0100159 {
Francis Murtaghbb6c6492022-02-09 15:13:38 +0000160 {0, ConstTensor(inputTensorInfo, alignedInputPtr)},
Jan Eilersc1c872f2021-07-22 13:17:04 +0100161 };
Francis Murtaghbb6c6492022-02-09 15:13:38 +0000162 OutputTensors outputTensors
Jan Eilersc1c872f2021-07-22 13:17:04 +0100163 {
Francis Murtaghbb6c6492022-02-09 15:13:38 +0000164 {0, Tensor(runtime->GetOutputTensorInfo(networkIdentifier, 0), alignedOutputPtr)}
Jan Eilersc1c872f2021-07-22 13:17:04 +0100165 };
166
167 // Execute network
168 runtime->EnqueueWorkload(networkIdentifier, inputTensors, outputTensors);
169
170 // Tell the CLBackend to sync memory so we can read the output.
171 arm_compute::CLScheduler::get().sync();
172 auto* outputResult = reinterpret_cast<float*>(alignedOutputPtr);
173 std::cout << "Your number was " << outputResult[0] << std::endl;
174 runtime->UnloadNetwork(networkIdentifier);
175 return 0;
176
177}