blob: 4d1a0e1cfbce0875faf3d935f1ddbfe732467086 [file] [log] [blame]
Jan Eilersc1c872f2021-07-22 13:17:04 +01001//
2// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
3// SPDX-License-Identifier: MIT
4//
5
6#include <armnn/backends/ICustomAllocator.hpp>
7#include <armnn/Descriptors.hpp>
8#include <armnn/Exceptions.hpp>
9#include <armnn/INetwork.hpp>
10#include <armnn/IRuntime.hpp>
11#include <armnn/Utils.hpp>
12#include <armnn/BackendRegistry.hpp>
13#include <cl/ClBackend.hpp>
14
15#include <doctest/doctest.h>
16
17// Contains the OpenCl interfaces for mapping memory in the Gpu Page Tables
18// Requires the OpenCl backend to be included (GpuAcc)
19#include <arm_compute/core/CL/CLKernelLibrary.h>
20#include <CL/cl_ext.h>
21#include <arm_compute/runtime/CL/CLScheduler.h>
22
23
24/** Sample implementation of ICustomAllocator for use with the ClBackend.
25 * Note: any memory allocated must be host accessible with write access to allow for weights and biases
26 * to be passed in. Read access is not required.. */
27class SampleClBackendCustomAllocator : public armnn::ICustomAllocator
28{
29public:
30 SampleClBackendCustomAllocator() = default;
31
32 void* allocate(size_t size, size_t alignment)
33 {
34 // If alignment is 0 just use the CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE for alignment
35 if (alignment == 0)
36 {
37 alignment = arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
38 }
39 size_t space = size + alignment + alignment;
40 auto allocatedMemPtr = std::malloc(space * sizeof(size_t));
41
42 if (std::align(alignment, size, allocatedMemPtr, space) == nullptr)
43 {
44 throw armnn::Exception("SampleClBackendCustomAllocator::Alignment failed");
45 }
46 return allocatedMemPtr;
47 }
48
49 /** Interface to be implemented by the child class to free the allocated tensor */
50 void free(void* ptr)
51 {
52 std::free(ptr);
53 }
54
55 armnn::MemorySource GetMemorySourceType()
56 {
57 return armnn::MemorySource::Malloc;
58 }
59};
60
61TEST_SUITE("ClCustomAllocatorTests")
62{
63
64// This is a copy of the SimpleSample app modified to use a custom
65// allocator for the clbackend. It creates a FullyConnected network with a single layer
66// taking a single number as an input
67TEST_CASE("ClCustomAllocatorTest")
68{
69 using namespace armnn;
70
71 float number = 3;
72
73 // Construct ArmNN network
74 armnn::NetworkId networkIdentifier;
75 INetworkPtr myNetwork = INetwork::Create();
76
77 armnn::FullyConnectedDescriptor fullyConnectedDesc;
78 float weightsData[] = {1.0f}; // Identity
79 TensorInfo weightsInfo(TensorShape({1, 1}), DataType::Float32);
80 weightsInfo.SetConstant(true);
81 armnn::ConstTensor weights(weightsInfo, weightsData);
82
83 ARMNN_NO_DEPRECATE_WARN_BEGIN
84 IConnectableLayer* fullyConnected = myNetwork->AddFullyConnectedLayer(fullyConnectedDesc,
85 weights,
86 EmptyOptional(),
87 "fully connected");
88 ARMNN_NO_DEPRECATE_WARN_END
89 IConnectableLayer* InputLayer = myNetwork->AddInputLayer(0);
90 IConnectableLayer* OutputLayer = myNetwork->AddOutputLayer(0);
91 InputLayer->GetOutputSlot(0).Connect(fullyConnected->GetInputSlot(0));
92 fullyConnected->GetOutputSlot(0).Connect(OutputLayer->GetInputSlot(0));
93
94
95 // Create ArmNN runtime
96 IRuntime::CreationOptions options; // default options
97 auto customAllocator = std::make_shared<SampleClBackendCustomAllocator>();
98 options.m_CustomAllocatorMap = {{"GpuAcc", std::move(customAllocator)}};
99 IRuntimePtr run = IRuntime::Create(options);
100
101 //Set the tensors in the network.
102 TensorInfo inputTensorInfo(TensorShape({1, 1}), DataType::Float32);
103 InputLayer->GetOutputSlot(0).SetTensorInfo(inputTensorInfo);
104
105 TensorInfo outputTensorInfo(TensorShape({1, 1}), DataType::Float32);
106 fullyConnected->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
107
108 // Optimise ArmNN network
109 OptimizerOptions optOptions;
110 optOptions.m_ImportEnabled = true;
111 armnn::IOptimizedNetworkPtr optNet = Optimize(*myNetwork, {"GpuAcc"}, run->GetDeviceSpec(), optOptions);
112 CHECK(optNet);
113
114 // Load graph into runtime
115 std::string ignoredErrorMessage;
116 INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
117 run->LoadNetwork(networkIdentifier, std::move(optNet), ignoredErrorMessage, networkProperties);
118
119 // Creates structures for input & output
120 unsigned int numElements = inputTensorInfo.GetNumElements();
121 size_t totalBytes = numElements * sizeof(float);
122
123 const size_t alignment =
124 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
125
126 void* alignedInputPtr = options.m_CustomAllocatorMap["GpuAcc"]->allocate(totalBytes, alignment);
127
128 // Input with negative values
129 auto* inputPtr = reinterpret_cast<float*>(alignedInputPtr);
130 std::fill_n(inputPtr, numElements, number);
131
132 void* alignedOutputPtr = options.m_CustomAllocatorMap["GpuAcc"]->allocate(totalBytes, alignment);
133 auto* outputPtr = reinterpret_cast<float*>(alignedOutputPtr);
134 std::fill_n(outputPtr, numElements, -10.0f);
135
136 armnn::InputTensors inputTensors
137 {
138 {0, armnn::ConstTensor(run->GetInputTensorInfo(networkIdentifier, 0), alignedInputPtr)},
139 };
140 armnn::OutputTensors outputTensors
141 {
142 {0, armnn::Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), alignedOutputPtr)}
143 };
144
145 // Execute network
146 run->EnqueueWorkload(networkIdentifier, inputTensors, outputTensors);
147 run->UnloadNetwork(networkIdentifier);
148
149
150 // Tell the CLBackend to sync memory so we can read the output.
151 arm_compute::CLScheduler::get().sync();
152 auto* outputResult = reinterpret_cast<float*>(alignedOutputPtr);
153
154 run->UnloadNetwork(networkIdentifier);
155 CHECK(outputResult[0] == number);
156 auto& backendRegistry = armnn::BackendRegistryInstance();
157 backendRegistry.DeregisterAllocator(ClBackend::GetIdStatic());
158}
159
160} // test suite ClCustomAllocatorTests