//
// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once

#include <armnn/backends/IBackendInternal.hpp>
#include <aclCommon/BaseMemoryManager.hpp>

#include <arm_compute/runtime/CL/CLBufferAllocator.h>
#include <arm_compute/runtime/CL/CLMemoryRegion.h>
#include <arm_compute/core/CL/CLKernelLibrary.h>
#include <CL/cl_ext.h>

#include <map>
#include <memory>
#include <string>
#include <utility>

// System includes for mapping and unmapping memory
#include <sys/mman.h>
18namespace armnn
19{
20
21// add new capabilities here..
22const BackendCapabilities gpuFsaCapabilities("GpuFsa",
23 {
24 {"NonConstWeights", false},
25 {"AsyncExecution", false},
26 {"ProtectedContentAllocation", false},
27 {"ConstantTensorsAsInputs", false},
28 {"PreImportIOTensors", false},
29 {"ExternallyManagedMemory", false},
30 {"MultiAxisPacking", false},
31 {"SingleAxisPacking", false}
32 });
33
34class GpuFsaBackend : public IBackendInternal
35{
36public:
37 GpuFsaBackend() : m_CustomAllocator(nullptr) {};
38 GpuFsaBackend(std::shared_ptr<ICustomAllocator> allocator)
39 {
40 UseCustomMemoryAllocator(allocator, armnn::EmptyOptional());
41 }
42 ~GpuFsaBackend() = default;
43
44 static const BackendId& GetIdStatic();
45 const BackendId& GetId() const override { return GetIdStatic(); }
46
47 IBackendInternal::IMemoryManagerUniquePtr CreateMemoryManager() const override;
48
49 IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory(
50 const IBackendInternal::IMemoryManagerSharedPtr& memoryManager = nullptr) const override;
51
52 IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory(TensorHandleFactoryRegistry& registry) const override;
53
54 IWorkloadFactoryPtr CreateWorkloadFactory(class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry,
55 const ModelOptions& modelOptions,
56 MemorySourceFlags inputFlags,
57 MemorySourceFlags outputFlags) const override;
58
59 std::vector<ITensorHandleFactory::FactoryId> GetHandleFactoryPreferences() const override;
60
61 void RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry) override;
62
63 void RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry,
64 MemorySourceFlags inputFlags,
65 MemorySourceFlags outputFlags) override;
66
67 IBackendInternal::IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions&) const override;
68 IBackendInternal::IBackendProfilingContextPtr CreateBackendProfilingContext(
69 const IRuntime::CreationOptions&, IBackendProfilingPtr& backendProfiling) override;
70
71 IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override;
72
73 OptimizationViews OptimizeSubgraphView(const SubgraphView& subgraph,
74 const ModelOptions& modelOptions) const override;
75
76 std::unique_ptr<ICustomAllocator> GetDefaultAllocator() const override;
77
78 BackendCapabilities GetCapabilities() const override
79 {
80 return gpuFsaCapabilities;
81 };
82
83 virtual bool UseCustomMemoryAllocator(std::shared_ptr<ICustomAllocator> allocator,
84 armnn::Optional<std::string&>) override
85 {
86 ARMNN_LOG(info) << "Using Custom Allocator for GpuFsaBackend";
87
88 // Set flag to signal the backend to use a custom memory allocator
89 m_CustomAllocator = std::make_shared<GpuFsaBackendCustomAllocatorWrapper>(std::move(allocator));
90 m_UsingCustomAllocator = true;
91 return m_UsingCustomAllocator;
92 }
93
94 // Cl requires a arm_compute::IAllocator we wrap the Arm NN ICustomAllocator to achieve this
95 class GpuFsaBackendCustomAllocatorWrapper : public arm_compute::IAllocator
96 {
97 public:
98 GpuFsaBackendCustomAllocatorWrapper(std::shared_ptr<ICustomAllocator> alloc) : m_CustomAllocator(alloc)
99 {}
100 // Inherited methods overridden:
101 void* allocate(size_t size, size_t alignment) override
102 {
103 auto alloc = m_CustomAllocator->allocate(size, alignment);
104 return MapAllocatedMemory(alloc, size, m_CustomAllocator->GetMemorySourceType());
105 }
106 void free(void* ptr) override
107 {
108 auto hostMemPtr = m_AllocatedBufferMappings[ptr];
109 clReleaseMemObject(static_cast<cl_mem>(ptr));
110 m_CustomAllocator->free(hostMemPtr);
111 }
112 std::unique_ptr<arm_compute::IMemoryRegion> make_region(size_t size, size_t alignment) override
113 {
114 auto hostMemPtr = m_CustomAllocator->allocate(size, alignment);
115 cl_mem buffer = MapAllocatedMemory(hostMemPtr, size, m_CustomAllocator->GetMemorySourceType());
116
117 return std::make_unique<ClBackendCustomAllocatorMemoryRegion>(cl::Buffer(buffer),
118 hostMemPtr,
119 m_CustomAllocator->GetMemorySourceType());
120 }
121 private:
122 cl_mem MapAllocatedMemory(void* memory, size_t size, MemorySource source)
123 {
124 // Round the size of the buffer to a multiple of the CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE
125 auto cachelineAlignment =
126 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
127 auto roundedSize = cachelineAlignment + size - (size % cachelineAlignment);
128
129 if (source == MemorySource::Malloc)
130 {
131 const cl_import_properties_arm importProperties[] =
132 {
133 CL_IMPORT_TYPE_ARM,
134 CL_IMPORT_TYPE_HOST_ARM,
135 0
136 };
137 cl_int error = CL_SUCCESS;
138 cl_mem buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
139 CL_MEM_READ_WRITE,
140 importProperties,
141 memory,
142 roundedSize,
143 &error);
144 if (error == CL_SUCCESS)
145 {
146 m_AllocatedBufferMappings.insert(std::make_pair(static_cast<void *>(buffer), memory));
147 return buffer;
148 }
149 throw armnn::Exception(
150 "Mapping allocated memory from CustomMemoryAllocator failed, errcode: " + std::to_string(error));
151 }
152 else if (source == MemorySource::DmaBuf)
153 {
154 const cl_import_properties_arm importProperties[] =
155 {
156 CL_IMPORT_TYPE_ARM,
157 CL_IMPORT_TYPE_DMA_BUF_ARM,
158 CL_IMPORT_DMA_BUF_DATA_CONSISTENCY_WITH_HOST_ARM,
159 CL_TRUE,
160 0
161 };
162 cl_int error = CL_SUCCESS;
163 cl_mem buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
164 CL_MEM_READ_WRITE,
165 importProperties,
166 memory,
167 roundedSize,
168 &error);
169 if (error == CL_SUCCESS)
170 {
171 m_AllocatedBufferMappings.insert(std::make_pair(static_cast<void *>(buffer), memory));
172 return buffer;
173 }
174 throw armnn::Exception(
175 "Mapping allocated memory from CustomMemoryAllocator failed, errcode: "
176 + std::to_string(error));
177 }
178 else if (source == MemorySource::DmaBufProtected)
179 {
180 const cl_import_properties_arm importProperties[] =
181 {
182 CL_IMPORT_TYPE_ARM,
183 CL_IMPORT_TYPE_DMA_BUF_ARM,
184 CL_IMPORT_TYPE_PROTECTED_ARM,
185 CL_TRUE,
186 0
187 };
188 cl_int error = CL_SUCCESS;
189 cl_mem buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
190 CL_MEM_READ_WRITE,
191 importProperties,
192 memory,
193 roundedSize,
194 &error);
195 if (error == CL_SUCCESS)
196 {
197 m_AllocatedBufferMappings.insert(std::make_pair(static_cast<void *>(buffer), memory));
198 return buffer;
199 }
200 throw armnn::Exception(
201 "Mapping allocated memory from CustomMemoryAllocator failed, errcode: "
202 + std::to_string(error));
203 }
204 throw armnn::Exception(
205 "Attempting to allocate memory with unsupported MemorySource type in CustomAllocator");
206 }
207 std::shared_ptr<ICustomAllocator> m_CustomAllocator;
208 std::map<void*, void*> m_AllocatedBufferMappings;
209 };
210
211 class ClBackendCustomAllocatorMemoryRegion : public arm_compute::ICLMemoryRegion
212 {
213 public:
214 // We need to have a new version of ICLMemoryRegion which holds a hostMemPtr to allow for cpu copy access
215 ClBackendCustomAllocatorMemoryRegion(const cl::Buffer &buffer, void* hostMemPtr, armnn::MemorySource source)
216 : ICLMemoryRegion(buffer.getInfo<CL_MEM_SIZE>())
217 {
218 _mem = buffer;
219 m_HostMemPtr = hostMemPtr;
220 m_MemorySource = source;
221 }
222
223 // Inherited methods overridden :
224 void* ptr() override
225 {
226 return nullptr;
227 }
228
229 void* map(cl::CommandQueue &q, bool blocking) override
230 {
231 armnn::IgnoreUnused(q, blocking);
232 if (m_HostMemPtr == nullptr)
233 {
234 throw armnn::Exception("ClBackend: Attempting to map memory with an invalid host ptr");
235 }
236 if (_mapping != nullptr)
237 {
238 throw armnn::Exception("ClBackend: Attempting to map memory which has not yet been unmapped");
239 }
240 switch (m_MemorySource)
241 {
242 case armnn::MemorySource::Malloc:
243 _mapping = m_HostMemPtr;
244 return _mapping;
245 break;
246 case armnn::MemorySource::DmaBuf:
247 case armnn::MemorySource::DmaBufProtected:
248 // If the source is a Dmabuf then the memory ptr should be pointing to an integer value for the fd
249 _mapping = mmap(NULL, _size, PROT_WRITE, MAP_SHARED, *(reinterpret_cast<int*>(m_HostMemPtr)), 0);
250 return _mapping;
251 break;
252 default:
253 throw armnn::Exception("ClBackend: Attempting to map imported memory without a valid source");
254 break;
255 }
256 }
257
258 void unmap(cl::CommandQueue &q) override
259 {
260 armnn::IgnoreUnused(q);
261 switch (m_MemorySource)
262 {
263 case armnn::MemorySource::Malloc:
264 _mapping = nullptr;
265 break;
266 case armnn::MemorySource::DmaBuf:
267 case armnn::MemorySource::DmaBufProtected:
268 munmap(_mapping, _size);
269 _mapping = nullptr;
270 break;
271 default:
272 throw armnn::Exception("ClBackend: Attempting to unmap imported memory without a valid source");
273 break;
274 }
275 }
276 private:
277 void* m_HostMemPtr = nullptr;
278 armnn::MemorySource m_MemorySource;
279 };
280
281 std::shared_ptr<GpuFsaBackendCustomAllocatorWrapper> m_CustomAllocator;
282 bool m_UsingCustomAllocator = false;
283};
284
285} // namespace armnn