/*
 * Copyright (c) 2016-2018 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "arm_compute/runtime/CL/CLTensorAllocator.h"

#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/CL/CLMemoryGroup.h"
#include "arm_compute/runtime/CL/CLScheduler.h"

using namespace arm_compute;

CLTensorAllocator::CLTensorAllocator(CLTensor *owner)
    : _associated_memory_group(nullptr), _buffer(), _mapping(nullptr), _owner(owner), _svm_memory()
{
}

CLTensorAllocator::~CLTensorAllocator()
{
    _buffer = cl::Buffer();
}

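// Returns the host mapping of the tensor, or nullptr if it is not currently mapped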
uint8_t *CLTensorAllocator::data()
{
    return _mapping;
}

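// Returns the OpenCL buffer that backs this tensor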
const cl::Buffer &CLTensorAllocator::cl_data() const
{
    return _buffer;
}

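// Allocates a shared virtual memory (SVM) region via clSVMAlloc and records its size
// and granularity; returns nullptr on failure so the caller can fall back to another
// allocation path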
void *SVMMemory::allocate(cl_context context, size_t size, cl_svm_mem_flags flags, cl_uint alignment)
{
    ARM_COMPUTE_ERROR_ON_NULLPTR(context);
    ARM_COMPUTE_ERROR_ON(size == 0);
    ARM_COMPUTE_ERROR_ON(_ptr != nullptr);
    ARM_COMPUTE_ERROR_ON(size > CL_DEVICE_MAX_MEM_ALLOC_SIZE);
    _ptr = clSVMAlloc(context, flags, size, alignment);
    if(_ptr != nullptr)
    {
        _size       = size;
        _fine_grain = static_cast<bool>(flags & CL_MEM_SVM_FINE_GRAIN_BUFFER);
    }
    return _ptr;
}

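// Returns the SVM pointer backing the tensor, or nullptr when SVM is not in use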
void *CLTensorAllocator::svm_ptr()
{
    return _svm_memory.ptr();
}

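// Allocates backing memory for the tensor. Without an associated memory group this
// tries fine-grained SVM first, then coarse-grained SVM, then falls back to a plain
// cl::Buffer; with a memory group, the group finalizes the memory instead.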
void CLTensorAllocator::allocate()
{
    if(_associated_memory_group == nullptr)
    {
        ARM_COMPUTE_ERROR_ON(_buffer.get() != nullptr);
        if(_svm_memory.allocate(CLScheduler::get().context()(), CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, info().total_size(), 0) == nullptr)
        {
            // Fall back to coarse-grained SVM memory
            _svm_memory.allocate(CLScheduler::get().context()(), CL_MEM_READ_WRITE, info().total_size(), 0);
        }
        if(_svm_memory.ptr() != nullptr)
        {
            // Wrap the SVM allocation in a cl::Buffer so kernels can consume it transparently
            _buffer = cl::Buffer(CLScheduler::get().context(), CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, info().total_size(), _svm_memory.ptr());
        }
        else
        {
            // No SVM support: allocate a regular host-accessible buffer
            _buffer = cl::Buffer(CLScheduler::get().context(), CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_WRITE, info().total_size());
        }
    }
    else
    {
        _associated_memory_group->finalize_memory(_owner, reinterpret_cast<void **>(&_buffer()), info().total_size());
    }
    info().set_is_resizable(false);
}

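// Releases the backing memory; buffers managed by a memory group are left to the group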
void CLTensorAllocator::free()
{
    if(_associated_memory_group == nullptr)
    {
        _buffer = cl::Buffer();
        if(_svm_memory.ptr() != nullptr)
        {
            clSVMFree(CLScheduler::get().context()(), _svm_memory.ptr());
        }
        info().set_is_resizable(true);
    }
}

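// Hands memory management over to a memory group; must be called at most once and
// before any backing buffer has been allocated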
void CLTensorAllocator::set_associated_memory_group(CLMemoryGroup *associated_memory_group)
{
    ARM_COMPUTE_ERROR_ON(associated_memory_group == nullptr);
    ARM_COMPUTE_ERROR_ON(_associated_memory_group != nullptr);
    ARM_COMPUTE_ERROR_ON(_buffer.get() != nullptr);
    _associated_memory_group = associated_memory_group;
}

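// Blocking convenience wrapper around map() using the default CLScheduler queue.
// A minimal usage sketch (illustrative only, assuming an allocated CLTensor named
// "tensor" accessed through its public map/unmap interface):
//   tensor.map();                 // make the buffer host-visible
//   uint8_t *p = tensor.buffer(); // host pointer to the mapped region
//   /* ... host-side reads/writes ... */
//   tensor.unmap();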
uint8_t *CLTensorAllocator::lock()
{
    ARM_COMPUTE_ERROR_ON(_mapping != nullptr);
    _mapping = map(CLScheduler::get().queue(), true);
    return _mapping;
}

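// Unmaps a previously locked tensor using the default CLScheduler queue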
void CLTensorAllocator::unlock()
{
    ARM_COMPUTE_ERROR_ON(_mapping == nullptr);
    unmap(CLScheduler::get().queue(), _mapping);
    _mapping = nullptr;
}

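// Maps the tensor for host access: regular buffers use enqueueMapBuffer, coarse-grained
// SVM uses clEnqueueSVMMap, and fine-grained SVM is directly host-accessible (an optional
// clFinish stands in for a blocking map)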
uint8_t *CLTensorAllocator::map(cl::CommandQueue &q, bool blocking)
{
    const bool svm_mem        = _svm_memory.ptr() != nullptr;
    const bool fine_grain_svm = _svm_memory.fine_grain();
    if(!svm_mem)
    {
        // Regular buffer: map it into the host address space
        ARM_COMPUTE_ERROR_ON(_buffer.get() == nullptr);
        return static_cast<uint8_t *>(q.enqueueMapBuffer(_buffer, blocking ? CL_TRUE : CL_FALSE, CL_MAP_READ | CL_MAP_WRITE, 0, info().total_size()));
    }
    else if(!fine_grain_svm)
    {
        // Coarse-grained SVM: an explicit map call is required before host access
        const cl_int ret = clEnqueueSVMMap(q(), blocking ? CL_TRUE : CL_FALSE, CL_MAP_READ | CL_MAP_WRITE, _svm_memory.ptr(), _svm_memory.size(), 0, nullptr, nullptr);
        ARM_COMPUTE_ERROR_ON(ret != CL_SUCCESS);
        if(ret == CL_SUCCESS)
        {
            return reinterpret_cast<uint8_t *>(_svm_memory.ptr());
        }
        else
        {
            return nullptr;
        }
    }
    else
    {
        // Fine-grained SVM: no map call needed; just ensure pending work has finished
        if(blocking)
        {
            clFinish(q());
        }
        return reinterpret_cast<uint8_t *>(_svm_memory.ptr());
    }
}

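// Reverses map(): enqueueUnmapMemObject for regular buffers, clEnqueueSVMUnmap for
// coarse-grained SVM; fine-grained SVM requires no unmap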
void CLTensorAllocator::unmap(cl::CommandQueue &q, uint8_t *mapping)
{
    const bool svm_mem        = _svm_memory.ptr() != nullptr;
    const bool fine_grain_svm = _svm_memory.fine_grain();
    if(!svm_mem)
    {
        ARM_COMPUTE_ERROR_ON(_buffer.get() == nullptr);
        q.enqueueUnmapMemObject(_buffer, mapping);
    }
    else if(!fine_grain_svm)
    {
        clEnqueueSVMUnmap(q(), _svm_memory.ptr(), 0, nullptr, nullptr);
    }
}