blob: b9a6999f849da90192200d208c831744906825c1 [file] [log] [blame]
Georgios Pinitas8a5146f2021-01-12 15:51:07 +00001/*
2 * Copyright (c) 2021 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "src/cpu/CpuContext.h"
25
26#include "arm_compute/core/CPP/CPPTypes.h"
Georgios Pinitasc3c352e2021-03-18 10:59:40 +000027#include "src/cpu/CpuQueue.h"
Georgios Pinitas3f26ef42021-02-23 10:01:33 +000028#include "src/cpu/CpuTensor.h"
Georgios Pinitas8a5146f2021-01-12 15:51:07 +000029#include "src/runtime/CPUUtils.h"
30
31#include <cstdlib>
Michalis Spyrou86ee2372021-03-09 19:33:12 +000032#include <malloc.h>
Georgios Pinitas8a5146f2021-01-12 15:51:07 +000033
34namespace arm_compute
35{
36namespace cpu
37{
38namespace
39{
40void *default_allocate(void *user_data, size_t size)
41{
42 ARM_COMPUTE_UNUSED(user_data);
43 return ::operator new(size);
44}
45void default_free(void *user_data, void *ptr)
46{
47 ARM_COMPUTE_UNUSED(user_data);
48 ::operator delete(ptr);
49}
50void *default_aligned_allocate(void *user_data, size_t size, size_t alignment)
51{
52 ARM_COMPUTE_UNUSED(user_data);
53 void *ptr = nullptr;
54#if defined(BARE_METAL) || defined(__APPLE__)
55 size_t rem = size % alignment;
56 size_t real_size = (rem) ? (size + alignment - rem) : size;
Michalis Spyrou86ee2372021-03-09 19:33:12 +000057 ptr = memalign(alignment, real_size);
Georgios Pinitas8a5146f2021-01-12 15:51:07 +000058#else /* defined(BARE_METAL) || defined(__APPLE__) */
Sang-Hoon Park57d73662021-03-04 11:06:48 +000059 if(posix_memalign(&ptr, alignment, size) != 0)
60 {
61 // posix_memalign returns non-zero on failures, the return values will be
62 // - EINVAL: wrong alignment
63 // - ENOMEM: insufficient memory
Michalis Spyrou86ee2372021-03-09 19:33:12 +000064 ARM_COMPUTE_LOG_ERROR_ACL("posix_memalign failed, the returned pointer will be invalid");
Sang-Hoon Park57d73662021-03-04 11:06:48 +000065 }
Georgios Pinitas8a5146f2021-01-12 15:51:07 +000066#endif /* defined(BARE_METAL) || defined(__APPLE__) */
67 return ptr;
68}
69void default_aligned_free(void *user_data, void *ptr)
70{
71 ARM_COMPUTE_UNUSED(user_data);
72 free(ptr);
73}
74static AclAllocator default_allocator = { &default_allocate,
75 &default_free,
76 &default_aligned_allocate,
77 &default_aligned_free,
78 nullptr
79 };
80
81AllocatorWrapper populate_allocator(AclAllocator *external_allocator)
82{
83 bool is_valid = (external_allocator != nullptr);
84 if(is_valid)
85 {
86 is_valid = is_valid && (external_allocator->alloc != nullptr);
87 is_valid = is_valid && (external_allocator->free != nullptr);
88 is_valid = is_valid && (external_allocator->aligned_alloc != nullptr);
89 is_valid = is_valid && (external_allocator->aligned_free != nullptr);
90 }
91 return is_valid ? AllocatorWrapper(*external_allocator) : AllocatorWrapper(default_allocator);
92}
93
94CpuCapabilities populate_capabilities_legacy(const CPUInfo &cpu_info)
95{
96 CpuCapabilities caps;
97
98 // Extract SIMD extension
99 caps.neon = true;
100#ifdef SVE2
101 caps.sve2 = true;
102#endif /* SVE2 */
103 // Extract data-type support
104 caps.fp16 = cpu_info.has_fp16();
105#ifdef V8P6_BF
106 caps.bf16 = true;
107#endif /* V8P6_BF */
108
109 // Extract ISA extensions
110 caps.dot = cpu_info.has_dotprod();
111#ifdef MMLA_FP32
112 caps.mmla_fp = true;
113#endif /* MMLA_FP32 */
114#ifdef MMLA_INT8
115 caps.mmla_int8 = true;
116#endif /* MMLA_INT8 */
117
118 return caps;
119}
120
121CpuCapabilities populate_capabilities_flags(AclTargetCapabilities external_caps)
122{
123 CpuCapabilities caps;
124
125 // Extract SIMD extension
126 caps.neon = external_caps & AclCpuCapabilitiesNeon;
127 caps.sve = external_caps & AclCpuCapabilitiesSve;
128 caps.sve2 = external_caps & AclCpuCapabilitiesSve2;
129 // Extract data-type support
130 caps.fp16 = external_caps & AclCpuCapabilitiesFp16;
131 caps.bf16 = external_caps & AclCpuCapabilitiesBf16;
132 // Extract ISA extensions
133 caps.dot = external_caps & AclCpuCapabilitiesDot;
134 caps.mmla_fp = external_caps & AclCpuCapabilitiesMmlaFp;
135 caps.mmla_int8 = external_caps & AclCpuCapabilitiesMmlaInt8;
136
137 return caps;
138}
139
140CpuCapabilities populate_capabilities(AclTargetCapabilities external_caps,
141 int32_t max_threads)
142{
143 // Extract legacy structure
144 CPUInfo cpu_info;
145 arm_compute::utils::cpu::get_cpu_configuration(cpu_info);
146
147 CpuCapabilities caps;
148 if(external_caps != AclCpuCapabilitiesAuto)
149 {
150 caps = populate_capabilities_flags(external_caps);
151 }
152 else
153 {
154 caps = populate_capabilities_legacy(cpu_info);
155 }
156
157 // Set max number of threads
158#if defined(BARE_METAL)
159 ARM_COMPUTE_UNUSED(max_threads);
160 caps.max_threads = 1;
161#else /* defined(BARE_METAL) */
162 caps.max_threads = (max_threads > 0) ? max_threads : std::thread::hardware_concurrency();
163#endif /* defined(BARE_METAL) */
164
165 return caps;
166}
167} // namespace
168
169CpuContext::CpuContext(const AclContextOptions *options)
170 : IContext(Target::Cpu),
171 _allocator(default_allocator),
172 _caps(populate_capabilities(AclCpuCapabilitiesAuto, -1))
173{
174 if(options != nullptr)
175 {
176 _allocator = populate_allocator(options->allocator);
177 _caps = populate_capabilities(options->capabilities, options->max_compute_units);
178 }
179}
180
181const CpuCapabilities &CpuContext::capabilities() const
182{
183 return _caps;
184}
185
186AllocatorWrapper &CpuContext::allocator()
187{
188 return _allocator;
189}
Georgios Pinitas3f26ef42021-02-23 10:01:33 +0000190
191ITensorV2 *CpuContext::create_tensor(const AclTensorDescriptor &desc, bool allocate)
192{
193 CpuTensor *tensor = new CpuTensor(this, desc);
194 if(tensor != nullptr && allocate)
195 {
196 tensor->allocate();
197 }
198 return tensor;
199}
Georgios Pinitasc3c352e2021-03-18 10:59:40 +0000200
201IQueue *CpuContext::create_queue(const AclQueueOptions *options)
202{
203 return new CpuQueue(this, options);
204}
Georgios Pinitas8a5146f2021-01-12 15:51:07 +0000205} // namespace cpu
206} // namespace arm_compute