blob: a1c6413c98c7cd120242edf6d7653a2bc9b30c22 [file] [log] [blame]
Georgios Pinitas8a5146f2021-01-12 15:51:07 +00001/*
2 * Copyright (c) 2021 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "src/cpu/CpuContext.h"
25
26#include "arm_compute/core/CPP/CPPTypes.h"
Georgios Pinitasc3c352e2021-03-18 10:59:40 +000027#include "src/cpu/CpuQueue.h"
Georgios Pinitas3f26ef42021-02-23 10:01:33 +000028#include "src/cpu/CpuTensor.h"
Georgios Pinitas8a5146f2021-01-12 15:51:07 +000029
30#include <cstdlib>
Michalis Spyrou86ee2372021-03-09 19:33:12 +000031#include <malloc.h>
Georgios Pinitas8a5146f2021-01-12 15:51:07 +000032
33namespace arm_compute
34{
35namespace cpu
36{
37namespace
38{
39void *default_allocate(void *user_data, size_t size)
40{
41 ARM_COMPUTE_UNUSED(user_data);
42 return ::operator new(size);
43}
44void default_free(void *user_data, void *ptr)
45{
46 ARM_COMPUTE_UNUSED(user_data);
47 ::operator delete(ptr);
48}
49void *default_aligned_allocate(void *user_data, size_t size, size_t alignment)
50{
51 ARM_COMPUTE_UNUSED(user_data);
52 void *ptr = nullptr;
53#if defined(BARE_METAL) || defined(__APPLE__)
54 size_t rem = size % alignment;
55 size_t real_size = (rem) ? (size + alignment - rem) : size;
Michalis Spyrou86ee2372021-03-09 19:33:12 +000056 ptr = memalign(alignment, real_size);
Georgios Pinitas8a5146f2021-01-12 15:51:07 +000057#else /* defined(BARE_METAL) || defined(__APPLE__) */
Sang-Hoon Park57d73662021-03-04 11:06:48 +000058 if(posix_memalign(&ptr, alignment, size) != 0)
59 {
60 // posix_memalign returns non-zero on failures, the return values will be
61 // - EINVAL: wrong alignment
62 // - ENOMEM: insufficient memory
Michalis Spyrou86ee2372021-03-09 19:33:12 +000063 ARM_COMPUTE_LOG_ERROR_ACL("posix_memalign failed, the returned pointer will be invalid");
Sang-Hoon Park57d73662021-03-04 11:06:48 +000064 }
Georgios Pinitas8a5146f2021-01-12 15:51:07 +000065#endif /* defined(BARE_METAL) || defined(__APPLE__) */
66 return ptr;
67}
68void default_aligned_free(void *user_data, void *ptr)
69{
70 ARM_COMPUTE_UNUSED(user_data);
71 free(ptr);
72}
73static AclAllocator default_allocator = { &default_allocate,
74 &default_free,
75 &default_aligned_allocate,
76 &default_aligned_free,
77 nullptr
78 };
79
80AllocatorWrapper populate_allocator(AclAllocator *external_allocator)
81{
82 bool is_valid = (external_allocator != nullptr);
83 if(is_valid)
84 {
85 is_valid = is_valid && (external_allocator->alloc != nullptr);
86 is_valid = is_valid && (external_allocator->free != nullptr);
87 is_valid = is_valid && (external_allocator->aligned_alloc != nullptr);
88 is_valid = is_valid && (external_allocator->aligned_free != nullptr);
89 }
90 return is_valid ? AllocatorWrapper(*external_allocator) : AllocatorWrapper(default_allocator);
91}
92
Michalis Spyrou20fca522021-06-07 14:23:57 +010093cpuinfo::CpuIsaInfo populate_capabilities_legacy(const CPUInfo &cpu_info)
Georgios Pinitas8a5146f2021-01-12 15:51:07 +000094{
Michalis Spyrou20fca522021-06-07 14:23:57 +010095 cpuinfo::CpuIsaInfo isa_caps;
Georgios Pinitas8a5146f2021-01-12 15:51:07 +000096
97 // Extract SIMD extension
Michalis Spyrou20fca522021-06-07 14:23:57 +010098 isa_caps.neon = true;
99 isa_caps.sve = cpu_info.has_sve();
100 isa_caps.sve2 = cpu_info.has_sve2();
101
Georgios Pinitas8a5146f2021-01-12 15:51:07 +0000102 // Extract data-type support
Michalis Spyrou20fca522021-06-07 14:23:57 +0100103 isa_caps.fp16 = cpu_info.has_fp16();
104 isa_caps.bf16 = cpu_info.has_bf16();
105 isa_caps.svebf16 = cpu_info.has_svebf16();
Georgios Pinitas8a5146f2021-01-12 15:51:07 +0000106
107 // Extract ISA extensions
Michalis Spyrou20fca522021-06-07 14:23:57 +0100108 isa_caps.dot = cpu_info.has_dotprod();
109 isa_caps.i8mm = cpu_info.has_i8mm();
110 isa_caps.svei8mm = cpu_info.has_svei8mm();
111 isa_caps.svef32mm = cpu_info.has_svef32mm();
Georgios Pinitas8a5146f2021-01-12 15:51:07 +0000112
Michalis Spyrou20fca522021-06-07 14:23:57 +0100113 return isa_caps;
Georgios Pinitas8a5146f2021-01-12 15:51:07 +0000114}
115
Michalis Spyrou20fca522021-06-07 14:23:57 +0100116cpuinfo::CpuIsaInfo populate_capabilities_flags(AclTargetCapabilities external_caps)
Georgios Pinitas8a5146f2021-01-12 15:51:07 +0000117{
Michalis Spyrou20fca522021-06-07 14:23:57 +0100118 cpuinfo::CpuIsaInfo isa_caps;
Georgios Pinitas8a5146f2021-01-12 15:51:07 +0000119
120 // Extract SIMD extension
Michalis Spyrou20fca522021-06-07 14:23:57 +0100121 isa_caps.neon = external_caps & AclCpuCapabilitiesNeon;
122 isa_caps.sve = external_caps & AclCpuCapabilitiesSve;
123 isa_caps.sve2 = external_caps & AclCpuCapabilitiesSve2;
Georgios Pinitas8a5146f2021-01-12 15:51:07 +0000124
Michalis Spyrou20fca522021-06-07 14:23:57 +0100125 // Extract data-type support
126 isa_caps.fp16 = external_caps & AclCpuCapabilitiesFp16;
127 isa_caps.bf16 = external_caps & AclCpuCapabilitiesBf16;
128
129 // Extract ISA extensions
130 isa_caps.dot = external_caps & AclCpuCapabilitiesDot;
131 isa_caps.i8mm = external_caps & AclCpuCapabilitiesMmlaInt8;
132 isa_caps.svef32mm = external_caps & AclCpuCapabilitiesMmlaFp;
133
134 return isa_caps;
Georgios Pinitas8a5146f2021-01-12 15:51:07 +0000135}
136
137CpuCapabilities populate_capabilities(AclTargetCapabilities external_caps,
138 int32_t max_threads)
139{
Georgios Pinitas8a5146f2021-01-12 15:51:07 +0000140 CpuCapabilities caps;
Michalis Spyrou20fca522021-06-07 14:23:57 +0100141
142 // Extract legacy structure
143 cpuinfo::CpuIsaInfo isa_caps;
Georgios Pinitas8a5146f2021-01-12 15:51:07 +0000144 if(external_caps != AclCpuCapabilitiesAuto)
145 {
Michalis Spyrou20fca522021-06-07 14:23:57 +0100146 isa_caps = populate_capabilities_flags(external_caps);
Georgios Pinitas8a5146f2021-01-12 15:51:07 +0000147 }
148 else
149 {
Michalis Spyrou20fca522021-06-07 14:23:57 +0100150 isa_caps = populate_capabilities_legacy(CPUInfo::get());
Georgios Pinitas8a5146f2021-01-12 15:51:07 +0000151 }
Michalis Spyrou20fca522021-06-07 14:23:57 +0100152 caps.cpu_info = cpuinfo::CpuInfo(isa_caps, {});
Georgios Pinitas8a5146f2021-01-12 15:51:07 +0000153
154 // Set max number of threads
155#if defined(BARE_METAL)
156 ARM_COMPUTE_UNUSED(max_threads);
157 caps.max_threads = 1;
158#else /* defined(BARE_METAL) */
159 caps.max_threads = (max_threads > 0) ? max_threads : std::thread::hardware_concurrency();
160#endif /* defined(BARE_METAL) */
161
162 return caps;
163}
164} // namespace
165
166CpuContext::CpuContext(const AclContextOptions *options)
167 : IContext(Target::Cpu),
168 _allocator(default_allocator),
169 _caps(populate_capabilities(AclCpuCapabilitiesAuto, -1))
170{
171 if(options != nullptr)
172 {
173 _allocator = populate_allocator(options->allocator);
174 _caps = populate_capabilities(options->capabilities, options->max_compute_units);
175 }
176}
177
178const CpuCapabilities &CpuContext::capabilities() const
179{
180 return _caps;
181}
182
183AllocatorWrapper &CpuContext::allocator()
184{
185 return _allocator;
186}
Georgios Pinitas3f26ef42021-02-23 10:01:33 +0000187
188ITensorV2 *CpuContext::create_tensor(const AclTensorDescriptor &desc, bool allocate)
189{
190 CpuTensor *tensor = new CpuTensor(this, desc);
191 if(tensor != nullptr && allocate)
192 {
193 tensor->allocate();
194 }
195 return tensor;
196}
Georgios Pinitasc3c352e2021-03-18 10:59:40 +0000197
198IQueue *CpuContext::create_queue(const AclQueueOptions *options)
199{
200 return new CpuQueue(this, options);
201}
Georgios Pinitas8a5146f2021-01-12 15:51:07 +0000202} // namespace cpu
203} // namespace arm_compute