blob: b745af8229305c8adc4b3297215b5aab21cac80a [file] [log] [blame]
/*
 * Copyright (c) 2021-2023 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24#include "src/cpu/CpuContext.h"
25
26#include "arm_compute/core/CPP/CPPTypes.h"
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010027
Georgios Pinitasc3c352e2021-03-18 10:59:40 +000028#include "src/cpu/CpuQueue.h"
Georgios Pinitas3f26ef42021-02-23 10:01:33 +000029#include "src/cpu/CpuTensor.h"
Georgios Pinitas8a5146f2021-01-12 15:51:07 +000030
31#include <cstdlib>
Kevin Lo7195f712022-01-07 15:46:02 +080032#if !defined(__APPLE__) && !defined(__OpenBSD__)
Michalis Spyrou86ee2372021-03-09 19:33:12 +000033#include <malloc.h>
Pablo Marquez Tello48f26152021-11-18 10:15:23 +000034
Pablo Tello4e66d702022-03-07 18:20:12 +000035#if defined(_WIN64)
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010036#define posix_memalign _aligned_realloc
Pablo Tello4e66d702022-03-07 18:20:12 +000037#define posix_memalign_free _aligned_free
38#endif // defined(_WIN64)
39#endif // !defined(__APPLE__) && !defined(__OpenBSD__)
Georgios Pinitas8a5146f2021-01-12 15:51:07 +000040
Matthew Bentham1d062042023-07-06 13:13:59 +000041#ifndef BARE_METAL
42#include <thread>
43#endif /* BARE_METAL */
44
Georgios Pinitas8a5146f2021-01-12 15:51:07 +000045namespace arm_compute
46{
47namespace cpu
48{
49namespace
50{
51void *default_allocate(void *user_data, size_t size)
52{
53 ARM_COMPUTE_UNUSED(user_data);
54 return ::operator new(size);
55}
56void default_free(void *user_data, void *ptr)
57{
58 ARM_COMPUTE_UNUSED(user_data);
59 ::operator delete(ptr);
60}
61void *default_aligned_allocate(void *user_data, size_t size, size_t alignment)
62{
63 ARM_COMPUTE_UNUSED(user_data);
64 void *ptr = nullptr;
Pablo Marquez Tello48f26152021-11-18 10:15:23 +000065#if defined(BARE_METAL)
Georgios Pinitas8a5146f2021-01-12 15:51:07 +000066 size_t rem = size % alignment;
67 size_t real_size = (rem) ? (size + alignment - rem) : size;
Michalis Spyrou86ee2372021-03-09 19:33:12 +000068 ptr = memalign(alignment, real_size);
Pablo Marquez Tello48f26152021-11-18 10:15:23 +000069#else /* defined(BARE_METAL) */
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010070 if (posix_memalign(&ptr, alignment, size) != 0)
Sang-Hoon Park57d73662021-03-04 11:06:48 +000071 {
72 // posix_memalign returns non-zero on failures, the return values will be
73 // - EINVAL: wrong alignment
74 // - ENOMEM: insufficient memory
Michalis Spyrou86ee2372021-03-09 19:33:12 +000075 ARM_COMPUTE_LOG_ERROR_ACL("posix_memalign failed, the returned pointer will be invalid");
Sang-Hoon Park57d73662021-03-04 11:06:48 +000076 }
Pablo Marquez Tello48f26152021-11-18 10:15:23 +000077#endif /* defined(BARE_METAL) */
Georgios Pinitas8a5146f2021-01-12 15:51:07 +000078 return ptr;
79}
80void default_aligned_free(void *user_data, void *ptr)
81{
82 ARM_COMPUTE_UNUSED(user_data);
83 free(ptr);
84}
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010085static AclAllocator default_allocator = {&default_allocate, &default_free, &default_aligned_allocate,
86 &default_aligned_free, nullptr};
Georgios Pinitas8a5146f2021-01-12 15:51:07 +000087
88AllocatorWrapper populate_allocator(AclAllocator *external_allocator)
89{
90 bool is_valid = (external_allocator != nullptr);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +010091 if (is_valid)
Georgios Pinitas8a5146f2021-01-12 15:51:07 +000092 {
93 is_valid = is_valid && (external_allocator->alloc != nullptr);
94 is_valid = is_valid && (external_allocator->free != nullptr);
95 is_valid = is_valid && (external_allocator->aligned_alloc != nullptr);
96 is_valid = is_valid && (external_allocator->aligned_free != nullptr);
97 }
98 return is_valid ? AllocatorWrapper(*external_allocator) : AllocatorWrapper(default_allocator);
99}
100
Michalis Spyrou20fca522021-06-07 14:23:57 +0100101cpuinfo::CpuIsaInfo populate_capabilities_flags(AclTargetCapabilities external_caps)
Georgios Pinitas8a5146f2021-01-12 15:51:07 +0000102{
Michalis Spyrou20fca522021-06-07 14:23:57 +0100103 cpuinfo::CpuIsaInfo isa_caps;
Georgios Pinitas8a5146f2021-01-12 15:51:07 +0000104
105 // Extract SIMD extension
Michalis Spyrou20fca522021-06-07 14:23:57 +0100106 isa_caps.neon = external_caps & AclCpuCapabilitiesNeon;
107 isa_caps.sve = external_caps & AclCpuCapabilitiesSve;
108 isa_caps.sve2 = external_caps & AclCpuCapabilitiesSve2;
Georgios Pinitas8a5146f2021-01-12 15:51:07 +0000109
Michalis Spyrou20fca522021-06-07 14:23:57 +0100110 // Extract data-type support
Georgios Pinitas731fe662021-06-24 20:32:11 +0100111 isa_caps.fp16 = external_caps & AclCpuCapabilitiesFp16;
112 isa_caps.bf16 = external_caps & AclCpuCapabilitiesBf16;
113 isa_caps.svebf16 = isa_caps.bf16;
Michalis Spyrou20fca522021-06-07 14:23:57 +0100114
115 // Extract ISA extensions
116 isa_caps.dot = external_caps & AclCpuCapabilitiesDot;
117 isa_caps.i8mm = external_caps & AclCpuCapabilitiesMmlaInt8;
118 isa_caps.svef32mm = external_caps & AclCpuCapabilitiesMmlaFp;
119
120 return isa_caps;
Georgios Pinitas8a5146f2021-01-12 15:51:07 +0000121}
122
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100123CpuCapabilities populate_capabilities(AclTargetCapabilities external_caps, int32_t max_threads)
Georgios Pinitas8a5146f2021-01-12 15:51:07 +0000124{
Georgios Pinitas8a5146f2021-01-12 15:51:07 +0000125 CpuCapabilities caps;
Michalis Spyrou20fca522021-06-07 14:23:57 +0100126
Georgios Pinitas731fe662021-06-24 20:32:11 +0100127 // Populate capabilities with system information
128 caps.cpu_info = cpuinfo::CpuInfo::build();
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100129 if (external_caps != AclCpuCapabilitiesAuto)
Georgios Pinitas8a5146f2021-01-12 15:51:07 +0000130 {
Georgios Pinitas731fe662021-06-24 20:32:11 +0100131 cpuinfo::CpuIsaInfo isa = populate_capabilities_flags(external_caps);
132 auto cpus = caps.cpu_info.cpus();
133
134 caps.cpu_info = cpuinfo::CpuInfo(isa, cpus);
Georgios Pinitas8a5146f2021-01-12 15:51:07 +0000135 }
Georgios Pinitas8a5146f2021-01-12 15:51:07 +0000136
137 // Set max number of threads
138#if defined(BARE_METAL)
139 ARM_COMPUTE_UNUSED(max_threads);
140 caps.max_threads = 1;
141#else /* defined(BARE_METAL) */
142 caps.max_threads = (max_threads > 0) ? max_threads : std::thread::hardware_concurrency();
143#endif /* defined(BARE_METAL) */
144
145 return caps;
146}
147} // namespace
148
149CpuContext::CpuContext(const AclContextOptions *options)
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100150 : IContext(Target::Cpu), _allocator(default_allocator), _caps(populate_capabilities(AclCpuCapabilitiesAuto, -1))
Georgios Pinitas8a5146f2021-01-12 15:51:07 +0000151{
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100152 if (options != nullptr)
Georgios Pinitas8a5146f2021-01-12 15:51:07 +0000153 {
154 _allocator = populate_allocator(options->allocator);
155 _caps = populate_capabilities(options->capabilities, options->max_compute_units);
156 }
157}
158
159const CpuCapabilities &CpuContext::capabilities() const
160{
161 return _caps;
162}
163
164AllocatorWrapper &CpuContext::allocator()
165{
166 return _allocator;
167}
Georgios Pinitas3f26ef42021-02-23 10:01:33 +0000168
169ITensorV2 *CpuContext::create_tensor(const AclTensorDescriptor &desc, bool allocate)
170{
171 CpuTensor *tensor = new CpuTensor(this, desc);
Felix Thomasmathibalanafd38f02023-09-27 17:46:17 +0100172 if (tensor != nullptr && allocate)
Georgios Pinitas3f26ef42021-02-23 10:01:33 +0000173 {
174 tensor->allocate();
175 }
176 return tensor;
177}
Georgios Pinitasc3c352e2021-03-18 10:59:40 +0000178
179IQueue *CpuContext::create_queue(const AclQueueOptions *options)
180{
181 return new CpuQueue(this, options);
182}
Georgios Pinitas8a5146f2021-01-12 15:51:07 +0000183} // namespace cpu
184} // namespace arm_compute