blob: f3355a740ba691c33cd1ea356e86de14f4b29685 [file] [log] [blame]
Pablo Tello7fad9b12018-03-14 17:55:27 +00001/*
2 * Copyright (c) 2018 ARM Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "arm_compute/runtime/CPUUtils.h"
25
26#include "arm_compute/core/CPP/CPPTypes.h"
27#include "arm_compute/core/Error.h"
28#include "support/ToolchainSupport.h"
29
30#include <array>
31#include <cstdlib>
32#include <cstring>
33#include <fcntl.h>
34#include <fstream>
35#include <map>
36#include <sched.h>
37#include <sys/stat.h>
38#include <sys/types.h>
39#include <unistd.h>
40
41#ifndef BARE_METAL
Michalis Spyrou8e5174c2018-12-04 11:43:23 +000042/* C++ std::regex takes up a lot of space in the standalone builds */
43#include <regex.h>
Pablo Tello7fad9b12018-03-14 17:55:27 +000044#include <thread>
45#endif /* BARE_METAL */
46
47#if !defined(BARE_METAL) && (defined(__arm__) || defined(__aarch64__))
48#include <sys/auxv.h>
49
50/* Get HWCAP bits from asm/hwcap.h */
51#include <asm/hwcap.h>
52#endif /* !BARE_METAL */
53
54/* Make sure the bits we care about are defined, just in case asm/hwcap.h is
55 * out of date (or for bare metal mode) */
56#ifndef HWCAP_ASIMDHP
57#define HWCAP_ASIMDHP (1 << 10)
58#endif /* HWCAP_ASIMDHP */
59
60#ifndef HWCAP_CPUID
61#define HWCAP_CPUID (1 << 11)
62#endif /* HWCAP_CPUID */
63
64#ifndef HWCAP_ASIMDDP
65#define HWCAP_ASIMDDP (1 << 20)
66#endif /* HWCAP_ASIMDDP */
67
68namespace
69{
70using namespace arm_compute;
71
72#if !defined(BARE_METAL) && (defined(__arm__) || defined(__aarch64__))
Pablo Tello7fad9b12018-03-14 17:55:27 +000073
Anthony Barbier3efb3752018-07-20 15:30:46 +010074bool model_supports_dot(CPUModel model)
75{
76 switch(model)
77 {
Anthony Barbier5a3ee4f2018-07-24 11:24:17 +010078 case CPUModel::GENERIC_FP16_DOT:
Anthony Barbier3efb3752018-07-20 15:30:46 +010079 case CPUModel::A55r1:
Anthony Barbier3efb3752018-07-20 15:30:46 +010080 return true;
81 default:
82 return false;
83 }
84}
85
86bool model_supports_fp16(CPUModel model)
87{
88 switch(model)
89 {
Anthony Barbier5a3ee4f2018-07-24 11:24:17 +010090 case CPUModel::GENERIC_FP16:
91 case CPUModel::GENERIC_FP16_DOT:
Anthony Barbier3efb3752018-07-20 15:30:46 +010092 case CPUModel::A55r1:
Anthony Barbier3efb3752018-07-20 15:30:46 +010093 return true;
94 default:
95 return false;
96 }
97}
Georgios Pinitas18960692018-12-21 18:18:46 +000098
Pablo Tello7fad9b12018-03-14 17:55:27 +000099/* Convert an MIDR register value to a CPUModel enum value. */
100CPUModel midr_to_model(const unsigned int midr)
101{
Anthony Barbier5a3ee4f2018-07-24 11:24:17 +0100102 CPUModel model = CPUModel::GENERIC;
Pablo Tello7fad9b12018-03-14 17:55:27 +0000103
104 // Unpack variant and CPU ID
Anthony Barbier5a3ee4f2018-07-24 11:24:17 +0100105 const int implementer = (midr >> 24) & 0xFF;
106 const int variant = (midr >> 20) & 0xF;
107 const int cpunum = (midr >> 4) & 0xFFF;
Pablo Tello7fad9b12018-03-14 17:55:27 +0000108
Anthony Barbier5a3ee4f2018-07-24 11:24:17 +0100109 if(implementer == 0x41) // Arm CPUs
Pablo Tello7fad9b12018-03-14 17:55:27 +0000110 {
Anthony Barbier5a3ee4f2018-07-24 11:24:17 +0100111 // Only CPUs we have code paths for are detected. All other CPUs can be safely classed as "GENERIC"
112 switch(cpunum)
113 {
114 case 0xd03: // A53
115 case 0xd04: // A35
116 model = CPUModel::A53;
117 break;
118 case 0xd05: // A55
119 if(variant != 0)
120 {
121 model = CPUModel::A55r1;
122 }
123 else
124 {
125 model = CPUModel::A55r0;
126 }
127 break;
128 case 0xd0a: // A75
129 if(variant != 0)
130 {
131 model = CPUModel::GENERIC_FP16_DOT;
132 }
133 else
134 {
135 model = CPUModel::GENERIC_FP16;
136 }
137 break;
138 case 0xd0b: // A76
Isabella Gottardibe2de402018-11-21 15:23:49 +0000139 case 0xd06:
140 case 0xd0c:
141 case 0xd0d:
Anthony Barbier5a3ee4f2018-07-24 11:24:17 +0100142 model = CPUModel::GENERIC_FP16_DOT;
143 break;
144 default:
145 model = CPUModel::GENERIC;
146 break;
147 }
Pablo Tello7fad9b12018-03-14 17:55:27 +0000148 }
Georgios Pinitas18960692018-12-21 18:18:46 +0000149 else if(implementer == 0x48) // HiSilicon CPUs
150 {
151 // Only CPUs we have code paths for are detected. All other CPUs can be safely classed as "GENERIC"
152 switch(cpunum)
153 {
154 case 0xd40: // A76 (Kirin 980)
155 model = CPUModel::GENERIC_FP16_DOT;
156 break;
157 default:
158 model = CPUModel::GENERIC;
159 break;
160 }
161 }
Pablo Tello7fad9b12018-03-14 17:55:27 +0000162
163 return model;
164}
165
Anthony Barbier3efb3752018-07-20 15:30:46 +0100166void populate_models_cpuid(std::vector<CPUModel> &cpusv)
Pablo Tello7fad9b12018-03-14 17:55:27 +0000167{
168 // If the CPUID capability is present, MIDR information is provided in /sys. Use that to populate the CPU model table.
169 uint32_t i = 0;
170 for(auto &c : cpusv)
171 {
172 std::stringstream str;
173 str << "/sys/devices/system/cpu/cpu" << i++ << "/regs/identification/midr_el1";
174 std::ifstream file;
175 file.open(str.str(), std::ios::in);
176 if(file.is_open())
177 {
178 std::string line;
179 if(bool(getline(file, line)))
180 {
Georgios Pinitas9c2ec7e2018-11-12 17:07:18 +0000181 const uint32_t midr = support::cpp11::stoul(line, nullptr, support::cpp11::NumericBase::BASE_16);
182 c = midr_to_model(midr & 0xffffffff);
Pablo Tello7fad9b12018-03-14 17:55:27 +0000183 }
184 }
185 }
186}
187
Anthony Barbier3efb3752018-07-20 15:30:46 +0100188void populate_models_cpuinfo(std::vector<CPUModel> &cpusv)
Pablo Tello7fad9b12018-03-14 17:55:27 +0000189{
Michalis Spyrou8e5174c2018-12-04 11:43:23 +0000190 regex_t proc_regex;
191 regex_t imp_regex;
192 regex_t var_regex;
193 regex_t part_regex;
194 regex_t rev_regex;
195
196 memset(&proc_regex, 0, sizeof(regex_t));
197 memset(&imp_regex, 0, sizeof(regex_t));
198 memset(&var_regex, 0, sizeof(regex_t));
199 memset(&part_regex, 0, sizeof(regex_t));
200 memset(&rev_regex, 0, sizeof(regex_t));
201
202 int ret_status = 0;
Pablo Tello7fad9b12018-03-14 17:55:27 +0000203 // If "long-form" cpuinfo is present, parse that to populate models.
Michalis Spyrou8e5174c2018-12-04 11:43:23 +0000204 ret_status |= regcomp(&proc_regex, R"(^processor.*([[:digit:]]+)$)", REG_EXTENDED);
205 ret_status |= regcomp(&imp_regex, R"(^CPU implementer.*0x(..)$)", REG_EXTENDED);
206 ret_status |= regcomp(&var_regex, R"(^CPU variant.*0x(.)$)", REG_EXTENDED);
207 ret_status |= regcomp(&part_regex, R"(^CPU part.*0x(...)$)", REG_EXTENDED);
208 ret_status |= regcomp(&rev_regex, R"(^CPU revision.*([[:digit:]]+)$)", REG_EXTENDED);
209 ARM_COMPUTE_UNUSED(ret_status);
210 ARM_COMPUTE_ERROR_ON_MSG(ret_status != 0, "Regex compilation failed.");
Pablo Tello7fad9b12018-03-14 17:55:27 +0000211
212 std::ifstream file;
213 file.open("/proc/cpuinfo", std::ios::in);
214
215 if(file.is_open())
216 {
217 std::string line;
218 int midr = 0;
219 int curcpu = -1;
220
221 while(bool(getline(file, line)))
222 {
Michalis Spyrou8e5174c2018-12-04 11:43:23 +0000223 regmatch_t match[2];
224 ret_status = regexec(&proc_regex, line.c_str(), 2, match, 0);
225 if(ret_status == 0)
Pablo Tello7fad9b12018-03-14 17:55:27 +0000226 {
Michalis Spyrou8e5174c2018-12-04 11:43:23 +0000227 std::string id = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so));
Pablo Tello65f99822018-05-24 11:40:15 +0100228 int newcpu = support::cpp11::stoi(id, nullptr);
Pablo Tello7fad9b12018-03-14 17:55:27 +0000229
230 if(curcpu >= 0 && midr == 0)
231 {
232 // Matched a new CPU ID without any description of the previous one - looks like old format.
233 return;
234 }
235
236 if(curcpu >= 0)
237 {
Anthony Barbier3efb3752018-07-20 15:30:46 +0100238 cpusv[curcpu] = midr_to_model(midr);
Pablo Tello7fad9b12018-03-14 17:55:27 +0000239 }
240
241 midr = 0;
242 curcpu = newcpu;
243
244 continue;
245 }
246
Michalis Spyrou8e5174c2018-12-04 11:43:23 +0000247 ret_status = regexec(&imp_regex, line.c_str(), 2, match, 0);
248 if(ret_status == 0)
Pablo Tello7fad9b12018-03-14 17:55:27 +0000249 {
Michalis Spyrou8e5174c2018-12-04 11:43:23 +0000250 std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so));
251 int impv = support::cpp11::stoi(subexp, nullptr, support::cpp11::NumericBase::BASE_16);
Pablo Tello7fad9b12018-03-14 17:55:27 +0000252 midr |= (impv << 24);
Michalis Spyrou8e5174c2018-12-04 11:43:23 +0000253
Pablo Tello7fad9b12018-03-14 17:55:27 +0000254 continue;
255 }
256
Michalis Spyrou8e5174c2018-12-04 11:43:23 +0000257 ret_status = regexec(&var_regex, line.c_str(), 2, match, 0);
258 if(ret_status == 0)
Pablo Tello7fad9b12018-03-14 17:55:27 +0000259 {
Michalis Spyrou8e5174c2018-12-04 11:43:23 +0000260 std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so));
261 int varv = support::cpp11::stoi(subexp, nullptr, support::cpp11::NumericBase::BASE_16);
Anthony Barbier3efb3752018-07-20 15:30:46 +0100262 midr |= (varv << 20);
Michalis Spyrou8e5174c2018-12-04 11:43:23 +0000263
Pablo Tello7fad9b12018-03-14 17:55:27 +0000264 continue;
265 }
266
Michalis Spyrou8e5174c2018-12-04 11:43:23 +0000267 ret_status = regexec(&part_regex, line.c_str(), 2, match, 0);
268 if(ret_status == 0)
Pablo Tello7fad9b12018-03-14 17:55:27 +0000269 {
Michalis Spyrou8e5174c2018-12-04 11:43:23 +0000270 std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so));
271 int partv = support::cpp11::stoi(subexp, nullptr, support::cpp11::NumericBase::BASE_16);
Pablo Tello7fad9b12018-03-14 17:55:27 +0000272 midr |= (partv << 4);
Michalis Spyrou8e5174c2018-12-04 11:43:23 +0000273
Pablo Tello7fad9b12018-03-14 17:55:27 +0000274 continue;
275 }
276
Michalis Spyrou8e5174c2018-12-04 11:43:23 +0000277 ret_status = regexec(&rev_regex, line.c_str(), 2, match, 0);
278 if(ret_status == 0)
Pablo Tello7fad9b12018-03-14 17:55:27 +0000279 {
Michalis Spyrou8e5174c2018-12-04 11:43:23 +0000280 std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so));
281 int regv = support::cpp11::stoi(subexp, nullptr);
Pablo Tello7fad9b12018-03-14 17:55:27 +0000282 midr |= (regv);
283 midr |= (0xf << 16);
Michalis Spyrou8e5174c2018-12-04 11:43:23 +0000284
Pablo Tello7fad9b12018-03-14 17:55:27 +0000285 continue;
286 }
287 }
288
289 if(curcpu >= 0)
290 {
Anthony Barbier3efb3752018-07-20 15:30:46 +0100291 cpusv[curcpu] = midr_to_model(midr);
Pablo Tello7fad9b12018-03-14 17:55:27 +0000292 }
293 }
Michalis Spyrou8e5174c2018-12-04 11:43:23 +0000294
295 // Free allocated memory
296 regfree(&proc_regex);
297 regfree(&imp_regex);
298 regfree(&var_regex);
299 regfree(&part_regex);
300 regfree(&rev_regex);
Pablo Tello7fad9b12018-03-14 17:55:27 +0000301}
302
303int get_max_cpus()
304{
305 int max_cpus = 1;
306#if !defined(BARE_METAL) && (defined(__arm__) || defined(__aarch64__))
307 std::ifstream CPUspresent;
308 CPUspresent.open("/sys/devices/system/cpu/present", std::ios::in);
309 bool success = false;
310
311 if(CPUspresent.is_open())
312 {
313 std::string line;
314
315 if(bool(getline(CPUspresent, line)))
316 {
317 /* The content of this file is a list of ranges or single values, e.g.
318 * 0-5, or 1-3,5,7 or similar. As we are interested in the
319 * max valid ID, we just need to find the last valid
320 * delimiter ('-' or ',') and parse the integer immediately after that.
321 */
322 auto startfrom = line.begin();
323
324 for(auto i = line.begin(); i < line.end(); ++i)
325 {
326 if(*i == '-' || *i == ',')
327 {
328 startfrom = i + 1;
329 }
330 }
331
332 line.erase(line.begin(), startfrom);
333
Pablo Tello65f99822018-05-24 11:40:15 +0100334 max_cpus = support::cpp11::stoi(line, nullptr) + 1;
Pablo Tello7fad9b12018-03-14 17:55:27 +0000335 success = true;
336 }
337 }
338
339 // Return std::thread::hardware_concurrency() as a fallback.
340 if(!success)
341 {
342 max_cpus = std::thread::hardware_concurrency();
343 }
344#endif /* BARE_METAL */
Pablo Tello7fad9b12018-03-14 17:55:27 +0000345 return max_cpus;
346}
347#endif /* !defined(BARE_METAL) && (defined(__arm__) || defined(__aarch64__)) */
348
349} // namespace
350
351namespace arm_compute
352{
353void get_cpu_configuration(CPUInfo &cpuinfo)
354{
355#if !defined(BARE_METAL) && (defined(__arm__) || defined(__aarch64__))
Anthony Barbier5a3ee4f2018-07-24 11:24:17 +0100356 bool cpuid = false;
357 bool hwcaps_fp16_support = false;
358 bool hwcaps_dot_support = false;
Pablo Tello7fad9b12018-03-14 17:55:27 +0000359
360 const uint32_t hwcaps = getauxval(AT_HWCAP);
361
362 if((hwcaps & HWCAP_CPUID) != 0)
363 {
364 cpuid = true;
365 }
366
367 if((hwcaps & HWCAP_ASIMDHP) != 0)
368 {
Anthony Barbier5a3ee4f2018-07-24 11:24:17 +0100369 hwcaps_fp16_support = true;
Pablo Tello7fad9b12018-03-14 17:55:27 +0000370 }
371
Pablo Tellodf3b5bb2018-10-09 10:51:51 +0100372#if defined(__aarch64__)
Pablo Tello7fad9b12018-03-14 17:55:27 +0000373 if((hwcaps & HWCAP_ASIMDDP) != 0)
374 {
Anthony Barbier5a3ee4f2018-07-24 11:24:17 +0100375 hwcaps_dot_support = true;
Pablo Tello7fad9b12018-03-14 17:55:27 +0000376 }
Pablo Tellodf3b5bb2018-10-09 10:51:51 +0100377#endif /* defined(__aarch64__) */
Pablo Tello7fad9b12018-03-14 17:55:27 +0000378
Pablo Tello7fad9b12018-03-14 17:55:27 +0000379 const unsigned int max_cpus = get_max_cpus();
380 cpuinfo.set_cpu_num(max_cpus);
Anthony Barbier3efb3752018-07-20 15:30:46 +0100381 std::vector<CPUModel> percpu(max_cpus, CPUModel::GENERIC);
Pablo Tello7fad9b12018-03-14 17:55:27 +0000382 if(cpuid)
383 {
384 populate_models_cpuid(percpu);
385 }
386 else
387 {
388 populate_models_cpuinfo(percpu);
389 }
390 int j(0);
Georgios Pinitasecae3a12018-10-09 15:13:12 +0100391 // Update dot product and FP16 support if one of the CPUs support these features
392 // We assume that the system does not have mixed architectures
393 bool one_supports_dot = false;
394 bool one_supports_fp16 = false;
Pablo Tello7fad9b12018-03-14 17:55:27 +0000395 for(const auto &v : percpu)
396 {
Georgios Pinitas9c2ec7e2018-11-12 17:07:18 +0000397 one_supports_dot = one_supports_dot || model_supports_dot(v);
398 one_supports_fp16 = one_supports_fp16 || model_supports_fp16(v);
Anthony Barbier3efb3752018-07-20 15:30:46 +0100399 cpuinfo.set_cpu_model(j++, v);
Pablo Tello7fad9b12018-03-14 17:55:27 +0000400 }
Georgios Pinitasecae3a12018-10-09 15:13:12 +0100401 cpuinfo.set_dotprod(one_supports_dot || hwcaps_dot_support);
402 cpuinfo.set_fp16(one_supports_fp16 || hwcaps_fp16_support);
Pablo Tello7fad9b12018-03-14 17:55:27 +0000403#else /* !defined(BARE_METAL) && (defined(__arm__) || defined(__aarch64__)) */
404 ARM_COMPUTE_UNUSED(cpuinfo);
405#endif /* !defined(BARE_METAL) && (defined(__arm__) || defined(__aarch64__)) */
406}
407
408unsigned int get_threads_hint()
409{
410 unsigned int num_threads_hint = 1;
411
412#ifndef BARE_METAL
413 std::map<std::string, unsigned int> cpu_part_occurrence_map;
414
415 // CPU part regex
Michalis Spyrou8e5174c2018-12-04 11:43:23 +0000416 regex_t cpu_part_rgx;
417 memset(&cpu_part_rgx, 0, sizeof(regex_t));
Georgios Pinitasde6dbfe2018-12-24 15:02:55 +0000418 int ret_status = regcomp(&cpu_part_rgx, R"(.*CPU part.+/?\:[[:space:]]+([[:alnum:]]+).*)", REG_EXTENDED);
Michalis Spyrou8e5174c2018-12-04 11:43:23 +0000419 ARM_COMPUTE_UNUSED(ret_status);
420 ARM_COMPUTE_ERROR_ON_MSG(ret_status != 0, "Regex compilation failed.");
Pablo Tello7fad9b12018-03-14 17:55:27 +0000421
422 // Read cpuinfo and get occurrence of each core
423 std::ifstream cpuinfo;
424 cpuinfo.open("/proc/cpuinfo", std::ios::in);
425 if(cpuinfo.is_open())
426 {
427 std::string line;
428 while(bool(getline(cpuinfo, line)))
429 {
Michalis Spyrou8e5174c2018-12-04 11:43:23 +0000430 regmatch_t match[2];
431 ret_status = regexec(&cpu_part_rgx, line.c_str(), 2, match, 0);
432 if(ret_status == 0)
Pablo Tello7fad9b12018-03-14 17:55:27 +0000433 {
Michalis Spyrou8e5174c2018-12-04 11:43:23 +0000434 std::string cpu_part = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so));
Pablo Tello7fad9b12018-03-14 17:55:27 +0000435 if(cpu_part_occurrence_map.find(cpu_part) != cpu_part_occurrence_map.end())
436 {
437 cpu_part_occurrence_map[cpu_part]++;
438 }
439 else
440 {
441 cpu_part_occurrence_map[cpu_part] = 1;
442 }
443 }
444 }
445 }
Michalis Spyrou8e5174c2018-12-04 11:43:23 +0000446 regfree(&cpu_part_rgx);
Pablo Tello7fad9b12018-03-14 17:55:27 +0000447
448 // Get min number of threads
449 auto min_common_cores = std::min_element(cpu_part_occurrence_map.begin(), cpu_part_occurrence_map.end(),
450 [](const std::pair<std::string, unsigned int> &p1, const std::pair<std::string, unsigned int> &p2)
451 {
452 return p1.second < p2.second;
453 });
454
455 // Set thread hint
456 num_threads_hint = cpu_part_occurrence_map.empty() ? std::thread::hardware_concurrency() : min_common_cores->second;
457#endif /* BARE_METAL */
458
459 return num_threads_hint;
460}
461
462} // namespace arm_compute