blob: b7aa0d548722ea18532dcfde13be6259a00f9773 [file] [log] [blame]
Pablo Tello7fad9b12018-03-14 17:55:27 +00001/*
2 * Copyright (c) 2018 ARM Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#include "arm_compute/runtime/CPUUtils.h"
25
26#include "arm_compute/core/CPP/CPPTypes.h"
27#include "arm_compute/core/Error.h"
28#include "support/ToolchainSupport.h"
29
30#include <array>
31#include <cstdlib>
32#include <cstring>
33#include <fcntl.h>
34#include <fstream>
35#include <map>
36#include <sched.h>
37#include <sys/stat.h>
38#include <sys/types.h>
39#include <unistd.h>
40
41#ifndef BARE_METAL
Michalis Spyrou8e5174c2018-12-04 11:43:23 +000042/* C++ std::regex takes up a lot of space in the standalone builds */
43#include <regex.h>
Pablo Tello7fad9b12018-03-14 17:55:27 +000044#include <thread>
45#endif /* BARE_METAL */
46
47#if !defined(BARE_METAL) && (defined(__arm__) || defined(__aarch64__))
48#include <sys/auxv.h>
49
50/* Get HWCAP bits from asm/hwcap.h */
51#include <asm/hwcap.h>
52#endif /* !BARE_METAL */
53
54/* Make sure the bits we care about are defined, just in case asm/hwcap.h is
55 * out of date (or for bare metal mode) */
56#ifndef HWCAP_ASIMDHP
57#define HWCAP_ASIMDHP (1 << 10)
58#endif /* HWCAP_ASIMDHP */
59
60#ifndef HWCAP_CPUID
61#define HWCAP_CPUID (1 << 11)
62#endif /* HWCAP_CPUID */
63
64#ifndef HWCAP_ASIMDDP
65#define HWCAP_ASIMDDP (1 << 20)
66#endif /* HWCAP_ASIMDDP */
67
68namespace
69{
70using namespace arm_compute;
71
72#if !defined(BARE_METAL) && (defined(__arm__) || defined(__aarch64__))
Pablo Tello7fad9b12018-03-14 17:55:27 +000073
Anthony Barbier3efb3752018-07-20 15:30:46 +010074bool model_supports_dot(CPUModel model)
75{
76 switch(model)
77 {
Anthony Barbier5a3ee4f2018-07-24 11:24:17 +010078 case CPUModel::GENERIC_FP16_DOT:
Anthony Barbier3efb3752018-07-20 15:30:46 +010079 case CPUModel::A55r1:
Anthony Barbier3efb3752018-07-20 15:30:46 +010080 return true;
81 default:
82 return false;
83 }
84}
85
86bool model_supports_fp16(CPUModel model)
87{
88 switch(model)
89 {
Anthony Barbier5a3ee4f2018-07-24 11:24:17 +010090 case CPUModel::GENERIC_FP16:
91 case CPUModel::GENERIC_FP16_DOT:
Anthony Barbier3efb3752018-07-20 15:30:46 +010092 case CPUModel::A55r1:
Anthony Barbier3efb3752018-07-20 15:30:46 +010093 return true;
94 default:
95 return false;
96 }
97}
Pablo Tello7fad9b12018-03-14 17:55:27 +000098/* Convert an MIDR register value to a CPUModel enum value. */
99CPUModel midr_to_model(const unsigned int midr)
100{
Anthony Barbier5a3ee4f2018-07-24 11:24:17 +0100101 CPUModel model = CPUModel::GENERIC;
Pablo Tello7fad9b12018-03-14 17:55:27 +0000102
103 // Unpack variant and CPU ID
Anthony Barbier5a3ee4f2018-07-24 11:24:17 +0100104 const int implementer = (midr >> 24) & 0xFF;
105 const int variant = (midr >> 20) & 0xF;
106 const int cpunum = (midr >> 4) & 0xFFF;
Pablo Tello7fad9b12018-03-14 17:55:27 +0000107
Anthony Barbier5a3ee4f2018-07-24 11:24:17 +0100108 if(implementer == 0x41) // Arm CPUs
Pablo Tello7fad9b12018-03-14 17:55:27 +0000109 {
Anthony Barbier5a3ee4f2018-07-24 11:24:17 +0100110 // Only CPUs we have code paths for are detected. All other CPUs can be safely classed as "GENERIC"
111 switch(cpunum)
112 {
113 case 0xd03: // A53
114 case 0xd04: // A35
115 model = CPUModel::A53;
116 break;
117 case 0xd05: // A55
118 if(variant != 0)
119 {
120 model = CPUModel::A55r1;
121 }
122 else
123 {
124 model = CPUModel::A55r0;
125 }
126 break;
127 case 0xd0a: // A75
128 if(variant != 0)
129 {
130 model = CPUModel::GENERIC_FP16_DOT;
131 }
132 else
133 {
134 model = CPUModel::GENERIC_FP16;
135 }
136 break;
137 case 0xd0b: // A76
Isabella Gottardibe2de402018-11-21 15:23:49 +0000138 case 0xd06:
139 case 0xd0c:
140 case 0xd0d:
Anthony Barbier5a3ee4f2018-07-24 11:24:17 +0100141 model = CPUModel::GENERIC_FP16_DOT;
142 break;
143 default:
144 model = CPUModel::GENERIC;
145 break;
146 }
Pablo Tello7fad9b12018-03-14 17:55:27 +0000147 }
148
149 return model;
150}
151
Anthony Barbier3efb3752018-07-20 15:30:46 +0100152void populate_models_cpuid(std::vector<CPUModel> &cpusv)
Pablo Tello7fad9b12018-03-14 17:55:27 +0000153{
154 // If the CPUID capability is present, MIDR information is provided in /sys. Use that to populate the CPU model table.
155 uint32_t i = 0;
156 for(auto &c : cpusv)
157 {
158 std::stringstream str;
159 str << "/sys/devices/system/cpu/cpu" << i++ << "/regs/identification/midr_el1";
160 std::ifstream file;
161 file.open(str.str(), std::ios::in);
162 if(file.is_open())
163 {
164 std::string line;
165 if(bool(getline(file, line)))
166 {
Georgios Pinitas9c2ec7e2018-11-12 17:07:18 +0000167 const uint32_t midr = support::cpp11::stoul(line, nullptr, support::cpp11::NumericBase::BASE_16);
168 c = midr_to_model(midr & 0xffffffff);
Pablo Tello7fad9b12018-03-14 17:55:27 +0000169 }
170 }
171 }
172}
173
Anthony Barbier3efb3752018-07-20 15:30:46 +0100174void populate_models_cpuinfo(std::vector<CPUModel> &cpusv)
Pablo Tello7fad9b12018-03-14 17:55:27 +0000175{
Michalis Spyrou8e5174c2018-12-04 11:43:23 +0000176 regex_t proc_regex;
177 regex_t imp_regex;
178 regex_t var_regex;
179 regex_t part_regex;
180 regex_t rev_regex;
181
182 memset(&proc_regex, 0, sizeof(regex_t));
183 memset(&imp_regex, 0, sizeof(regex_t));
184 memset(&var_regex, 0, sizeof(regex_t));
185 memset(&part_regex, 0, sizeof(regex_t));
186 memset(&rev_regex, 0, sizeof(regex_t));
187
188 int ret_status = 0;
Pablo Tello7fad9b12018-03-14 17:55:27 +0000189 // If "long-form" cpuinfo is present, parse that to populate models.
Michalis Spyrou8e5174c2018-12-04 11:43:23 +0000190 ret_status |= regcomp(&proc_regex, R"(^processor.*([[:digit:]]+)$)", REG_EXTENDED);
191 ret_status |= regcomp(&imp_regex, R"(^CPU implementer.*0x(..)$)", REG_EXTENDED);
192 ret_status |= regcomp(&var_regex, R"(^CPU variant.*0x(.)$)", REG_EXTENDED);
193 ret_status |= regcomp(&part_regex, R"(^CPU part.*0x(...)$)", REG_EXTENDED);
194 ret_status |= regcomp(&rev_regex, R"(^CPU revision.*([[:digit:]]+)$)", REG_EXTENDED);
195 ARM_COMPUTE_UNUSED(ret_status);
196 ARM_COMPUTE_ERROR_ON_MSG(ret_status != 0, "Regex compilation failed.");
Pablo Tello7fad9b12018-03-14 17:55:27 +0000197
198 std::ifstream file;
199 file.open("/proc/cpuinfo", std::ios::in);
200
201 if(file.is_open())
202 {
203 std::string line;
204 int midr = 0;
205 int curcpu = -1;
206
207 while(bool(getline(file, line)))
208 {
Michalis Spyrou8e5174c2018-12-04 11:43:23 +0000209 regmatch_t match[2];
210 ret_status = regexec(&proc_regex, line.c_str(), 2, match, 0);
211 if(ret_status == 0)
Pablo Tello7fad9b12018-03-14 17:55:27 +0000212 {
Michalis Spyrou8e5174c2018-12-04 11:43:23 +0000213 std::string id = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so));
Pablo Tello65f99822018-05-24 11:40:15 +0100214 int newcpu = support::cpp11::stoi(id, nullptr);
Pablo Tello7fad9b12018-03-14 17:55:27 +0000215
216 if(curcpu >= 0 && midr == 0)
217 {
218 // Matched a new CPU ID without any description of the previous one - looks like old format.
219 return;
220 }
221
222 if(curcpu >= 0)
223 {
Anthony Barbier3efb3752018-07-20 15:30:46 +0100224 cpusv[curcpu] = midr_to_model(midr);
Pablo Tello7fad9b12018-03-14 17:55:27 +0000225 }
226
227 midr = 0;
228 curcpu = newcpu;
229
230 continue;
231 }
232
Michalis Spyrou8e5174c2018-12-04 11:43:23 +0000233 ret_status = regexec(&imp_regex, line.c_str(), 2, match, 0);
234 if(ret_status == 0)
Pablo Tello7fad9b12018-03-14 17:55:27 +0000235 {
Michalis Spyrou8e5174c2018-12-04 11:43:23 +0000236 std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so));
237 int impv = support::cpp11::stoi(subexp, nullptr, support::cpp11::NumericBase::BASE_16);
Pablo Tello7fad9b12018-03-14 17:55:27 +0000238 midr |= (impv << 24);
Michalis Spyrou8e5174c2018-12-04 11:43:23 +0000239
Pablo Tello7fad9b12018-03-14 17:55:27 +0000240 continue;
241 }
242
Michalis Spyrou8e5174c2018-12-04 11:43:23 +0000243 ret_status = regexec(&var_regex, line.c_str(), 2, match, 0);
244 if(ret_status == 0)
Pablo Tello7fad9b12018-03-14 17:55:27 +0000245 {
Michalis Spyrou8e5174c2018-12-04 11:43:23 +0000246 std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so));
247 int varv = support::cpp11::stoi(subexp, nullptr, support::cpp11::NumericBase::BASE_16);
Anthony Barbier3efb3752018-07-20 15:30:46 +0100248 midr |= (varv << 20);
Michalis Spyrou8e5174c2018-12-04 11:43:23 +0000249
Pablo Tello7fad9b12018-03-14 17:55:27 +0000250 continue;
251 }
252
Michalis Spyrou8e5174c2018-12-04 11:43:23 +0000253 ret_status = regexec(&part_regex, line.c_str(), 2, match, 0);
254 if(ret_status == 0)
Pablo Tello7fad9b12018-03-14 17:55:27 +0000255 {
Michalis Spyrou8e5174c2018-12-04 11:43:23 +0000256 std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so));
257 int partv = support::cpp11::stoi(subexp, nullptr, support::cpp11::NumericBase::BASE_16);
Pablo Tello7fad9b12018-03-14 17:55:27 +0000258 midr |= (partv << 4);
Michalis Spyrou8e5174c2018-12-04 11:43:23 +0000259
Pablo Tello7fad9b12018-03-14 17:55:27 +0000260 continue;
261 }
262
Michalis Spyrou8e5174c2018-12-04 11:43:23 +0000263 ret_status = regexec(&rev_regex, line.c_str(), 2, match, 0);
264 if(ret_status == 0)
Pablo Tello7fad9b12018-03-14 17:55:27 +0000265 {
Michalis Spyrou8e5174c2018-12-04 11:43:23 +0000266 std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so));
267 int regv = support::cpp11::stoi(subexp, nullptr);
Pablo Tello7fad9b12018-03-14 17:55:27 +0000268 midr |= (regv);
269 midr |= (0xf << 16);
Michalis Spyrou8e5174c2018-12-04 11:43:23 +0000270
Pablo Tello7fad9b12018-03-14 17:55:27 +0000271 continue;
272 }
273 }
274
275 if(curcpu >= 0)
276 {
Anthony Barbier3efb3752018-07-20 15:30:46 +0100277 cpusv[curcpu] = midr_to_model(midr);
Pablo Tello7fad9b12018-03-14 17:55:27 +0000278 }
279 }
Michalis Spyrou8e5174c2018-12-04 11:43:23 +0000280
281 // Free allocated memory
282 regfree(&proc_regex);
283 regfree(&imp_regex);
284 regfree(&var_regex);
285 regfree(&part_regex);
286 regfree(&rev_regex);
Pablo Tello7fad9b12018-03-14 17:55:27 +0000287}
288
/** Determine how many CPU slots are needed to cover every CPU id present in the system.
 *
 * The first line of /sys/devices/system/cpu/present is a list of ranges or single values
 * (e.g. "0-5" or "1-3,5,7"). The highest id is the integer following the last '-' or ','
 * delimiter, or the whole line if there is no delimiter; the result is that id plus one.
 *
 * If the file cannot be read, or does not end with a number, the value of
 * std::thread::hardware_concurrency() is used instead.
 *
 * @return The number of CPUs, always at least 1.
 */
int get_max_cpus()
{
    int max_cpus = 1;
#if !defined(BARE_METAL) && (defined(__arm__) || defined(__aarch64__))
    bool success = false;

    std::ifstream cpus_present;
    cpus_present.open("/sys/devices/system/cpu/present", std::ios::in);

    std::string line;
    if(cpus_present.is_open() && bool(getline(cpus_present, line)))
    {
        // Keep only what follows the last delimiter: that is the highest valid CPU id
        const std::string::size_type last_delim = line.find_last_of("-,");
        const std::string            last_id    = (last_delim == std::string::npos) ? line : line.substr(last_delim + 1);

        // Do not attempt to parse an empty or non-numeric tail (e.g. a trailing delimiter)
        if(!last_id.empty() && last_id[0] >= '0' && last_id[0] <= '9')
        {
            max_cpus = support::cpp11::stoi(last_id, nullptr) + 1;
            success  = true;
        }
    }

    // Use std::thread::hardware_concurrency() as a fallback.
    if(!success)
    {
        // hardware_concurrency() returns 0 when the value is not computable: never report less than one CPU
        const unsigned int hw_threads = std::thread::hardware_concurrency();
        max_cpus                      = (hw_threads > 0) ? static_cast<int>(hw_threads) : 1;
    }
#endif /* !defined(BARE_METAL) && (defined(__arm__) || defined(__aarch64__)) */
    return max_cpus;
}
333#endif /* !defined(BARE_METAL) && (defined(__arm__) || defined(__aarch64__)) */
334
335} // namespace
336
337namespace arm_compute
338{
339void get_cpu_configuration(CPUInfo &cpuinfo)
340{
341#if !defined(BARE_METAL) && (defined(__arm__) || defined(__aarch64__))
Anthony Barbier5a3ee4f2018-07-24 11:24:17 +0100342 bool cpuid = false;
343 bool hwcaps_fp16_support = false;
344 bool hwcaps_dot_support = false;
Pablo Tello7fad9b12018-03-14 17:55:27 +0000345
346 const uint32_t hwcaps = getauxval(AT_HWCAP);
347
348 if((hwcaps & HWCAP_CPUID) != 0)
349 {
350 cpuid = true;
351 }
352
353 if((hwcaps & HWCAP_ASIMDHP) != 0)
354 {
Anthony Barbier5a3ee4f2018-07-24 11:24:17 +0100355 hwcaps_fp16_support = true;
Pablo Tello7fad9b12018-03-14 17:55:27 +0000356 }
357
Pablo Tellodf3b5bb2018-10-09 10:51:51 +0100358#if defined(__aarch64__)
Pablo Tello7fad9b12018-03-14 17:55:27 +0000359 if((hwcaps & HWCAP_ASIMDDP) != 0)
360 {
Anthony Barbier5a3ee4f2018-07-24 11:24:17 +0100361 hwcaps_dot_support = true;
Pablo Tello7fad9b12018-03-14 17:55:27 +0000362 }
Pablo Tellodf3b5bb2018-10-09 10:51:51 +0100363#endif /* defined(__aarch64__) */
Pablo Tello7fad9b12018-03-14 17:55:27 +0000364
Pablo Tello7fad9b12018-03-14 17:55:27 +0000365 const unsigned int max_cpus = get_max_cpus();
366 cpuinfo.set_cpu_num(max_cpus);
Anthony Barbier3efb3752018-07-20 15:30:46 +0100367 std::vector<CPUModel> percpu(max_cpus, CPUModel::GENERIC);
Pablo Tello7fad9b12018-03-14 17:55:27 +0000368 if(cpuid)
369 {
370 populate_models_cpuid(percpu);
371 }
372 else
373 {
374 populate_models_cpuinfo(percpu);
375 }
376 int j(0);
Georgios Pinitasecae3a12018-10-09 15:13:12 +0100377 // Update dot product and FP16 support if one of the CPUs support these features
378 // We assume that the system does not have mixed architectures
379 bool one_supports_dot = false;
380 bool one_supports_fp16 = false;
Pablo Tello7fad9b12018-03-14 17:55:27 +0000381 for(const auto &v : percpu)
382 {
Georgios Pinitas9c2ec7e2018-11-12 17:07:18 +0000383 one_supports_dot = one_supports_dot || model_supports_dot(v);
384 one_supports_fp16 = one_supports_fp16 || model_supports_fp16(v);
Anthony Barbier3efb3752018-07-20 15:30:46 +0100385 cpuinfo.set_cpu_model(j++, v);
Pablo Tello7fad9b12018-03-14 17:55:27 +0000386 }
Georgios Pinitasecae3a12018-10-09 15:13:12 +0100387 cpuinfo.set_dotprod(one_supports_dot || hwcaps_dot_support);
388 cpuinfo.set_fp16(one_supports_fp16 || hwcaps_fp16_support);
Pablo Tello7fad9b12018-03-14 17:55:27 +0000389#else /* !defined(BARE_METAL) && (defined(__arm__) || defined(__aarch64__)) */
390 ARM_COMPUTE_UNUSED(cpuinfo);
391#endif /* !defined(BARE_METAL) && (defined(__arm__) || defined(__aarch64__)) */
392}
393
394unsigned int get_threads_hint()
395{
396 unsigned int num_threads_hint = 1;
397
398#ifndef BARE_METAL
399 std::map<std::string, unsigned int> cpu_part_occurrence_map;
400
401 // CPU part regex
Michalis Spyrou8e5174c2018-12-04 11:43:23 +0000402 regex_t cpu_part_rgx;
403 memset(&cpu_part_rgx, 0, sizeof(regex_t));
Georgios Pinitasde6dbfe2018-12-24 15:02:55 +0000404 int ret_status = regcomp(&cpu_part_rgx, R"(.*CPU part.+/?\:[[:space:]]+([[:alnum:]]+).*)", REG_EXTENDED);
Michalis Spyrou8e5174c2018-12-04 11:43:23 +0000405 ARM_COMPUTE_UNUSED(ret_status);
406 ARM_COMPUTE_ERROR_ON_MSG(ret_status != 0, "Regex compilation failed.");
Pablo Tello7fad9b12018-03-14 17:55:27 +0000407
408 // Read cpuinfo and get occurrence of each core
409 std::ifstream cpuinfo;
410 cpuinfo.open("/proc/cpuinfo", std::ios::in);
411 if(cpuinfo.is_open())
412 {
413 std::string line;
414 while(bool(getline(cpuinfo, line)))
415 {
Michalis Spyrou8e5174c2018-12-04 11:43:23 +0000416 regmatch_t match[2];
417 ret_status = regexec(&cpu_part_rgx, line.c_str(), 2, match, 0);
418 if(ret_status == 0)
Pablo Tello7fad9b12018-03-14 17:55:27 +0000419 {
Michalis Spyrou8e5174c2018-12-04 11:43:23 +0000420 std::string cpu_part = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so));
Pablo Tello7fad9b12018-03-14 17:55:27 +0000421 if(cpu_part_occurrence_map.find(cpu_part) != cpu_part_occurrence_map.end())
422 {
423 cpu_part_occurrence_map[cpu_part]++;
424 }
425 else
426 {
427 cpu_part_occurrence_map[cpu_part] = 1;
428 }
429 }
430 }
431 }
Michalis Spyrou8e5174c2018-12-04 11:43:23 +0000432 regfree(&cpu_part_rgx);
Pablo Tello7fad9b12018-03-14 17:55:27 +0000433
434 // Get min number of threads
435 auto min_common_cores = std::min_element(cpu_part_occurrence_map.begin(), cpu_part_occurrence_map.end(),
436 [](const std::pair<std::string, unsigned int> &p1, const std::pair<std::string, unsigned int> &p2)
437 {
438 return p1.second < p2.second;
439 });
440
441 // Set thread hint
442 num_threads_hint = cpu_part_occurrence_map.empty() ? std::thread::hardware_concurrency() : min_common_cores->second;
443#endif /* BARE_METAL */
444
445 return num_threads_hint;
446}
447
448} // namespace arm_compute