blob: b7cc3d773b3ab4288fb67409df99847df126046f [file] [log] [blame]
Pablo Telloeb82fd22018-02-23 13:43:50 +00001/*
2 * Copyright (c) 2018 ARM Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25#pragma once
26
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>

#include <cstdint>
#include <fstream>
#include <iostream>
#include <limits>
#include <regex>
#include <sstream>
#include <string>
#include <thread>
#include <vector>
41
/* Globals defined in another translation unit.  Nothing in this header
 * reads or writes them; they are only declared here.
 * NOTE(review): presumably user-supplied overrides for the cache sizes and
 * for the CPU to assume - confirm against the defining source file. */
extern int l1_cache_size; /* presumably an L1 data cache size override (bytes?) - confirm */
extern int l2_cache_size; /* presumably an L2 cache size override (bytes?) - confirm */
extern int force_cpu;     /* presumably forces a specific CPU/model choice - confirm */
45
46#ifdef __ANDROID__
/* Minimal stand-in for std::stoul, built on strtoul().
 *
 * str  - text to convert; leading whitespace is skipped by strtoul().
 * pos  - optional; when non-null it receives the number of characters of
 *        str that were consumed by the conversion.
 * base - numeric base passed straight to strtoul() (0 = auto-detect).
 *
 * Returns the converted value.  Unlike std::stoul no exception is thrown:
 * if no digits could be converted the result is 0 and *pos (if requested)
 * is set to 0.
 */
inline unsigned long stoul( const std::string& str, std::size_t* pos = nullptr, int base = 10 )
{
    char *end = nullptr;
    const unsigned long ret = strtoul( str.c_str(), &end, base);

    /* pos is optional (callers in this file pass nullptr), so it must not
     * be written through unconditionally. */
    if (pos != nullptr) {
        *pos = static_cast<std::size_t>(end - str.c_str());
    }

    return ret;
}
/* Minimal stand-in for std::stoi, built on strtol().
 *
 * str  - text to convert; leading whitespace is skipped by strtol().
 * pos  - optional; when non-null it receives the number of characters of
 *        str that were consumed by the conversion.
 * base - numeric base passed straight to strtol() (0 = auto-detect).  It
 *        has to be honoured: callers in this file parse hexadecimal fields
 *        with base 16.
 *
 * Returns the converted value narrowed to int.  Unlike std::stoi no
 * exception is thrown; if no digits could be converted the result is 0.
 */
inline int stoi( const std::string& str, std::size_t* pos = nullptr, int base = 10 )
{
    char *end = nullptr;
    const long ret = strtol( str.c_str(), &end, base);

    if (pos != nullptr) {
        *pos = static_cast<std::size_t>(end - str.c_str());
    }

    return static_cast<int>(ret);
}
58#endif
59
60
61#ifndef BARE_METAL
62#include <sys/auxv.h>
63
64/* Get HWCAP bits from asm/hwcap.h */
65#include <asm/hwcap.h>
66#endif /* !BARE_METAL */
67
/* Fallback definitions for the HWCAP bits tested by CPUInfo.  Each one is
 * only defined when no earlier header has supplied it, which covers both
 * an out-of-date asm/hwcap.h and bare metal builds (where asm/hwcap.h is
 * not included at all). */
#if !defined(HWCAP_ASIMDHP)
#define HWCAP_ASIMDHP (1 << 10)
#endif

#if !defined(HWCAP_CPUID)
#define HWCAP_CPUID (1 << 11)
#endif

#if !defined(HWCAP_ASIMDDP)
#define HWCAP_ASIMDDP (1 << 20)
#endif

/* Defined unconditionally.  NOTE(review): not tested anywhere in this
 * header - presumably consumed by code that includes it; confirm. */
#define CPUINFO_HACK
83
84//unsigned int get_cpu_impl();
85
86
/* CPU models.
 *
 * Only CPUs that have microarchitecture-specific code need an entry here;
 * everything else is reported as GENERIC.  Architecture features are not
 * encoded in this enum - they are detected via HWCAPs.
 */
enum class CPUModel {
    GENERIC = 0x0001, /* any CPU without a dedicated entry */
    A53     = 0x0010,
    A55r0   = 0x0011, /* A55 with variant field zero */
    A55r1   = 0x0012, /* A55 with non-zero variant field */
};
98
99class CPUInfo
100{
101private:
102 struct PerCPUData {
103 CPUModel model = CPUModel::GENERIC;
104 uint32_t midr = 0;
105 bool model_set = false;
106 };
107
108 std::vector<PerCPUData> _percpu={};
109
110 bool _cpuid = false;
111 bool _fp16 = false;
112 bool _dotprod = false;
113
114 unsigned int L1_cache_size = 32768;
115 unsigned int L2_cache_size = 262144;
116
117 /* Convert an MIDR register value to a CPUModel enum value. */
118 CPUModel midr_to_model(const unsigned int midr) const {
119 CPUModel model;
120
121 // Unpack variant and CPU ID
122 int variant = (midr >> 20) & 0xF;
123 int cpunum = (midr >> 4) & 0xFFF;
124
125 /* Only CPUs we have code paths for are detected. All other CPUs
126 * can be safely classed as "GENERIC"
127 */
128
129 switch(cpunum) {
130 case 0xd03:
131 model = CPUModel::A53;
132 break;
133
134 case 0xd05:
135 if (variant) {
136 model = CPUModel::A55r1;
137 } else {
138 model = CPUModel::A55r0;
139 }
140 break;
141
142 default:
143 model = CPUModel::GENERIC;
144 break;
145 }
146
147 return model;
148 }
149
150 /* If the CPUID capability is present, MIDR information is provided in
151 /sys. Use that to populate the CPU model table. */
152 void populate_models_cpuid() {
153 for (unsigned long int i=0; i<_percpu.size(); i++) {
154 std::stringstream str;
155 str << "/sys/devices/system/cpu/cpu" << i << "/regs/identification/midr_el1";
156 std::ifstream file;
157
158 file.open(str.str(), std::ios::in);
159
160 if (file.is_open()) {
161 std::string line;
162
163 if (bool(getline(file, line))) {
164 const unsigned long midr = stoul(line, nullptr, 16);
165
166 _percpu[i].midr = (midr & 0xffffffff);
167 _percpu[i].model = midr_to_model(_percpu[i].midr);
168 _percpu[i].model_set = true;
169 }
170 }
171 }
172 }
173
174 /* If "long-form" cpuinfo is present, parse that to populate models. */
175 void populate_models_cpuinfo() {
176 std::regex proc_regex("^processor.*(\\d+)$");
177 std::regex imp_regex("^CPU implementer.*0x(..)$");
178 std::regex var_regex("^CPU variant.*0x(.)$");
179 std::regex part_regex("^CPU part.*0x(...)$");
180 std::regex rev_regex("^CPU revision.*(\\d+)$");
181
182 std::ifstream file;
183 file.open("/proc/cpuinfo", std::ios::in);
184
185 if (file.is_open()) {
186 std::string line;
187 int midr=0;
188 int curcpu=-1;
189
190 while(bool(getline(file, line))) {
191 std::smatch match;
192
193 if (std::regex_match(line, match, proc_regex)) {
194 std::string id = match[1];
195 int newcpu=stoi(id, nullptr, 0);
196
197 if (curcpu >= 0 && midr==0) {
198 // Matched a new CPU ID without any description of the previous one - looks like old format.
199 return;
200 }
201
202 if (curcpu >= 0) {
203 _percpu[curcpu].midr = midr;
204 _percpu[curcpu].model = midr_to_model(midr);
205 _percpu[curcpu].model_set = true;
206
207 printf("CPU %d: %x\n",curcpu,midr);
208 }
209
210 midr=0;
211 curcpu=newcpu;
212
213 continue;
214 }
215
216 if (std::regex_match(line, match, imp_regex)) {
217 int impv = stoi(match[1], nullptr, 16);
218 midr |= (impv << 24);
219 continue;
220 }
221
222 if (std::regex_match(line, match, var_regex)) {
223 int varv = stoi(match[1], nullptr, 16);
224 midr |= (varv << 16);
225 continue;
226 }
227
228 if (std::regex_match(line, match, part_regex)) {
229 int partv = stoi(match[1], nullptr, 16);
230 midr |= (partv << 4);
231 continue;
232 }
233
234 if (std::regex_match(line, match, rev_regex)) {
235 int regv = stoi(match[1], nullptr, 10);
236 midr |= (regv);
237 midr |= (0xf << 16);
238 continue;
239 }
240 }
241
242 if (curcpu >= 0) {
243 _percpu[curcpu].midr = midr;
244 _percpu[curcpu].model = midr_to_model(midr);
245 _percpu[curcpu].model_set = true;
246
247 printf("CPU %d: %x\n",curcpu,midr);
248 }
249 }
250 }
251
252 /* Identify the maximum valid CPUID in the system. This reads
253 * /sys/devices/system/cpu/present to get the information. */
254 int get_max_cpus() {
255 int max_cpus = 1;
256
257#ifndef BARE_METAL
258 std::ifstream CPUspresent;
259 CPUspresent.open("/sys/devices/system/cpu/present", std::ios::in);
260 bool success = false;
261
262 if (CPUspresent.is_open()) {
263 std::string line;
264
265 if (bool(getline(CPUspresent, line))) {
266 /* The content of this file is a list of ranges or single values, e.g.
267 * 0-5, or 1-3,5,7 or similar. As we are interested in the
268 * max valid ID, we just need to find the last valid
269 * delimiter ('-' or ',') and parse the integer immediately after that.
270 */
271 auto startfrom=line.begin();
272
273 for (auto i=line.begin(); i<line.end(); ++i) {
274 if (*i=='-' || *i==',') {
275 startfrom=i+1;
276 }
277 }
278
279 line.erase(line.begin(), startfrom);
280
281 max_cpus = stoi(line, nullptr, 0) + 1;
282 success = true;
283 }
284 }
285
286 // Return std::thread::hardware_concurrency() as a fallback.
287 if (!success) {
288 max_cpus = std::thread::hardware_concurrency();
289 }
290#endif // !BARE_METAL
291
292 return max_cpus;
293 }
294
295public:
296 CPUInfo() {
297#ifndef BARE_METAL
298 unsigned long hwcaps = getauxval(AT_HWCAP);
299
300 if (hwcaps & HWCAP_CPUID) {
301 _cpuid = true;
302 }
303
304 if (hwcaps & HWCAP_ASIMDHP) {
305 _fp16 = true;
306 }
307
308 if (hwcaps & HWCAP_ASIMDDP) {
309 _dotprod = true;
310 }
311
312#ifdef __aarch64__
313 /* Pre-4.15 kernels don't have the ASIMDDP bit.
314 *
315 * Although the CPUID bit allows us to read the feature register
316 * directly, the kernel quite sensibly masks this to only show
317 * features known by it to be safe to show to userspace. As a
318 * result, pre-4.15 kernels won't show the relevant bit in the
319 * feature registers either.
320 *
321 * So for now, use a whitelist of CPUs known to support the feature.
322 */
323 if (!_dotprod && _cpuid) {
324 /* List of CPUs with dot product support: A55r1 A75r1 A75r2 */
325 const unsigned int dotprod_whitelist_masks[] = { 0xfff0fff0, 0xfff0fff0, 0xfff0fff0, 0 };
326 const unsigned int dotprod_whitelist_values[] = { 0x4110d050, 0x4110d0a0, 0x4120d0a0, 0 };
327
328 unsigned long cpuid;
329
330 __asm __volatile (
331 "mrs %0, midr_el1\n"
332 : "=r" (cpuid)
333 :
334 :
335 );
336
337 for (int i=0;dotprod_whitelist_values[i];i++) {
338 if ((cpuid & dotprod_whitelist_masks[i]) == dotprod_whitelist_values[i]) {
339 _dotprod = true;
340 break;
341 }
342 }
343 }
344#endif
345 _percpu.resize(get_max_cpus());
346#endif
347 if (_cpuid) {
348 populate_models_cpuid();
349 } else {
350 populate_models_cpuinfo();
351 }
352 }
353
354 void set_fp16(const bool fp16) {
355 _fp16 = fp16;
356 }
357
358 void set_dotprod(const bool dotprod) {
359 _dotprod = dotprod;
360 }
361
362 void set_cpu_model(unsigned long cpuid, CPUModel model) {
363 if (_percpu.size() > cpuid) {
364 _percpu[cpuid].model = model;
365 _percpu[cpuid].model_set = true;
366 }
367 }
368
369 bool has_fp16() const {
370 return _fp16;
371 }
372
373 bool has_dotprod() const {
374 return _dotprod;
375 }
376
377 CPUModel get_cpu_model(unsigned long cpuid) const {
378 if (cpuid < _percpu.size()) {
379 return _percpu[cpuid].model;
380 }
381
382 return CPUModel::GENERIC;
383 }
384
385 CPUModel get_cpu_model() const {
386#ifdef BARE_METAL
387 return get_cpu_model(0);
388#else
389 return get_cpu_model(sched_getcpu());
390#endif
391 }
392
393 unsigned int get_L1_cache_size() const {
394 return L1_cache_size;
395 }
396
397 void set_L1_cache_size(unsigned int size) {
398 L1_cache_size = size;
399 }
400
401 unsigned int get_L2_cache_size() const {
402 return L2_cache_size;
403 }
404
405 void set_L2_cache_size(unsigned int size) {
406 L2_cache_size = size;
407 }
408};
409
410CPUInfo *get_CPUInfo();