| /* |
| * Copyright (c) 2021-2023 Arm Limited. |
| * |
| * SPDX-License-Identifier: MIT |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a copy |
| * of this software and associated documentation files (the "Software"), to |
| * deal in the Software without restriction, including without limitation the |
| * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or |
| * sell copies of the Software, and to permit persons to whom the Software is |
| * furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice shall be included in all |
| * copies or substantial portions of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| * SOFTWARE. |
| */ |
| #include "src/common/cpuinfo/CpuInfo.h" |
| |
| #include "arm_compute/core/Error.h" |
| #include "arm_compute/core/Log.h" |
| #include "support/StringSupport.h" |
| #include "support/ToolchainSupport.h" |
| |
| #include <sstream> |
| |
| #if !defined(BARE_METAL) |
| #include <algorithm> |
| #include <cstring> |
| #include <fstream> |
| #if !defined(_WIN64) |
| #include <regex.h> /* C++ std::regex takes up a lot of space in the standalone builds */ |
| #include <sched.h> |
| #endif /* !defined(_WIN64) */ |
| |
| #include <thread> |
| #include <unordered_map> |
| #endif /* !defined(BARE_METAL) */ |
| |
| #if !defined(_WIN64) |
| #if !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__)) |
| #include <asm/hwcap.h> /* Get HWCAP bits from asm/hwcap.h */ |
| #include <sys/auxv.h> |
| #elif defined(__APPLE__) && defined(__aarch64__) |
| #include <sys/sysctl.h> |
| #include <sys/types.h> |
| #endif /* defined(__APPLE__) && defined(__aarch64__)) */ |
| #endif /* !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__)) */ |
| |
| #define ARM_COMPUTE_CPU_FEATURE_HWCAP_CPUID (1 << 11) |
| #define ARM_COMPUTE_GET_FEATURE_REG(var, freg) __asm __volatile("MRS %0, " #freg \ |
| : "=r"(var)) |
| namespace arm_compute |
| { |
| namespace cpuinfo |
| { |
| namespace |
| { |
| #if !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__)) |
| /** Extract MIDR using CPUID information that are exposed to user-space |
| * |
| * @param[in] max_num_cpus Maximum number of possible CPUs |
| * |
| * @return std::vector<uint32_t> A list of the MIDR of each core |
| */ |
| std::vector<uint32_t> midr_from_cpuid(uint32_t max_num_cpus) |
| { |
| std::vector<uint32_t> cpus; |
| for(unsigned int i = 0; i < max_num_cpus; ++i) |
| { |
| std::stringstream str; |
| str << "/sys/devices/system/cpu/cpu" << i << "/regs/identification/midr_el1"; |
| std::ifstream file(str.str(), std::ios::in); |
| if(file.is_open()) |
| { |
| std::string line; |
| if(bool(getline(file, line))) |
| { |
| cpus.emplace_back(support::cpp11::stoul(line, nullptr, support::cpp11::NumericBase::BASE_16)); |
| } |
| } |
| } |
| return cpus; |
| } |
| |
| /** Extract MIDR by parsing the /proc/cpuinfo meta-data |
| * |
| * @param[in] max_num_cpus Maximum number of possible CPUs |
| * |
| * @return std::vector<uint32_t> A list of the MIDR of each core |
| */ |
| std::vector<uint32_t> midr_from_proc_cpuinfo(int max_num_cpus) |
| { |
| std::vector<uint32_t> cpus; |
| |
| regex_t proc_regex; |
| regex_t imp_regex; |
| regex_t var_regex; |
| regex_t part_regex; |
| regex_t rev_regex; |
| |
| memset(&proc_regex, 0, sizeof(regex_t)); |
| memset(&imp_regex, 0, sizeof(regex_t)); |
| memset(&var_regex, 0, sizeof(regex_t)); |
| memset(&part_regex, 0, sizeof(regex_t)); |
| memset(&rev_regex, 0, sizeof(regex_t)); |
| |
| int ret_status = 0; |
| // If "long-form" cpuinfo is present, parse that to populate models. |
| ret_status |= regcomp(&proc_regex, R"(^processor.*([[:digit:]]+)$)", REG_EXTENDED); |
| ret_status |= regcomp(&imp_regex, R"(^CPU implementer.*0x(..)$)", REG_EXTENDED); |
| ret_status |= regcomp(&var_regex, R"(^CPU variant.*0x(.)$)", REG_EXTENDED); |
| ret_status |= regcomp(&part_regex, R"(^CPU part.*0x(...)$)", REG_EXTENDED); |
| ret_status |= regcomp(&rev_regex, R"(^CPU revision.*([[:digit:]]+)$)", REG_EXTENDED); |
| ARM_COMPUTE_UNUSED(ret_status); |
| ARM_COMPUTE_ERROR_ON_MSG(ret_status != 0, "Regex compilation failed."); |
| |
| std::ifstream file("/proc/cpuinfo", std::ios::in); |
| if(file.is_open()) |
| { |
| std::string line; |
| int midr = 0; |
| int curcpu = -1; |
| |
| while(bool(getline(file, line))) |
| { |
| std::array<regmatch_t, 2> match; |
| ret_status = regexec(&proc_regex, line.c_str(), 2, match.data(), 0); |
| if(ret_status == 0) |
| { |
| std::string id = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so)); |
| int newcpu = support::cpp11::stoi(id, nullptr); |
| |
| if(curcpu >= 0 && midr == 0) |
| { |
| // Matched a new CPU ID without any description of the previous one - looks like old format. |
| return {}; |
| } |
| |
| if(curcpu >= 0 && curcpu < max_num_cpus) |
| { |
| cpus.emplace_back(midr); |
| } |
| else |
| { |
| ARM_COMPUTE_LOG_INFO_MSG_CORE("Trying to populate a core id with id greater than the expected number of cores!"); |
| } |
| |
| midr = 0; |
| curcpu = newcpu; |
| |
| continue; |
| } |
| |
| ret_status = regexec(&imp_regex, line.c_str(), 2, match.data(), 0); |
| if(ret_status == 0) |
| { |
| std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so)); |
| int impv = support::cpp11::stoi(subexp, nullptr, support::cpp11::NumericBase::BASE_16); |
| midr |= (impv << 24); |
| |
| continue; |
| } |
| |
| ret_status = regexec(&var_regex, line.c_str(), 2, match.data(), 0); |
| if(ret_status == 0) |
| { |
| std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so)); |
| int varv = support::cpp11::stoi(subexp, nullptr, support::cpp11::NumericBase::BASE_16); |
| midr |= (varv << 20); |
| |
| continue; |
| } |
| |
| ret_status = regexec(&part_regex, line.c_str(), 2, match.data(), 0); |
| if(ret_status == 0) |
| { |
| std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so)); |
| int partv = support::cpp11::stoi(subexp, nullptr, support::cpp11::NumericBase::BASE_16); |
| midr |= (partv << 4); |
| |
| continue; |
| } |
| |
| ret_status = regexec(&rev_regex, line.c_str(), 2, match.data(), 0); |
| if(ret_status == 0) |
| { |
| std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so)); |
| int regv = support::cpp11::stoi(subexp, nullptr); |
| midr |= (regv); |
| midr |= (0xf << 16); |
| |
| continue; |
| } |
| } |
| |
| if(curcpu >= 0 && curcpu < max_num_cpus) |
| { |
| cpus.emplace_back(midr); |
| } |
| else |
| { |
| ARM_COMPUTE_LOG_INFO_MSG_CORE("Trying to populate a core id with id greater than the expected number of cores!"); |
| } |
| } |
| |
| // Free allocated memory |
| regfree(&proc_regex); |
| regfree(&imp_regex); |
| regfree(&var_regex); |
| regfree(&part_regex); |
| regfree(&rev_regex); |
| |
| return cpus; |
| } |
| |
| /** Get the maximim number of CPUs in the system by parsing /sys/devices/system/cpu/present |
| * |
| * @return int Maximum number of CPUs |
| */ |
| int get_max_cpus() |
| { |
| int max_cpus = 1; |
| std::ifstream CPUspresent; |
| CPUspresent.open("/sys/devices/system/cpu/present", std::ios::in); |
| bool success = false; |
| |
| if(CPUspresent.is_open()) |
| { |
| std::string line; |
| |
| if(bool(getline(CPUspresent, line))) |
| { |
| /* The content of this file is a list of ranges or single values, e.g. |
| * 0-5, or 1-3,5,7 or similar. As we are interested in the |
| * max valid ID, we just need to find the last valid |
| * delimiter ('-' or ',') and parse the integer immediately after that. |
| */ |
| auto startfrom = line.begin(); |
| |
| for(auto i = line.begin(); i < line.end(); ++i) |
| { |
| if(*i == '-' || *i == ',') |
| { |
| startfrom = i + 1; |
| } |
| } |
| |
| line.erase(line.begin(), startfrom); |
| |
| max_cpus = support::cpp11::stoi(line, nullptr) + 1; |
| success = true; |
| } |
| } |
| |
| // Return std::thread::hardware_concurrency() as a fallback. |
| if(!success) |
| { |
| max_cpus = std::thread::hardware_concurrency(); |
| } |
| return max_cpus; |
| } |
| #elif defined(__aarch64__) && defined(__APPLE__) /* !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) */ |
| /** Query features through sysctlbyname |
| * |
| * @return int value queried |
| */ |
| int get_hw_capability(const std::string &cap) |
| { |
| int64_t result(0); |
| size_t size = sizeof(result); |
| sysctlbyname(cap.c_str(), &result, &size, NULL, 0); |
| return result; |
| } |
| #endif /* !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__)) */ |
| |
| #if defined(BARE_METAL) && defined(__aarch64__) |
| uint64_t get_sve_feature_reg() |
| { |
| uint64_t svefr0 = 0; |
| __asm __volatile( |
| ".inst 0xd5380483 // mrs x3, ID_AA64ZFR0_EL1\n" |
| "MOV %0, X3" |
| : "=r"(svefr0) |
| : |
| : "x3"); |
| return svefr0; |
| } |
| #endif /* defined(BARE_METAL) && defined(__aarch64__) */ |
| } // namespace |
| |
| CpuInfo::CpuInfo(CpuIsaInfo isa, std::vector<CpuModel> cpus) |
| : _isa(std::move(isa)), _cpus(std::move(cpus)) |
| { |
| } |
| |
| CpuInfo CpuInfo::build() |
| { |
| #if !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__)) |
| const uint32_t hwcaps = getauxval(AT_HWCAP); |
| const uint32_t hwcaps2 = getauxval(AT_HWCAP2); |
| const uint32_t max_cpus = get_max_cpus(); |
| |
| // Populate midr values |
| std::vector<uint32_t> cpus_midr; |
| if(hwcaps & ARM_COMPUTE_CPU_FEATURE_HWCAP_CPUID) |
| { |
| cpus_midr = midr_from_cpuid(max_cpus); |
| } |
| if(cpus_midr.empty()) |
| { |
| cpus_midr = midr_from_proc_cpuinfo(max_cpus); |
| } |
| if(cpus_midr.empty()) |
| { |
| cpus_midr.resize(max_cpus, 0); |
| } |
| |
| // Populate isa (Assume homogeneous ISA specification) |
| CpuIsaInfo isa = init_cpu_isa_from_hwcaps(hwcaps, hwcaps2, cpus_midr.back()); |
| |
| // Convert midr to models |
| std::vector<CpuModel> cpus_model; |
| std::transform(std::begin(cpus_midr), std::end(cpus_midr), std::back_inserter(cpus_model), |
| [](uint32_t midr) -> CpuModel { return midr_to_model(midr); }); |
| |
| CpuInfo info(isa, cpus_model); |
| return info; |
| |
| #elif(BARE_METAL) && defined(__aarch64__) /* !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__)) */ |
| |
| // Assume single CPU in bare metal mode. Just read the ID register and feature bits directly. |
| uint64_t isar0 = 0, isar1 = 0, pfr0 = 0, pfr1 = 0, svefr0 = 0, midr = 0; |
| ARM_COMPUTE_GET_FEATURE_REG(isar0, ID_AA64ISAR0_EL1); |
| ARM_COMPUTE_GET_FEATURE_REG(isar1, ID_AA64ISAR1_EL1); |
| ARM_COMPUTE_GET_FEATURE_REG(pfr0, ID_AA64PFR0_EL1); |
| ARM_COMPUTE_GET_FEATURE_REG(pfr1, ID_AA64PFR1_EL1); |
| ARM_COMPUTE_GET_FEATURE_REG(midr, MIDR_EL1); |
| if((pfr0 >> 32) & 0xf) |
| { |
| svefr0 = get_sve_feature_reg(); |
| } |
| |
| CpuIsaInfo isa = init_cpu_isa_from_regs(isar0, isar1, pfr0, pfr1, svefr0, midr); |
| std::vector<CpuModel> cpus_model(1, midr_to_model(midr)); |
| CpuInfo info(isa, cpus_model); |
| return info; |
| #elif defined(__aarch64__) && defined(__APPLE__) /* #elif(BARE_METAL) && defined(__aarch64__) */ |
| int ncpus = get_hw_capability("hw.perflevel0.logicalcpu"); |
| CpuIsaInfo isainfo; |
| std::vector<CpuModel> cpus_model(ncpus); |
| isainfo.neon = get_hw_capability("hw.optional.neon"); |
| isainfo.fp16 = get_hw_capability("hw.optional.neon_fp16"); |
| isainfo.dot = get_hw_capability("hw.optional.arm.FEAT_DotProd"); |
| CpuInfo info(isainfo, cpus_model); |
| return info; |
| #else /* #elif defined(__aarch64__) && defined(__APPLE__) */ |
| CpuInfo info(CpuIsaInfo(), { CpuModel::GENERIC }); |
| return info; |
| #endif /* !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__)) */ |
| } |
| |
| CpuModel CpuInfo::cpu_model(uint32_t cpuid) const |
| { |
| if(cpuid < _cpus.size()) |
| { |
| return _cpus[cpuid]; |
| } |
| return CpuModel::GENERIC; |
| } |
| |
| CpuModel CpuInfo::cpu_model() const |
| { |
| #if defined(_WIN64) || defined(BARE_METAL) || defined(__APPLE__) || defined(__OpenBSD__) || (!defined(__arm__) && !defined(__aarch64__)) |
| return cpu_model(0); |
| #else /* defined(BARE_METAL) || defined(__APPLE__) || defined(__OpenBSD__) || (!defined(__arm__) && !defined(__aarch64__)) */ |
| return cpu_model(sched_getcpu()); |
| #endif /* defined(BARE_METAL) || defined(__APPLE__) || defined(__OpenBSD__) || (!defined(__arm__) && !defined(__aarch64__)) */ |
| } |
| |
| uint32_t CpuInfo::num_cpus() const |
| { |
| return _cpus.size(); |
| } |
| |
| uint32_t num_threads_hint() |
| { |
| unsigned int num_threads_hint = 1; |
| |
| #if !defined(BARE_METAL) && !defined(_WIN64) && !defined(ARM_COMPUTE_DISABLE_THREADS_HINT) |
| std::vector<std::string> cpus; |
| cpus.reserve(64); |
| |
| // CPU part regex |
| regex_t cpu_part_rgx; |
| memset(&cpu_part_rgx, 0, sizeof(regex_t)); |
| int ret_status = regcomp(&cpu_part_rgx, R"(.*CPU part.+/?\:[[:space:]]+([[:alnum:]]+).*)", REG_EXTENDED); |
| ARM_COMPUTE_UNUSED(ret_status); |
| ARM_COMPUTE_ERROR_ON_MSG(ret_status != 0, "Regex compilation failed."); |
| |
| // Read cpuinfo and get occurrence of each core |
| std::ifstream cpuinfo_file("/proc/cpuinfo", std::ios::in); |
| if(cpuinfo_file.is_open()) |
| { |
| std::string line; |
| while(bool(getline(cpuinfo_file, line))) |
| { |
| std::array<regmatch_t, 2> match; |
| if(regexec(&cpu_part_rgx, line.c_str(), 2, match.data(), 0) == 0) |
| { |
| cpus.emplace_back(line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so))); |
| } |
| } |
| } |
| regfree(&cpu_part_rgx); |
| |
| // Get min number of threads |
| std::sort(std::begin(cpus), std::end(cpus)); |
| auto least_frequent_cpu_occurences = [](const std::vector<std::string> &cpus) -> uint32_t |
| { |
| std::unordered_map<std::string, uint32_t> cpus_freq; |
| for(const auto &cpu : cpus) |
| { |
| cpus_freq[cpu]++; |
| } |
| |
| uint32_t vmin = cpus.size() + 1; |
| for(const auto &cpu_freq : cpus_freq) |
| { |
| vmin = std::min(vmin, cpu_freq.second); |
| } |
| return vmin; |
| }; |
| |
| // Set thread hint |
| num_threads_hint = cpus.empty() ? std::thread::hardware_concurrency() : least_frequent_cpu_occurences(cpus); |
| #endif /* !defined(BARE_METAL) && !defined(_WIN64) && !defined(ARM_COMPUTE_DISABLE_THREADS_HINT) */ |
| |
| return num_threads_hint; |
| } |
| } // namespace cpuinfo |
| } // namespace arm_compute |