diff options
Diffstat (limited to 'tools/power/x86/turbostat')
| -rw-r--r-- | tools/power/x86/turbostat/turbostat.8 | 27 | ||||
| -rw-r--r-- | tools/power/x86/turbostat/turbostat.c | 1205 |
2 files changed, 623 insertions, 609 deletions
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 3340def58d01..1551fcdbfd8a 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -101,7 +101,7 @@ The column name "all" can be used to enable all disabled-by-default built-in cou .PP \fB--show column\fP show only the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. .PP -\fB--show CATEGORY --hide CATEGORY\fP Show and hide also accept a single CATEGORY of columns: "all", "topology", "idle", "frequency", "power", "cpuidle", "hwidle", "swidle", "other". "idle" (enabled by default), includes "hwidle" and "pct_idle". "cpuidle" (default disabled) includes cpuidle software invocation counters. "swidle" includes "cpuidle" plus "pct_idle". "hwidle" includes only hardware based idle residency counters. Older versions of turbostat used the term "sysfs" for what is now "swidle". +\fB--show CATEGORY --hide CATEGORY\fP Show and hide also accept a comma-separated-list of CATEGORIES of columns: "all", "topology", "idle", "frequency", "power", "cpuidle", "hwidle", "swidle", "cache", "llc", "other". "idle" (enabled by default), includes "hwidle" and "pct_idle". "cpuidle" (default disabled) includes cpuidle software invocation counters. "swidle" includes "cpuidle" plus "pct_idle". "hwidle" includes only hardware based idle residency counters. Older versions of turbostat used the term "sysfs" for what is now "swidle". .PP \fB--Dump\fP displays the raw counter values. .PP @@ -159,6 +159,10 @@ The system configuration dump (if --quiet is not used) is followed by statistics .PP \fBSMI\fP The number of System Management Interrupts serviced CPU during the measurement interval. While this counter is actually per-CPU, SMI are triggered on all processors, so the number should be the same for all CPUs. .PP +\fBLLCkRPS\fP Last Level Cache Thousands of References Per Second. 
For CPUs with an L3 LLC, this is the number of references that CPU made to the L3 (and the number of misses that CPU made to its L2). For CPUs with an L2 LLC, this is the number of references to the L2 (and the number of misses to the CPU's L1). The system summary row shows the sum for all CPUs. In both cases, the value displayed is the actual value divided by 1000 in the interest of usually fitting into 8 columns. +.PP +\fBLLC%hit\fP Last Level Cache Hit Rate %. Hit Rate Percent = 100.0 * (References - Misses)/References. The system summary row shows the weighted average for all CPUs (100.0 * (Sum_References - Sum_Misses)/Sum_References). +.PP \fBC1, C2, C3...\fP The number times Linux requested the C1, C2, C3 idle state during the measurement interval. The system summary line shows the sum for all CPUs. These are C-state names as exported in /sys/devices/system/cpu/cpu*/cpuidle/state*/name. While their names are generic, their attributes are processor specific. They the system description section of output shows what MWAIT sub-states they are mapped to on each system. These counters are in the "cpuidle" group, which is disabled, by default. .PP \fBC1+, C2+, C3+...\fP The idle governor idle state misprediction statistics. Inidcates the number times Linux requested the C1, C2, C3 idle state during the measurement interval, but should have requested a deeper idle state (if it exists and enabled). These statistics come from the /sys/devices/system/cpu/cpu*/cpuidle/state*/below file. These counters are in the "cpuidle" group, which is disabled, by default. @@ -410,25 +414,24 @@ CPU pCPU%c1 CPU%c1 .fi .SH ADD PERF COUNTER EXAMPLE #2 (using virtual cpu device) -Here we run on hybrid, Raptor Lake platform. -We limit turbostat to show output for just cpu0 (pcore) and cpu12 (ecore). +Here we run on hybrid, Meteor Lake platform. +We limit turbostat to show output for just cpu0 (pcore) and cpu4 (ecore). 
We add a counter showing number of L3 cache misses, using virtual "cpu" device, labeling it with the column header, "VCMISS". We add a counter showing number of L3 cache misses, using virtual "cpu_core" device, -labeling it with the column header, "PCMISS". This will fail on ecore cpu12. +labeling it with the column header, "PCMISS". This will fail on ecore cpu4. We add a counter showing number of L3 cache misses, using virtual "cpu_atom" device, labeling it with the column header, "ECMISS". This will fail on pcore cpu0. We display it only once, after the conclusion of 0.1 second sleep. .nf -sudo ./turbostat --quiet --cpu 0,12 --show CPU --add perf/cpu/cache-misses,cpu,delta,raw,VCMISS --add perf/cpu_core/cache-misses,cpu,delta,raw,PCMISS --add perf/cpu_atom/cache-misses,cpu,delta,raw,ECMISS sleep .1 +sudo ./turbostat --quiet --cpu 0,4 --show CPU --add perf/cpu/cache-misses,cpu,delta,VCMISS --add perf/cpu_core/cache-misses,cpu,delta,PCMISS --add perf/cpu_atom/cache-misses,cpu,delta,ECMISS sleep 5 turbostat: added_perf_counters_init_: perf/cpu_atom/cache-misses: failed to open counter on cpu0 -turbostat: added_perf_counters_init_: perf/cpu_core/cache-misses: failed to open counter on cpu12 -0.104630 sec -CPU ECMISS PCMISS VCMISS -- 0x0000000000000000 0x0000000000000000 0x0000000000000000 -0 0x0000000000000000 0x0000000000007951 0x0000000000007796 -12 0x000000000001137a 0x0000000000000000 0x0000000000011392 - +turbostat: added_perf_counters_init_: perf/cpu_core/cache-misses: failed to open counter on cpu4 +5.001207 sec +CPU ECMISS PCMISS VCMISS +- 41586506 46291219 87877749 +4 83173012 0 83173040 +0 0 92582439 92582458 .fi .SH ADD PMT COUNTER EXAMPLE diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index f2512d78bcbd..5ad45c2ac5bd 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -142,6 +142,7 @@ struct msr_counter { #define FLAGS_SHOW (1 << 1) #define SYSFS_PERCPU (1 << 1) }; 
+static int use_android_msr_path; struct msr_counter bic[] = { { 0x0, "usec", NULL, 0, 0, 0, NULL, 0 }, @@ -209,6 +210,8 @@ struct msr_counter bic[] = { { 0x0, "NMI", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "CPU%c1e", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "pct_idle", NULL, 0, 0, 0, NULL, 0 }, + { 0x0, "LLCkRPS", NULL, 0, 0, 0, NULL, 0 }, + { 0x0, "LLC%hit", NULL, 0, 0, 0, NULL, 0 }, }; /* n.b. bic_names must match the order in bic[], above */ @@ -278,6 +281,8 @@ enum bic_names { BIC_NMI, BIC_CPU_c1e, BIC_pct_idle, + BIC_LLC_RPS, + BIC_LLC_HIT, MAX_BIC }; @@ -305,6 +310,7 @@ static cpu_set_t bic_group_frequency; static cpu_set_t bic_group_hw_idle; static cpu_set_t bic_group_sw_idle; static cpu_set_t bic_group_idle; +static cpu_set_t bic_group_cache; static cpu_set_t bic_group_other; static cpu_set_t bic_group_disabled_by_default; static cpu_set_t bic_enabled; @@ -413,9 +419,14 @@ static void bic_groups_init(void) SET_BIC(BIC_pct_idle, &bic_group_sw_idle); BIC_INIT(&bic_group_idle); + CPU_OR(&bic_group_idle, &bic_group_idle, &bic_group_hw_idle); SET_BIC(BIC_pct_idle, &bic_group_idle); + BIC_INIT(&bic_group_cache); + SET_BIC(BIC_LLC_RPS, &bic_group_cache); + SET_BIC(BIC_LLC_HIT, &bic_group_cache); + BIC_INIT(&bic_group_other); SET_BIC(BIC_IRQ, &bic_group_other); SET_BIC(BIC_NMI, &bic_group_other); @@ -466,12 +477,11 @@ static void bic_groups_init(void) #define PCL_10 14 /* PC10 */ #define PCLUNL 15 /* Unlimited */ -struct amperf_group_fd; - char *proc_stat = "/proc/stat"; FILE *outf; int *fd_percpu; int *fd_instr_count_percpu; +int *fd_llc_percpu; struct timeval interval_tv = { 5, 0 }; struct timespec interval_ts = { 5, 0 }; @@ -482,11 +492,12 @@ unsigned int quiet; unsigned int shown; unsigned int sums_need_wide_columns; unsigned int rapl_joules; +unsigned int valid_rapl_msrs; unsigned int summary_only; unsigned int list_header_only; unsigned int dump_only; unsigned int force_load; -unsigned int has_aperf; +unsigned int cpuid_has_aperf_mperf; unsigned int has_aperf_access; 
unsigned int has_epb; unsigned int has_turbo; @@ -552,8 +563,7 @@ static struct gfx_sysfs_info gfx_info[GFX_MAX]; int get_msr(int cpu, off_t offset, unsigned long long *msr); int add_counter(unsigned int msr_num, char *path, char *name, - unsigned int width, enum counter_scope scope, - enum counter_type type, enum counter_format format, int flags, int package_num); + unsigned int width, enum counter_scope scope, enum counter_type type, enum counter_format format, int flags, int package_num); /* Model specific support Start */ @@ -578,7 +588,7 @@ struct platform_features { bool has_cst_prewake_bit; /* Cstate prewake bit in MSR_IA32_POWER_CTL */ int trl_msrs; /* MSR_TURBO_RATIO_LIMIT/LIMIT1/LIMIT2/SECONDARY, Atom TRL MSRs */ int plr_msrs; /* MSR_CORE/GFX/RING_PERF_LIMIT_REASONS */ - int rapl_msrs; /* RAPL PKG/DRAM/CORE/GFX MSRs, AMD RAPL MSRs */ + int plat_rapl_msrs; /* RAPL PKG/DRAM/CORE/GFX MSRs, AMD RAPL MSRs */ bool has_per_core_rapl; /* Indicates cores energy collection is per-core, not per-package. 
AMD specific for now */ bool has_rapl_divisor; /* Divisor for Energy unit raw value from MSR_RAPL_POWER_UNIT */ bool has_fixed_rapl_unit; /* Fixed Energy Unit used for DRAM RAPL Domain */ @@ -733,7 +743,7 @@ static const struct platform_features snb_features = { .cst_limit = CST_LIMIT_SNB, .has_irtl_msrs = 1, .trl_msrs = TRL_BASE, - .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, + .plat_rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, }; static const struct platform_features snx_features = { @@ -745,7 +755,7 @@ static const struct platform_features snx_features = { .cst_limit = CST_LIMIT_SNB, .has_irtl_msrs = 1, .trl_msrs = TRL_BASE, - .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL, + .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL, }; static const struct platform_features ivb_features = { @@ -758,7 +768,7 @@ static const struct platform_features ivb_features = { .cst_limit = CST_LIMIT_SNB, .has_irtl_msrs = 1, .trl_msrs = TRL_BASE, - .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, + .plat_rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, }; static const struct platform_features ivx_features = { @@ -770,7 +780,7 @@ static const struct platform_features ivx_features = { .cst_limit = CST_LIMIT_SNB, .has_irtl_msrs = 1, .trl_msrs = TRL_BASE | TRL_LIMIT1, - .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL, + .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM_ALL, }; static const struct platform_features hsw_features = { @@ -784,7 +794,7 @@ static const struct platform_features hsw_features = { .has_irtl_msrs = 1, .trl_msrs = TRL_BASE, .plr_msrs = PLR_CORE | PLR_GFX | PLR_RING, - .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, + .plat_rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, }; static const struct platform_features hsx_features = { @@ -798,7 +808,7 @@ static const struct 
platform_features hsx_features = { .has_irtl_msrs = 1, .trl_msrs = TRL_BASE | TRL_LIMIT1 | TRL_LIMIT2, .plr_msrs = PLR_CORE | PLR_RING, - .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, + .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, .has_fixed_rapl_unit = 1, }; @@ -813,7 +823,7 @@ static const struct platform_features hswl_features = { .has_irtl_msrs = 1, .trl_msrs = TRL_BASE, .plr_msrs = PLR_CORE | PLR_GFX | PLR_RING, - .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, + .plat_rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, }; static const struct platform_features hswg_features = { @@ -827,7 +837,7 @@ static const struct platform_features hswg_features = { .has_irtl_msrs = 1, .trl_msrs = TRL_BASE, .plr_msrs = PLR_CORE | PLR_GFX | PLR_RING, - .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, + .plat_rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, }; static const struct platform_features bdw_features = { @@ -840,7 +850,7 @@ static const struct platform_features bdw_features = { .cst_limit = CST_LIMIT_HSW, .has_irtl_msrs = 1, .trl_msrs = TRL_BASE, - .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, + .plat_rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, }; static const struct platform_features bdwg_features = { @@ -853,7 +863,7 @@ static const struct platform_features bdwg_features = { .cst_limit = CST_LIMIT_HSW, .has_irtl_msrs = 1, .trl_msrs = TRL_BASE, - .rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, + .plat_rapl_msrs = RAPL_PKG | RAPL_CORE_ALL | RAPL_GFX | RAPL_PKG_POWER_INFO, }; static const struct platform_features bdx_features = { @@ -867,7 +877,7 @@ static const struct platform_features bdx_features = { .has_irtl_msrs = 1, .has_cst_auto_convension = 1, .trl_msrs = TRL_BASE, - .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, + .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, .has_fixed_rapl_unit = 1, }; @@ -884,7 
+894,7 @@ static const struct platform_features skl_features = { .has_ext_cst_msrs = 1, .trl_msrs = TRL_BASE, .tcc_offset_bits = 6, - .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX | RAPL_PSYS, + .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX | RAPL_PSYS, .enable_tsc_tweak = 1, }; @@ -901,7 +911,7 @@ static const struct platform_features cnl_features = { .has_ext_cst_msrs = 1, .trl_msrs = TRL_BASE, .tcc_offset_bits = 6, - .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX | RAPL_PSYS, + .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX | RAPL_PSYS, .enable_tsc_tweak = 1, }; @@ -919,7 +929,7 @@ static const struct platform_features adl_features = { .has_ext_cst_msrs = cnl_features.has_ext_cst_msrs, .trl_msrs = cnl_features.trl_msrs, .tcc_offset_bits = cnl_features.tcc_offset_bits, - .rapl_msrs = cnl_features.rapl_msrs, + .plat_rapl_msrs = cnl_features.plat_rapl_msrs, .enable_tsc_tweak = cnl_features.enable_tsc_tweak, }; @@ -937,7 +947,7 @@ static const struct platform_features lnl_features = { .has_ext_cst_msrs = adl_features.has_ext_cst_msrs, .trl_msrs = adl_features.trl_msrs, .tcc_offset_bits = adl_features.tcc_offset_bits, - .rapl_msrs = adl_features.rapl_msrs, + .plat_rapl_msrs = adl_features.plat_rapl_msrs, .enable_tsc_tweak = adl_features.enable_tsc_tweak, }; @@ -952,7 +962,7 @@ static const struct platform_features skx_features = { .has_irtl_msrs = 1, .has_cst_auto_convension = 1, .trl_msrs = TRL_BASE | TRL_CORECOUNT, - .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, + .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, .has_fixed_rapl_unit = 1, }; @@ -968,7 +978,7 @@ static const struct platform_features icx_features = { .has_irtl_msrs = 1, .has_cst_prewake_bit = 1, .trl_msrs = TRL_BASE | TRL_CORECOUNT, - .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, + .plat_rapl_msrs = RAPL_PKG_ALL | 
RAPL_DRAM_ALL | RAPL_PSYS, .has_fixed_rapl_unit = 1, }; @@ -985,7 +995,7 @@ static const struct platform_features spr_features = { .has_cst_prewake_bit = 1, .has_fixed_rapl_psys_unit = 1, .trl_msrs = TRL_BASE | TRL_CORECOUNT, - .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, + .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, }; static const struct platform_features dmr_features = { @@ -1000,7 +1010,7 @@ static const struct platform_features dmr_features = { .has_fixed_rapl_psys_unit = spr_features.has_fixed_rapl_psys_unit, .trl_msrs = spr_features.trl_msrs, .has_msr_module_c6_res_ms = 1, /* DMR has Dual-Core-Module and MC6 MSR */ - .rapl_msrs = 0, /* DMR does not have RAPL MSRs */ + .plat_rapl_msrs = 0, /* DMR does not have RAPL MSRs */ .plr_msrs = 0, /* DMR does not have PLR MSRs */ .has_irtl_msrs = 0, /* DMR does not have IRTL MSRs */ .has_config_tdp = 0, /* DMR does not have CTDP MSRs */ @@ -1019,7 +1029,7 @@ static const struct platform_features srf_features = { .has_irtl_msrs = 1, .has_cst_prewake_bit = 1, .trl_msrs = TRL_BASE | TRL_CORECOUNT, - .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, + .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, }; static const struct platform_features grr_features = { @@ -1035,7 +1045,7 @@ static const struct platform_features grr_features = { .has_irtl_msrs = 1, .has_cst_prewake_bit = 1, .trl_msrs = TRL_BASE | TRL_CORECOUNT, - .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, + .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, }; static const struct platform_features slv_features = { @@ -1048,7 +1058,7 @@ static const struct platform_features slv_features = { .has_msr_c6_demotion_policy_config = 1, .has_msr_atom_pkg_c6_residency = 1, .trl_msrs = TRL_ATOM, - .rapl_msrs = RAPL_PKG | RAPL_CORE, + .plat_rapl_msrs = RAPL_PKG | RAPL_CORE, .has_rapl_divisor = 1, .rapl_quirk_tdp = 30, }; @@ -1061,7 +1071,7 @@ static const struct platform_features slvd_features = { .cst_limit = 
CST_LIMIT_SLV, .has_msr_atom_pkg_c6_residency = 1, .trl_msrs = TRL_BASE, - .rapl_msrs = RAPL_PKG | RAPL_CORE, + .plat_rapl_msrs = RAPL_PKG | RAPL_CORE, .rapl_quirk_tdp = 30, }; @@ -1082,7 +1092,7 @@ static const struct platform_features gmt_features = { .cst_limit = CST_LIMIT_GMT, .has_irtl_msrs = 1, .trl_msrs = TRL_BASE | TRL_CORECOUNT, - .rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO, + .plat_rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO, }; static const struct platform_features gmtd_features = { @@ -1095,7 +1105,7 @@ static const struct platform_features gmtd_features = { .has_irtl_msrs = 1, .has_msr_core_c1_res = 1, .trl_msrs = TRL_BASE | TRL_CORECOUNT, - .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_CORE_ENERGY_STATUS, + .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_CORE_ENERGY_STATUS, }; static const struct platform_features gmtp_features = { @@ -1107,7 +1117,7 @@ static const struct platform_features gmtp_features = { .cst_limit = CST_LIMIT_GMT, .has_irtl_msrs = 1, .trl_msrs = TRL_BASE, - .rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO, + .plat_rapl_msrs = RAPL_PKG | RAPL_PKG_POWER_INFO, }; static const struct platform_features tmt_features = { @@ -1118,7 +1128,7 @@ static const struct platform_features tmt_features = { .cst_limit = CST_LIMIT_GMT, .has_irtl_msrs = 1, .trl_msrs = TRL_BASE, - .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX, + .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX, .enable_tsc_tweak = 1, }; @@ -1130,7 +1140,7 @@ static const struct platform_features tmtd_features = { .cst_limit = CST_LIMIT_GMT, .has_irtl_msrs = 1, .trl_msrs = TRL_BASE | TRL_CORECOUNT, - .rapl_msrs = RAPL_PKG_ALL, + .plat_rapl_msrs = RAPL_PKG_ALL, }; static const struct platform_features knl_features = { @@ -1142,7 +1152,7 @@ static const struct platform_features knl_features = { .cst_limit = CST_LIMIT_KNL, .has_msr_knl_core_c6_residency = 1, .trl_msrs = TRL_KNL, - .rapl_msrs = 
RAPL_PKG_ALL | RAPL_DRAM_ALL, + .plat_rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, .has_fixed_rapl_unit = 1, .need_perf_multiplier = 1, }; @@ -1151,7 +1161,7 @@ static const struct platform_features default_features = { }; static const struct platform_features amd_features_with_rapl = { - .rapl_msrs = RAPL_AMD_F17H, + .plat_rapl_msrs = RAPL_AMD_F17H, .has_per_core_rapl = 1, .rapl_quirk_tdp = 280, /* This is the max stock TDP of HEDT/Server Fam17h+ chips */ }; @@ -1210,6 +1220,9 @@ static const struct platform_data turbostat_pdata[] = { { INTEL_ARROWLAKE, &adl_features }, { INTEL_LUNARLAKE_M, &lnl_features }, { INTEL_PANTHERLAKE_L, &lnl_features }, + { INTEL_NOVALAKE, &lnl_features }, + { INTEL_NOVALAKE_L, &lnl_features }, + { INTEL_WILDCATLAKE_L, &lnl_features }, { INTEL_ATOM_SILVERMONT, &slv_features }, { INTEL_ATOM_SILVERMONT_D, &slvd_features }, { INTEL_ATOM_AIRMONT, &amt_features }, @@ -1294,8 +1307,7 @@ char *progname; #define CPU_SUBSET_MAXCPUS 8192 /* need to use before probe... */ cpu_set_t *cpu_present_set, *cpu_possible_set, *cpu_effective_set, *cpu_allowed_set, *cpu_affinity_set, *cpu_subset; -size_t cpu_present_setsize, cpu_possible_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize, - cpu_subset_size; +size_t cpu_present_setsize, cpu_possible_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affinity_setsize, cpu_subset_size; #define MAX_ADDED_THREAD_COUNTERS 24 #define MAX_ADDED_CORE_COUNTERS 8 #define MAX_ADDED_PACKAGE_COUNTERS 16 @@ -1991,6 +2003,10 @@ void pmt_counter_resize(struct pmt_counter *pcounter, unsigned int new_size) pmt_counter_resize_(pcounter, new_size); } +struct llc_stats { + unsigned long long references; + unsigned long long misses; +}; struct thread_data { struct timeval tv_begin; struct timeval tv_end; @@ -2003,6 +2019,7 @@ struct thread_data { unsigned long long irq_count; unsigned long long nmi_count; unsigned int smi_count; + struct llc_stats llc; unsigned int cpu_id; unsigned int apic_id; unsigned 
int x2apic_id; @@ -2118,7 +2135,7 @@ off_t idx_to_offset(int idx) switch (idx) { case IDX_PKG_ENERGY: - if (platform->rapl_msrs & RAPL_AMD_F17H) + if (valid_rapl_msrs & RAPL_AMD_F17H) offset = MSR_PKG_ENERGY_STAT; else offset = MSR_PKG_ENERGY_STATUS; @@ -2184,19 +2201,19 @@ int idx_valid(int idx) { switch (idx) { case IDX_PKG_ENERGY: - return platform->rapl_msrs & (RAPL_PKG | RAPL_AMD_F17H); + return valid_rapl_msrs & (RAPL_PKG | RAPL_AMD_F17H); case IDX_DRAM_ENERGY: - return platform->rapl_msrs & RAPL_DRAM; + return valid_rapl_msrs & RAPL_DRAM; case IDX_PP0_ENERGY: - return platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS; + return valid_rapl_msrs & RAPL_CORE_ENERGY_STATUS; case IDX_PP1_ENERGY: - return platform->rapl_msrs & RAPL_GFX; + return valid_rapl_msrs & RAPL_GFX; case IDX_PKG_PERF: - return platform->rapl_msrs & RAPL_PKG_PERF_STATUS; + return valid_rapl_msrs & RAPL_PKG_PERF_STATUS; case IDX_DRAM_PERF: - return platform->rapl_msrs & RAPL_DRAM_PERF_STATUS; + return valid_rapl_msrs & RAPL_DRAM_PERF_STATUS; case IDX_PSYS_ENERGY: - return platform->rapl_msrs & RAPL_PSYS; + return valid_rapl_msrs & RAPL_PSYS; default: return 0; } @@ -2362,23 +2379,19 @@ int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pk return retval; } -int is_cpu_first_thread_in_core(PER_THREAD_PARAMS) +int is_cpu_first_thread_in_core(struct thread_data *t, struct core_data *c) { - UNUSED(p); - return ((int)t->cpu_id == c->base_cpu || c->base_cpu < 0); } -int is_cpu_first_core_in_package(PER_THREAD_PARAMS) +int is_cpu_first_core_in_package(struct thread_data *t, struct pkg_data *p) { - UNUSED(c); - return ((int)t->cpu_id == p->base_cpu || p->base_cpu < 0); } -int is_cpu_first_thread_in_package(PER_THREAD_PARAMS) +int is_cpu_first_thread_in_package(struct thread_data *t, struct core_data *c, struct pkg_data *p) { - return is_cpu_first_thread_in_core(t, c, p) && is_cpu_first_core_in_package(t, c, p); + return is_cpu_first_thread_in_core(t, c) && 
is_cpu_first_core_in_package(t, p); } int cpu_migrate(int cpu) @@ -2400,20 +2413,11 @@ int get_msr_fd(int cpu) if (fd) return fd; -#if defined(ANDROID) - sprintf(pathname, "/dev/msr%d", cpu); -#else - sprintf(pathname, "/dev/cpu/%d/msr", cpu); -#endif + sprintf(pathname, use_android_msr_path ? "/dev/msr%d" : "/dev/cpu/%d/msr", cpu); fd = open(pathname, O_RDONLY); if (fd < 0) -#if defined(ANDROID) - err(-1, "%s open failed, try chown or chmod +r /dev/msr*, " - "or run with --no-msr, or run as root", pathname); -#else - err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, " - "or run with --no-msr, or run as root", pathname); -#endif + err(-1, "%s open failed, try chown or chmod +r %s, " + "or run with --no-msr, or run as root", pathname, use_android_msr_path ? "/dev/msr*" : "/dev/cpu/*/msr"); fd_percpu[cpu] = fd; return fd; @@ -2432,6 +2436,13 @@ static void bic_disable_msr_access(void) free_sys_msr_counters(); } +static void bic_disable_perf_access(void) +{ + CLR_BIC(BIC_IPC, &bic_enabled); + CLR_BIC(BIC_LLC_RPS, &bic_enabled); + CLR_BIC(BIC_LLC_HIT, &bic_enabled); +} + static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags) { assert(!no_perf); @@ -2512,7 +2523,7 @@ int add_rapl_msr_counter(int cpu, const struct rapl_counter_arch_info *cai) { int ret; - if (!(platform->rapl_msrs & cai->feature_mask)) + if (!(valid_rapl_msrs & cai->feature_mask)) return -1; ret = add_msr_counter(cpu, cai->msr); @@ -2656,6 +2667,12 @@ void bic_lookup(cpu_set_t *ret_set, char *name_list, enum show_hide_mode mode) } else if (!strcmp(name_list, "idle")) { CPU_OR(ret_set, ret_set, &bic_group_idle); break; + } else if (!strcmp(name_list, "cache")) { + CPU_OR(ret_set, ret_set, &bic_group_cache); + break; + } else if (!strcmp(name_list, "llc")) { + CPU_OR(ret_set, ret_set, &bic_group_cache); + break; } else if (!strcmp(name_list, "swidle")) { CPU_OR(ret_set, ret_set, &bic_group_sw_idle); break; @@ -2677,8 +2694,7 @@ void 
bic_lookup(cpu_set_t *ret_set, char *name_list, enum show_hide_mode mode) if (mode == SHOW_LIST) { deferred_add_names[deferred_add_index++] = name_list; if (deferred_add_index >= MAX_DEFERRED) { - fprintf(stderr, "More than max %d un-recognized --add options '%s'\n", - MAX_DEFERRED, name_list); + fprintf(stderr, "More than max %d un-recognized --add options '%s'\n", MAX_DEFERRED, name_list); help(); exit(1); } @@ -2687,8 +2703,7 @@ void bic_lookup(cpu_set_t *ret_set, char *name_list, enum show_hide_mode mode) if (debug) fprintf(stderr, "deferred \"%s\"\n", name_list); if (deferred_skip_index >= MAX_DEFERRED) { - fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n", - MAX_DEFERRED, name_list); + fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n", MAX_DEFERRED, name_list); help(); exit(1); } @@ -2702,6 +2717,47 @@ void bic_lookup(cpu_set_t *ret_set, char *name_list, enum show_hide_mode mode) } } +/* + * print_name() + * Print column header name for raw 64-bit counter in 16 columns (at least 8-char plus a tab) + * Otherwise, allow the name + tab to fit within 8-column tab-stop. + * In both cases, left justified, just like other turbostat columns, + * to allow the column values to consume the tab. + * + * Yes, 32-bit counters can overflow 8-columns, and + * 64-bit counters can overflow 16-columns, but that is uncommon. + */ +static inline int print_name(int width, int *printed, char *delim, char *name, enum counter_type type, enum counter_format format) +{ + UNUSED(type); + + if (format == FORMAT_RAW && width >= 64) + return (sprintf(outp, "%s%-8s", (*printed++ ? delim : ""), name)); + else + return (sprintf(outp, "%s%s", (*printed++ ? delim : ""), name)); +} + +static inline int print_hex_value(int width, int *printed, char *delim, unsigned long long value) +{ + if (width <= 32) + return (sprintf(outp, "%s%08x", (*printed++ ? delim : ""), (unsigned int)value)); + else + return (sprintf(outp, "%s%016llx", (*printed++ ? delim : ""), value)); +} + +static inline int print_decimal_value(int width, int *printed, char *delim, unsigned long long value) +{ + if (width <= 32) + return (sprintf(outp, "%s%d", (*printed++ ? delim : ""), (unsigned int)value)); + else + return (sprintf(outp, "%s%-8lld", (*printed++ ? 
delim : ""), value)); +} + +static inline int print_decimal_value(int width, int *printed, char *delim, unsigned long long value) +{ + if (width <= 32) + return (sprintf(outp, "%s%d", (*printed++ ? delim : ""), (unsigned int)value)); + else + return (sprintf(outp, "%s%-8lld", (*printed++ ? delim : ""), value)); +} + +static inline int print_float_value(int *printed, char *delim, double value) +{ + return (sprintf(outp, "%s%0.2f", (*printed++ ? delim : ""), value)); +} + void print_header(char *delim) { struct msr_counter *mp; @@ -2757,50 +2813,28 @@ void print_header(char *delim) if (DO_BIC(BIC_SMI)) outp += sprintf(outp, "%sSMI", (printed++ ? delim : "")); - for (mp = sys.tp; mp; mp = mp->next) { + if (DO_BIC(BIC_LLC_RPS)) + outp += sprintf(outp, "%sLLCkRPS", (printed++ ? delim : "")); - if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) { - if (mp->width == 64) - outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), mp->name); - else - outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), mp->name); - } else { - if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) - outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), mp->name); - else - outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), mp->name); - } - } + if (DO_BIC(BIC_LLC_HIT)) + outp += sprintf(outp, "%sLLC%%hit", (printed++ ? delim : "")); - for (pp = sys.perf_tp; pp; pp = pp->next) { + for (mp = sys.tp; mp; mp = mp->next) + outp += print_name(mp->width, &printed, delim, mp->name, mp->type, mp->format); - if (pp->format == FORMAT_RAW) { - if (pp->width == 64) - outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name); - else - outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name); - } else { - if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) - outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name); - else - outp += sprintf(outp, "%s%s", (printed++ ? 
delim : ""), pp->name); - } - } + for (pp = sys.perf_tp; pp; pp = pp->next) + outp += print_name(pp->width, &printed, delim, pp->name, pp->type, pp->format); ppmt = sys.pmt_tp; while (ppmt) { switch (ppmt->type) { case PMT_TYPE_RAW: - if (pmt_counter_get_width(ppmt) <= 32) - outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name); - else - outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name); - + outp += print_name(pmt_counter_get_width(ppmt), &printed, delim, ppmt->name, COUNTER_ITEMS, ppmt->format); break; case PMT_TYPE_XTAL_TIME: case PMT_TYPE_TCORE_CLOCK: - outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name); + outp += print_name(32, &printed, delim, ppmt->name, COUNTER_ITEMS, ppmt->format); break; } @@ -2825,63 +2859,36 @@ void print_header(char *delim) if (DO_BIC(BIC_CORE_THROT_CNT)) outp += sprintf(outp, "%sCoreThr", (printed++ ? delim : "")); - if (platform->rapl_msrs && !rapl_joules) { + if (valid_rapl_msrs && !rapl_joules) { if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl) outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : "")); - } else if (platform->rapl_msrs && rapl_joules) { + } else if (valid_rapl_msrs && rapl_joules) { if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl) outp += sprintf(outp, "%sCor_J", (printed++ ? 
delim : "")); } - for (mp = sys.cp; mp; mp = mp->next) { - if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) { - if (mp->width == 64) - outp += sprintf(outp, "%s%18.18s", delim, mp->name); - else - outp += sprintf(outp, "%s%10.10s", delim, mp->name); - } else { - if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) - outp += sprintf(outp, "%s%8s", delim, mp->name); - else - outp += sprintf(outp, "%s%s", delim, mp->name); - } - } + for (mp = sys.cp; mp; mp = mp->next) + outp += print_name(mp->width, &printed, delim, mp->name, mp->type, mp->format); - for (pp = sys.perf_cp; pp; pp = pp->next) { - - if (pp->format == FORMAT_RAW) { - if (pp->width == 64) - outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name); - else - outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name); - } else { - if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) - outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name); - else - outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name); - } - } + for (pp = sys.perf_cp; pp; pp = pp->next) + outp += print_name(pp->width, &printed, delim, pp->name, pp->type, pp->format); ppmt = sys.pmt_cp; while (ppmt) { switch (ppmt->type) { case PMT_TYPE_RAW: - if (pmt_counter_get_width(ppmt) <= 32) - outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name); - else - outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name); + outp += print_name(pmt_counter_get_width(ppmt), &printed, delim, ppmt->name, COUNTER_ITEMS, ppmt->format); break; case PMT_TYPE_XTAL_TIME: case PMT_TYPE_TCORE_CLOCK: - outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name); + outp += print_name(32, &printed, delim, ppmt->name, COUNTER_ITEMS, ppmt->format); break; } ppmt = ppmt->next; } - if (DO_BIC(BIC_PkgTmp)) outp += sprintf(outp, "%sPkgTmp", (printed++ ? 
delim : "")); @@ -2963,51 +2970,22 @@ void print_header(char *delim) if (DO_BIC(BIC_UNCORE_MHZ)) outp += sprintf(outp, "%sUncMHz", (printed++ ? delim : "")); - for (mp = sys.pp; mp; mp = mp->next) { - if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) { - if (mp->width == 64) - outp += sprintf(outp, "%s%18.18s", delim, mp->name); - else if (mp->width == 32) - outp += sprintf(outp, "%s%10.10s", delim, mp->name); - else - outp += sprintf(outp, "%s%7.7s", delim, mp->name); - } else { - if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) - outp += sprintf(outp, "%s%8s", delim, mp->name); - else - outp += sprintf(outp, "%s%7.7s", delim, mp->name); - } - } - - for (pp = sys.perf_pp; pp; pp = pp->next) { + for (mp = sys.pp; mp; mp = mp->next) + outp += print_name(mp->width, &printed, delim, mp->name, mp->type, mp->format); - if (pp->format == FORMAT_RAW) { - if (pp->width == 64) - outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name); - else - outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name); - } else { - if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) - outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name); - else - outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name); - } - } + for (pp = sys.perf_pp; pp; pp = pp->next) + outp += print_name(pp->width, &printed, delim, pp->name, pp->type, pp->format); ppmt = sys.pmt_pp; while (ppmt) { switch (ppmt->type) { case PMT_TYPE_RAW: - if (pmt_counter_get_width(ppmt) <= 32) - outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name); - else - outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name); - + outp += print_name(pmt_counter_get_width(ppmt), &printed, delim, ppmt->name, COUNTER_ITEMS, ppmt->format); break; case PMT_TYPE_XTAL_TIME: case PMT_TYPE_TCORE_CLOCK: - outp += sprintf(outp, "%s%s", (printed++ ? 
delim : ""), ppmt->name); + outp += print_name(32, &printed, delim, ppmt->name, COUNTER_ITEMS, ppmt->format); break; } @@ -3022,6 +3000,25 @@ void print_header(char *delim) outp += sprintf(outp, "\n"); } +/* + * pct() + * + * If absolute value is < 1.1, return percentage + * otherwise, return nan + * + * return value is appropriate for printing percentages with %f + * while flagging some obvious erroneous values. + */ +double pct(double d) +{ + + double abs = fabs(d); + + if (abs < 1.10) + return (100.0 * d); + return nan(""); +} + int dump_counters(PER_THREAD_PARAMS) { int i; @@ -3047,14 +3044,16 @@ int dump_counters(PER_THREAD_PARAMS) if (DO_BIC(BIC_SMI)) outp += sprintf(outp, "SMI: %d\n", t->smi_count); + outp += sprintf(outp, "LLC refs: %lld", t->llc.references); + outp += sprintf(outp, "LLC miss: %lld", t->llc.misses); + outp += sprintf(outp, "LLC Hit%%: %.2f", pct((t->llc.references - t->llc.misses) / t->llc.references)); + for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { - outp += - sprintf(outp, "tADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, - t->counter[i], mp->sp->path); + outp += sprintf(outp, "tADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, t->counter[i], mp->sp->path); } } - if (c && is_cpu_first_thread_in_core(t, c, p)) { + if (c && is_cpu_first_thread_in_core(t, c)) { outp += sprintf(outp, "core: %d\n", c->core_id); outp += sprintf(outp, "c3: %016llX\n", c->c3); outp += sprintf(outp, "c6: %016llX\n", c->c6); @@ -3069,14 +3068,12 @@ int dump_counters(PER_THREAD_PARAMS) outp += sprintf(outp, "Joules: %0llX (scale: %lf)\n", energy_value, energy_scale); for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { - outp += - sprintf(outp, "cADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, - c->counter[i], mp->sp->path); + outp += sprintf(outp, "cADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, c->counter[i], mp->sp->path); } outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us); } - if (p && 
is_cpu_first_core_in_package(t, c, p)) { + if (p && is_cpu_first_core_in_package(t, p)) { outp += sprintf(outp, "package: %d\n", p->package_id); outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0); @@ -3106,9 +3103,7 @@ int dump_counters(PER_THREAD_PARAMS) outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c); for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { - outp += - sprintf(outp, "pADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, - p->counter[i], mp->sp->path); + outp += sprintf(outp, "pADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num, p->counter[i], mp->sp->path); } } @@ -3134,6 +3129,26 @@ double rapl_counter_get_value(const struct rapl_counter *c, enum rapl_unit desir return scaled; } +void get_perf_llc_stats(int cpu, struct llc_stats *llc) +{ + struct read_format { + unsigned long long num_read; + struct llc_stats llc; + } r; + const ssize_t expected_read_size = sizeof(r); + ssize_t actual_read_size; + + actual_read_size = read(fd_llc_percpu[cpu], &r, expected_read_size); + + if (actual_read_size == -1) + err(-1, "%s(cpu%d,) %d,,%ld\n", __func__, cpu, fd_llc_percpu[cpu], expected_read_size); + + llc->references = r.llc.references; + llc->misses = r.llc.misses; + if (actual_read_size != expected_read_size) + warn("%s: failed to read perf_data (req %zu act %zu)", __func__, expected_read_size, actual_read_size); +} + /* * column formatting convention & formats */ @@ -3143,7 +3158,8 @@ int format_counters(PER_THREAD_PARAMS) struct platform_counters *pplat_cnt = NULL; double interval_float, tsc; - char *fmt8; + char *fmt8 = "%s%.2f"; + int i; struct msr_counter *mp; struct perf_counter_info *pp; @@ -3157,11 +3173,11 @@ int format_counters(PER_THREAD_PARAMS) } /* if showing only 1st thread in core and this isn't one, bail out */ - if (show_core_only && !is_cpu_first_thread_in_core(t, c, p)) + if (show_core_only && !is_cpu_first_thread_in_core(t, c)) return 0; /* if showing only 1st thread in pkg and this isn't one, 
bail out */ - if (show_pkg_only && !is_cpu_first_core_in_package(t, c, p)) + if (show_pkg_only && !is_cpu_first_core_in_package(t, p)) return 0; /*if not summary line and --cpu is used */ @@ -3223,8 +3239,7 @@ int format_counters(PER_THREAD_PARAMS) } if (DO_BIC(BIC_Node)) { if (t) - outp += sprintf(outp, "%s%d", - (printed++ ? delim : ""), cpus[t->cpu_id].physical_node_id); + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), cpus[t->cpu_id].physical_node_id); else outp += sprintf(outp, "%s-", (printed++ ? delim : "")); } @@ -3246,15 +3261,13 @@ int format_counters(PER_THREAD_PARAMS) outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 / units * t->aperf / interval_float); if (DO_BIC(BIC_Busy)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(t->mperf / tsc)); if (DO_BIC(BIC_Bzy_MHz)) { if (has_base_hz) - outp += - sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf); + outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf); else - outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), - tsc / units * t->aperf / t->mperf / interval_float); + outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), tsc / units * t->aperf / t->mperf / interval_float); } if (DO_BIC(BIC_TSC_MHz)) @@ -3283,96 +3296,81 @@ int format_counters(PER_THREAD_PARAMS) if (DO_BIC(BIC_SMI)) outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count); - /* Added counters */ + /* LLC Stats */ + if (DO_BIC(BIC_LLC_RPS) || DO_BIC(BIC_LLC_HIT)) { + if (DO_BIC(BIC_LLC_RPS)) + outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), t->llc.references / interval_float / 1000); + + if (DO_BIC(BIC_LLC_HIT)) + outp += sprintf(outp, fmt8, (printed++ ? 
delim : ""), pct((t->llc.references - t->llc.misses) / t->llc.references)); + } + + /* Added Thread Counters */ for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { - if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) { - if (mp->width == 32) - outp += - sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)t->counter[i]); - else - outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->counter[i]); - } else if (mp->format == FORMAT_DELTA) { - if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) - outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->counter[i]); - else - outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->counter[i]); - } else if (mp->format == FORMAT_PERCENT) { + if (mp->format == FORMAT_RAW) + outp += print_hex_value(mp->width, &printed, delim, t->counter[i]); + else if (mp->format == FORMAT_DELTA || mp->format == FORMAT_AVERAGE) + outp += print_decimal_value(mp->width, &printed, delim, t->counter[i]); + else if (mp->format == FORMAT_PERCENT) { if (mp->type == COUNTER_USEC) - outp += - sprintf(outp, "%s%.2f", (printed++ ? delim : ""), - t->counter[i] / interval_float / 10000); + outp += print_float_value(&printed, delim, t->counter[i] / interval_float / 10000); else - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->counter[i] / tsc); + outp += print_float_value(&printed, delim, pct(t->counter[i] / tsc)); } } - /* Added perf counters */ + /* Added perf Thread Counters */ for (i = 0, pp = sys.perf_tp; pp; ++i, pp = pp->next) { - if (pp->format == FORMAT_RAW) { - if (pp->width == 32) - outp += - sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), - (unsigned int)t->perf_counter[i]); - else - outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->perf_counter[i]); - } else if (pp->format == FORMAT_DELTA) { - if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) - outp += sprintf(outp, "%s%8lld", (printed++ ? 
delim : ""), t->perf_counter[i]); - else - outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->perf_counter[i]); - } else if (pp->format == FORMAT_PERCENT) { + if (pp->format == FORMAT_RAW) + outp += print_hex_value(pp->width, &printed, delim, t->perf_counter[i]); + else if (pp->format == FORMAT_DELTA || mp->format == FORMAT_AVERAGE) + outp += print_decimal_value(pp->width, &printed, delim, t->perf_counter[i]); + else if (pp->format == FORMAT_PERCENT) { if (pp->type == COUNTER_USEC) - outp += - sprintf(outp, "%s%.2f", (printed++ ? delim : ""), - t->perf_counter[i] / interval_float / 10000); + outp += print_float_value(&printed, delim, t->perf_counter[i] / interval_float / 10000); else - outp += - sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->perf_counter[i] / tsc); + outp += print_float_value(&printed, delim, pct(t->perf_counter[i] / tsc)); } } + /* Added PMT Thread Counters */ for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) { const unsigned long value_raw = t->pmt_counter[i]; double value_converted; switch (ppmt->type) { case PMT_TYPE_RAW: - if (pmt_counter_get_width(ppmt) <= 32) - outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), - (unsigned int)t->pmt_counter[i]); - else - outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->pmt_counter[i]); - + outp += print_hex_value(pmt_counter_get_width(ppmt), &printed, delim, t->pmt_counter[i]); break; case PMT_TYPE_XTAL_TIME: - value_converted = 100.0 * value_raw / crystal_hz / interval_float; + value_converted = pct(value_raw / crystal_hz / interval_float); outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); break; case PMT_TYPE_TCORE_CLOCK: - value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float; + value_converted = pct(value_raw / tcore_clock_freq_hz / interval_float); outp += sprintf(outp, "%s%.2f", (printed++ ? 
delim : ""), value_converted); } } /* C1 */ if (DO_BIC(BIC_CPU_c1)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(t->c1 / tsc)); /* print per-core data only for 1st thread in core */ - if (!is_cpu_first_thread_in_core(t, c, p)) + if (!is_cpu_first_thread_in_core(t, c)) goto done; if (DO_BIC(BIC_CPU_c3)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(c->c3 / tsc)); if (DO_BIC(BIC_CPU_c6)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(c->c6 / tsc)); if (DO_BIC(BIC_CPU_c7)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c7 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(c->c7 / tsc)); /* Mod%c6 */ if (DO_BIC(BIC_Mod_c6)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->mc6_us / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(c->mc6_us / tsc)); if (DO_BIC(BIC_CoreTmp)) outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c); @@ -3381,77 +3379,53 @@ int format_counters(PER_THREAD_PARAMS) if (DO_BIC(BIC_CORE_THROT_CNT)) outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->core_throt_cnt); + /* Added Core Counters */ for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { - if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) { - if (mp->width == 32) - outp += - sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)c->counter[i]); - else - outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->counter[i]); - } else if (mp->format == FORMAT_DELTA) { - if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) - outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->counter[i]); - else - outp += sprintf(outp, "%s%lld", (printed++ ? 
delim : ""), c->counter[i]); - } else if (mp->format == FORMAT_PERCENT) { - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->counter[i] / tsc); - } + if (mp->format == FORMAT_RAW) + outp += print_hex_value(mp->width, &printed, delim, c->counter[i]); + else if (mp->format == FORMAT_DELTA || mp->format == FORMAT_AVERAGE) + outp += print_decimal_value(mp->width, &printed, delim, c->counter[i]); + else if (mp->format == FORMAT_PERCENT) + outp += print_float_value(&printed, delim, pct(c->counter[i] / tsc)); } + /* Added perf Core counters */ for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) { - if (pp->format == FORMAT_RAW) { - if (pp->width == 32) - outp += - sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), - (unsigned int)c->perf_counter[i]); - else - outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->perf_counter[i]); - } else if (pp->format == FORMAT_DELTA) { - if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) - outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->perf_counter[i]); - else - outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->perf_counter[i]); - } else if (pp->format == FORMAT_PERCENT) { - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->perf_counter[i] / tsc); - } + if (pp->format == FORMAT_RAW) + outp += print_hex_value(pp->width, &printed, delim, c->perf_counter[i]); + else if (pp->format == FORMAT_DELTA || mp->format == FORMAT_AVERAGE) + outp += print_decimal_value(pp->width, &printed, delim, c->perf_counter[i]); + else if (pp->format == FORMAT_PERCENT) + outp += print_float_value(&printed, delim, pct(c->perf_counter[i] / tsc)); } + /* Added PMT Core counters */ for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) { const unsigned long value_raw = c->pmt_counter[i]; double value_converted; switch (ppmt->type) { case PMT_TYPE_RAW: - if (pmt_counter_get_width(ppmt) <= 32) - outp += sprintf(outp, "%s0x%08x", (printed++ ? 
delim : ""), - (unsigned int)c->pmt_counter[i]); - else - outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->pmt_counter[i]); - + outp += print_hex_value(pmt_counter_get_width(ppmt), &printed, delim, c->pmt_counter[i]); break; case PMT_TYPE_XTAL_TIME: - value_converted = 100.0 * value_raw / crystal_hz / interval_float; - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); + value_converted = pct(value_raw / crystal_hz / interval_float); + outp += print_float_value(&printed, delim, value_converted); break; case PMT_TYPE_TCORE_CLOCK: - value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float; - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); + value_converted = pct(value_raw / tcore_clock_freq_hz / interval_float); + outp += print_float_value(&printed, delim, value_converted); } } - fmt8 = "%s%.2f"; - if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl) - outp += - sprintf(outp, fmt8, (printed++ ? delim : ""), - rapl_counter_get_value(&c->core_energy, RAPL_UNIT_WATTS, interval_float)); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&c->core_energy, RAPL_UNIT_WATTS, interval_float)); if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl) - outp += sprintf(outp, fmt8, (printed++ ? delim : ""), - rapl_counter_get_value(&c->core_energy, RAPL_UNIT_JOULES, interval_float)); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&c->core_energy, RAPL_UNIT_JOULES, interval_float)); /* print per-package data only for 1st core in package */ - if (!is_cpu_first_core_in_package(t, c, p)) + if (!is_cpu_first_core_in_package(t, p)) goto done; /* PkgTmp */ @@ -3463,8 +3437,7 @@ int format_counters(PER_THREAD_PARAMS) if (p->gfx_rc6_ms == -1) { /* detect GFX counter reset */ outp += sprintf(outp, "%s**.**", (printed++ ? delim : "")); } else { - outp += sprintf(outp, "%s%.2f", (printed++ ? 
delim : ""), - p->gfx_rc6_ms / 10.0 / interval_float); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), p->gfx_rc6_ms / 10.0 / interval_float); } } @@ -3481,8 +3454,7 @@ int format_counters(PER_THREAD_PARAMS) if (p->sam_mc6_ms == -1) { /* detect GFX counter reset */ outp += sprintf(outp, "%s**.**", (printed++ ? delim : "")); } else { - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), - p->sam_mc6_ms / 10.0 / interval_float); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), p->sam_mc6_ms / 10.0 / interval_float); } } @@ -3496,150 +3468,112 @@ int format_counters(PER_THREAD_PARAMS) /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */ if (DO_BIC(BIC_Totl_c0)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100 * p->pkg_wtd_core_c0 / tsc); /* can exceed 100% */ if (DO_BIC(BIC_Any_c0)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pkg_any_core_c0 / tsc)); if (DO_BIC(BIC_GFX_c0)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pkg_any_gfxe_c0 / tsc)); if (DO_BIC(BIC_CPUGFX)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pkg_both_core_gfxe_c0 / tsc)); if (DO_BIC(BIC_Pkgpc2)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pc2 / tsc)); if (DO_BIC(BIC_Pkgpc3)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc3 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pc3 / tsc)); if (DO_BIC(BIC_Pkgpc6)) - outp += sprintf(outp, "%s%.2f", (printed++ ? 
delim : ""), 100.0 * p->pc6 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pc6 / tsc)); if (DO_BIC(BIC_Pkgpc7)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc7 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pc7 / tsc)); if (DO_BIC(BIC_Pkgpc8)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pc8 / tsc)); if (DO_BIC(BIC_Pkgpc9)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pc9 / tsc)); if (DO_BIC(BIC_Pkgpc10)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10 / tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->pc10 / tsc)); if (DO_BIC(BIC_Diec6)) - outp += - sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->die_c6 / crystal_hz / interval_float); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->die_c6 / crystal_hz / interval_float)); if (DO_BIC(BIC_CPU_LPI)) { if (p->cpu_lpi >= 0) - outp += - sprintf(outp, "%s%.2f", (printed++ ? delim : ""), - 100.0 * p->cpu_lpi / 1000000.0 / interval_float); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->cpu_lpi / 1000000.0 / interval_float)); else outp += sprintf(outp, "%s(neg)", (printed++ ? delim : "")); } if (DO_BIC(BIC_SYS_LPI)) { if (p->sys_lpi >= 0) - outp += - sprintf(outp, "%s%.2f", (printed++ ? delim : ""), - 100.0 * p->sys_lpi / 1000000.0 / interval_float); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), pct(p->sys_lpi / 1000000.0 / interval_float)); else outp += sprintf(outp, "%s(neg)", (printed++ ? delim : "")); } if (DO_BIC(BIC_PkgWatt)) - outp += - sprintf(outp, fmt8, (printed++ ? delim : ""), - rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_WATTS, interval_float)); + outp += sprintf(outp, fmt8, (printed++ ? 
delim : ""), rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_WATTS, interval_float)); if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl) - outp += - sprintf(outp, fmt8, (printed++ ? delim : ""), - rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_WATTS, interval_float)); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_WATTS, interval_float)); if (DO_BIC(BIC_GFXWatt)) - outp += - sprintf(outp, fmt8, (printed++ ? delim : ""), - rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_WATTS, interval_float)); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_WATTS, interval_float)); if (DO_BIC(BIC_RAMWatt)) - outp += - sprintf(outp, fmt8, (printed++ ? delim : ""), - rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_WATTS, interval_float)); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_WATTS, interval_float)); if (DO_BIC(BIC_Pkg_J)) - outp += sprintf(outp, fmt8, (printed++ ? delim : ""), - rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_JOULES, interval_float)); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_JOULES, interval_float)); if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl) - outp += sprintf(outp, fmt8, (printed++ ? delim : ""), - rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_JOULES, interval_float)); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_JOULES, interval_float)); if (DO_BIC(BIC_GFX_J)) - outp += sprintf(outp, fmt8, (printed++ ? delim : ""), - rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_JOULES, interval_float)); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_JOULES, interval_float)); if (DO_BIC(BIC_RAM_J)) - outp += sprintf(outp, fmt8, (printed++ ? 
delim : ""), - rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_JOULES, interval_float)); + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_JOULES, interval_float)); if (DO_BIC(BIC_PKG__)) outp += - sprintf(outp, fmt8, (printed++ ? delim : ""), - rapl_counter_get_value(&p->rapl_pkg_perf_status, RAPL_UNIT_WATTS, interval_float)); + sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->rapl_pkg_perf_status, RAPL_UNIT_WATTS, interval_float)); if (DO_BIC(BIC_RAM__)) outp += - sprintf(outp, fmt8, (printed++ ? delim : ""), - rapl_counter_get_value(&p->rapl_dram_perf_status, RAPL_UNIT_WATTS, interval_float)); + sprintf(outp, fmt8, (printed++ ? delim : ""), rapl_counter_get_value(&p->rapl_dram_perf_status, RAPL_UNIT_WATTS, interval_float)); /* UncMHz */ if (DO_BIC(BIC_UNCORE_MHZ)) outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->uncore_mhz); + /* Added Package Counters */ for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { - if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) { - if (mp->width == 32) - outp += - sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)p->counter[i]); - else - outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->counter[i]); - } else if (mp->format == FORMAT_DELTA) { - if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) - outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->counter[i]); - else - outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->counter[i]); - } else if (mp->format == FORMAT_PERCENT) { - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->counter[i] / tsc); - } else if (mp->type == COUNTER_K2M) + if (mp->format == FORMAT_RAW) + outp += print_hex_value(mp->width, &printed, delim, p->counter[i]); + else if (mp->type == COUNTER_K2M) outp += sprintf(outp, "%s%d", (printed++ ? 
delim : ""), (unsigned int)p->counter[i] / 1000); + else if (mp->format == FORMAT_DELTA || mp->format == FORMAT_AVERAGE) + outp += print_decimal_value(mp->width, &printed, delim, p->counter[i]); + else if (mp->format == FORMAT_PERCENT) + outp += print_float_value(&printed, delim, pct(p->counter[i] / tsc)); } + /* Added perf Package Counters */ for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) { - if (pp->format == FORMAT_RAW) { - if (pp->width == 32) - outp += - sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), - (unsigned int)p->perf_counter[i]); - else - outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->perf_counter[i]); - } else if (pp->format == FORMAT_DELTA) { - if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) - outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->perf_counter[i]); - else - outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->perf_counter[i]); - } else if (pp->format == FORMAT_PERCENT) { - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->perf_counter[i] / tsc); - } else if (pp->type == COUNTER_K2M) { - outp += - sprintf(outp, "%s%d", (printed++ ? delim : ""), (unsigned int)p->perf_counter[i] / 1000); - } + if (pp->format == FORMAT_RAW) + outp += print_hex_value(pp->width, &printed, delim, p->perf_counter[i]); + else if (pp->type == COUNTER_K2M) + outp += sprintf(outp, "%s%d", (printed++ ? 
delim : ""), (unsigned int)p->perf_counter[i] / 1000); + else if (pp->format == FORMAT_DELTA || mp->format == FORMAT_AVERAGE) + outp += print_decimal_value(pp->width, &printed, delim, p->perf_counter[i]); + else if (pp->format == FORMAT_PERCENT) + outp += print_float_value(&printed, delim, pct(p->perf_counter[i] / tsc)); } + /* Added PMT Package Counters */ for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) { const unsigned long value_raw = p->pmt_counter[i]; double value_converted; switch (ppmt->type) { case PMT_TYPE_RAW: - if (pmt_counter_get_width(ppmt) <= 32) - outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), - (unsigned int)p->pmt_counter[i]); - else - outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->pmt_counter[i]); - + outp += print_hex_value(pmt_counter_get_width(ppmt), &printed, delim, p->pmt_counter[i]); break; case PMT_TYPE_XTAL_TIME: - value_converted = 100.0 * value_raw / crystal_hz / interval_float; - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); + value_converted = pct(value_raw / crystal_hz / interval_float); + outp += print_float_value(&printed, delim, value_converted); break; case PMT_TYPE_TCORE_CLOCK: - value_converted = 100.0 * value_raw / tcore_clock_freq_hz / interval_float; - outp += sprintf(outp, "%s%.2f", (printed++ ? 
delim : ""), value_converted); + value_converted = pct(value_raw / tcore_clock_freq_hz / interval_float); + outp += print_float_value(&printed, delim, value_converted); } } @@ -3754,11 +3688,10 @@ int delta_package(struct pkg_data *new, struct pkg_data *old) old->energy_gfx.raw_value = new->energy_gfx.raw_value - old->energy_gfx.raw_value; old->energy_dram.raw_value = new->energy_dram.raw_value - old->energy_dram.raw_value; old->rapl_pkg_perf_status.raw_value = new->rapl_pkg_perf_status.raw_value - old->rapl_pkg_perf_status.raw_value; - old->rapl_dram_perf_status.raw_value = - new->rapl_dram_perf_status.raw_value - old->rapl_dram_perf_status.raw_value; + old->rapl_dram_perf_status.raw_value = new->rapl_dram_perf_status.raw_value - old->rapl_dram_perf_status.raw_value; for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { - if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) + if (mp->format == FORMAT_RAW) old->counter[i] = new->counter[i]; else if (mp->format == FORMAT_AVERAGE) old->counter[i] = new->counter[i]; @@ -3862,8 +3795,7 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d /* check for TSC < 1 Mcycles over interval */ if (old->tsc < (1000 * 1000)) errx(-3, "Insanely slow TSC rate, TSC stops in idle?\n" - "You can disable all c-states by booting with \"idle=poll\"\n" - "or just the deep ones with \"processor.max_cstate=1\""); + "You can disable all c-states by booting with \"idle=poll\"\n" "or just the deep ones with \"processor.max_cstate=1\""); old->c1 = new->c1 - old->c1; @@ -3892,8 +3824,7 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d old->c1 = 0; else { /* normal case, derive c1 */ - old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3 - - core_delta->c6 - core_delta->c7; + old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3 - core_delta->c6 - core_delta->c7; } } @@ -3915,6 +3846,12 @@ int delta_thread(struct thread_data *new, struct thread_data *old, 
struct core_d if (DO_BIC(BIC_SMI)) old->smi_count = new->smi_count - old->smi_count; + if (DO_BIC(BIC_LLC_RPS)) + old->llc.references = new->llc.references - old->llc.references; + + if (DO_BIC(BIC_LLC_HIT)) + old->llc.misses = new->llc.misses - old->llc.misses; + for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW || mp->format == FORMAT_AVERAGE) old->counter[i] = new->counter[i]; @@ -3939,20 +3876,19 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d return 0; } -int delta_cpu(struct thread_data *t, struct core_data *c, - struct pkg_data *p, struct thread_data *t2, struct core_data *c2, struct pkg_data *p2) +int delta_cpu(struct thread_data *t, struct core_data *c, struct pkg_data *p, struct thread_data *t2, struct core_data *c2, struct pkg_data *p2) { int retval = 0; /* calculate core delta only for 1st thread in core */ - if (is_cpu_first_thread_in_core(t, c, p)) + if (is_cpu_first_thread_in_core(t, c)) delta_core(c, c2); /* always calculate thread delta */ retval = delta_thread(t, t2, c2); /* c2 is core delta */ /* calculate package delta only for 1st core in package */ - if (is_cpu_first_core_in_package(t, c, p)) + if (is_cpu_first_core_in_package(t, p)) retval |= delta_package(p, p2); return retval; @@ -3993,6 +3929,9 @@ void clear_counters(PER_THREAD_PARAMS) t->nmi_count = 0; t->smi_count = 0; + t->llc.references = 0; + t->llc.misses = 0; + c->c3 = 0; c->c6 = 0; c->c7 = 0; @@ -4001,6 +3940,9 @@ void clear_counters(PER_THREAD_PARAMS) rapl_counter_clear(&c->core_energy); c->core_throt_cnt = 0; + t->llc.references = 0; + t->llc.misses = 0; + p->pkg_wtd_core_c0 = 0; p->pkg_any_core_c0 = 0; p->pkg_any_gfxe_c0 = 0; @@ -4098,6 +4040,9 @@ int sum_counters(PER_THREAD_PARAMS) average.threads.nmi_count += t->nmi_count; average.threads.smi_count += t->smi_count; + average.threads.llc.references += t->llc.references; + average.threads.llc.misses += t->llc.misses; + for (i = 0, mp = sys.tp; mp; i++, mp = 
mp->next) { if (mp->format == FORMAT_RAW) continue; @@ -4115,7 +4060,7 @@ int sum_counters(PER_THREAD_PARAMS) } /* sum per-core values only for 1st thread in core */ - if (!is_cpu_first_thread_in_core(t, c, p)) + if (!is_cpu_first_thread_in_core(t, c)) return 0; average.cores.c3 += c->c3; @@ -4145,7 +4090,7 @@ int sum_counters(PER_THREAD_PARAMS) } /* sum per-pkg values only for 1st core in pkg */ - if (!is_cpu_first_core_in_package(t, c, p)) + if (!is_cpu_first_core_in_package(t, p)) return 0; if (DO_BIC(BIC_Totl_c0)) @@ -4411,8 +4356,7 @@ unsigned long long get_legacy_uncore_mhz(int package) */ for (die = 0; die <= topo.max_die_id; ++die) { - sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d/current_freq_khz", - package, die); + sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d/current_freq_khz", package, die); if (access(path, R_OK) == 0) return (snapshot_sysfs_counter(path) / 1000); @@ -4523,11 +4467,6 @@ int get_core_throt_cnt(int cpu, unsigned long long *cnt) return 0; } -struct amperf_group_fd { - int aperf; /* Also the group descriptor */ - int mperf; -}; - static int read_perf_counter_info(const char *const path, const char *const parse_format, void *value_ptr) { int fdmt; @@ -4727,8 +4666,7 @@ int get_rapl_counters(int cpu, unsigned int domain, struct core_data *c, struct const ssize_t actual_read_size = read(rci->fd_perf, &perf_data[0], sizeof(perf_data)); if (actual_read_size != expected_read_size) - err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size, - actual_read_size); + err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size, actual_read_size); } for (unsigned int i = 0, pi = 1; i < NUM_RAPL_COUNTERS; ++i) { @@ -4966,8 +4904,7 @@ int get_smi_aperf_mperf(unsigned int cpu, struct thread_data *t) const ssize_t actual_read_size = read(mci->fd_perf, &perf_data[0], sizeof(perf_data)); if (actual_read_size != expected_read_size) - err(-1, 
"%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size, - actual_read_size); + err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size, actual_read_size); } for (unsigned int i = 0, pi = 1; i < NUM_MSR_COUNTERS; ++i) { @@ -5121,6 +5058,9 @@ int get_counters(PER_THREAD_PARAMS) get_smi_aperf_mperf(cpu, t); + if (DO_BIC(BIC_LLC_RPS) || DO_BIC(BIC_LLC_HIT)) + get_perf_llc_stats(cpu, &t->llc); + if (DO_BIC(BIC_IPC)) if (read(get_instr_count_fd(cpu), &t->instr_count, sizeof(long long)) != sizeof(long long)) return -4; @@ -5144,7 +5084,7 @@ int get_counters(PER_THREAD_PARAMS) t->pmt_counter[i] = pmt_read_counter(pp, t->cpu_id); /* collect core counters only for 1st thread in core */ - if (!is_cpu_first_thread_in_core(t, c, p)) + if (!is_cpu_first_thread_in_core(t, c)) goto done; if (platform->has_per_core_rapl) { @@ -5188,7 +5128,7 @@ int get_counters(PER_THREAD_PARAMS) c->pmt_counter[i] = pmt_read_counter(pp, c->core_id); /* collect package counters only for 1st core in package */ - if (!is_cpu_first_core_in_package(t, c, p)) + if (!is_cpu_first_core_in_package(t, p)) goto done; if (DO_BIC(BIC_Totl_c0)) { @@ -5277,48 +5217,39 @@ char *pkg_cstate_limit_strings[] = { "unknown", "reserved", "pc0", "pc1", "pc2", "pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "pc8", "pc9", "pc10", "unlimited" }; -int nhm_pkg_cstate_limits[16] = - { PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, +int nhm_pkg_cstate_limits[16] = { PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV }; -int snb_pkg_cstate_limits[16] = - { PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, +int snb_pkg_cstate_limits[16] = { PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV }; -int 
hsw_pkg_cstate_limits[16] = - { PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, +int hsw_pkg_cstate_limits[16] = { PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV }; -int slv_pkg_cstate_limits[16] = - { PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, +int slv_pkg_cstate_limits[16] = { PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7 }; -int amt_pkg_cstate_limits[16] = - { PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, +int amt_pkg_cstate_limits[16] = { PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV }; -int phi_pkg_cstate_limits[16] = - { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, +int phi_pkg_cstate_limits[16] = { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV }; -int glm_pkg_cstate_limits[16] = - { PCLUNL, PCL__1, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCL_10, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, +int glm_pkg_cstate_limits[16] = { PCLUNL, PCL__1, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCL_10, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV }; -int skx_pkg_cstate_limits[16] = - { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, +int skx_pkg_cstate_limits[16] = { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV }; -int icx_pkg_cstate_limits[16] = - { PCL__0, PCL__2, PCL__6, PCL__6, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, 
PCLRSV, PCLRSV, PCLRSV, PCLRSV, +int icx_pkg_cstate_limits[16] = { PCL__0, PCL__2, PCL__6, PCL__6, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV }; @@ -5393,8 +5324,7 @@ static void dump_power_ctl(void) return; get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr); - fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", - base_cpu, msr, msr & 0x2 ? "EN" : "DIS"); + fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", base_cpu, msr, msr & 0x2 ? "EN" : "DIS"); /* C-state Pre-wake Disable (CSTATE_PREWAKE_DISABLE) */ if (platform->has_cst_prewake_bit) @@ -5487,8 +5417,7 @@ static void dump_turbo_ratio_limits(int trl_msr_offset) ratio = (msr >> shift) & 0xFF; group_size = (core_counts >> shift) & 0xFF; if (ratio) - fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n", - ratio, bclk, ratio * bclk, group_size); + fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n", ratio, bclk, ratio * bclk, group_size); } return; @@ -5586,9 +5515,7 @@ static void dump_knl_turbo_ratio_limits(void) for (i = buckets_no - 1; i >= 0; i--) if (i > 0 ? ratio[i] != ratio[i - 1] : 1) - fprintf(outf, - "%d * %.1f = %.1f MHz max turbo %d active cores\n", - ratio[i], bclk, ratio[i] * bclk, cores[i]); + fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n", ratio[i], bclk, ratio[i] * bclk, cores[i]); } static void dump_cst_cfg(void) @@ -5673,43 +5600,37 @@ void print_irtl(void) if (platform->supported_cstates & PC3) { get_msr(base_cpu, MSR_PKGC3_IRTL, &msr); fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr); - fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", - (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? 
"" : "NOT", (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); } if (platform->supported_cstates & PC6) { get_msr(base_cpu, MSR_PKGC6_IRTL, &msr); fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr); - fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", - (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); } if (platform->supported_cstates & PC7) { get_msr(base_cpu, MSR_PKGC7_IRTL, &msr); fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr); - fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", - (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); } if (platform->supported_cstates & PC8) { get_msr(base_cpu, MSR_PKGC8_IRTL, &msr); fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr); - fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", - (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); } if (platform->supported_cstates & PC9) { get_msr(base_cpu, MSR_PKGC9_IRTL, &msr); fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr); - fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", - (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); } if (platform->supported_cstates & PC10) { get_msr(base_cpu, MSR_PKGC10_IRTL, &msr); fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr); - fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", - (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? 
"" : "NOT", (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); } } @@ -5743,6 +5664,20 @@ void free_fd_instr_count_percpu(void) fd_instr_count_percpu = NULL; } +void free_fd_llc_percpu(void) +{ + if (!fd_llc_percpu) + return; + + for (int i = 0; i < topo.max_cpu_num + 1; ++i) { + if (fd_llc_percpu[i] != 0) + close(fd_llc_percpu[i]); + } + + free(fd_llc_percpu); + fd_llc_percpu = NULL; +} + void free_fd_cstate(void) { if (!ccstate_counter_info) @@ -5867,6 +5802,7 @@ void free_all_buffers(void) free_fd_percpu(); free_fd_instr_count_percpu(); + free_fd_llc_percpu(); free_fd_msr(); free_fd_rapl_percpu(); free_fd_cstate(); @@ -6213,6 +6149,7 @@ void linux_perf_init(void); void msr_perf_init(void); void rapl_perf_init(void); void cstate_perf_init(void); +void perf_llc_init(void); void added_perf_counters_init(void); void pmt_init(void); @@ -6224,10 +6161,10 @@ void re_initialize(void) msr_perf_init(); rapl_perf_init(); cstate_perf_init(); + perf_llc_init(); added_perf_counters_init(); pmt_init(); - fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus, - topo.allowed_cpus); + fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus, topo.allowed_cpus); } void set_max_cpu_num(void) @@ -6673,6 +6610,7 @@ release_timer: timer_delete(timerid); release_msr: free(per_cpu_msr_sum); + per_cpu_msr_sum = NULL; } /* @@ -6797,21 +6735,43 @@ restart: } } -void check_dev_msr() +int probe_dev_msr(void) { struct stat sb; char pathname[32]; - if (no_msr) - return; -#if defined(ANDROID) sprintf(pathname, "/dev/msr%d", base_cpu); -#else + return !stat(pathname, &sb); +} + +int probe_dev_cpu_msr(void) +{ + struct stat sb; + char pathname[32]; + sprintf(pathname, "/dev/cpu/%d/msr", base_cpu); -#endif - if (stat(pathname, &sb)) - if (system("/sbin/modprobe msr > /dev/null 2>&1")) - no_msr = 1; + return !stat(pathname, &sb); +} + +int probe_msr_driver(void) +{ + if (probe_dev_msr()) { + use_android_msr_path = 1; + 
return 1; + } + return probe_dev_cpu_msr(); +} + +void check_msr_driver(void) +{ + if (probe_msr_driver()) + return; + + if (system("/sbin/modprobe msr > /dev/null 2>&1")) + no_msr = 1; + + if (!probe_msr_driver()) + no_msr = 1; } /* @@ -6866,11 +6826,7 @@ void check_msr_permission(void) failed += check_for_cap_sys_rawio(); /* test file permissions */ -#if defined(ANDROID) - sprintf(pathname, "/dev/msr%d", base_cpu); -#else - sprintf(pathname, "/dev/cpu/%d/msr", base_cpu); -#endif + sprintf(pathname, use_android_msr_path ? "/dev/msr%d" : "/dev/cpu/%d/msr", base_cpu); if (euidaccess(pathname, R_OK)) { failed++; } @@ -6999,8 +6955,7 @@ static void probe_intel_uncore_frequency_legacy(void) int k, l; char path_base[128]; - sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d", i, - j); + sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d", i, j); sprintf(path, "%s/current_freq_khz", path_base); if (access(path, R_OK)) @@ -7083,8 +7038,7 @@ static void probe_intel_uncore_frequency_cluster(void) */ if BIC_IS_ENABLED (BIC_UNCORE_MHZ) - add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0, - package_id); + add_counter(0, path, name_buf, 0, SCOPE_PACKAGE, COUNTER_K2M, FORMAT_AVERAGE, 0, package_id); if (quiet) continue; @@ -7093,8 +7047,7 @@ static void probe_intel_uncore_frequency_cluster(void) k = read_sysfs_int(path); sprintf(path, "%s/max_freq_khz", path_base); l = read_sysfs_int(path); - fprintf(outf, "Uncore Frequency package%d domain%d cluster%d: %d - %d MHz ", package_id, domain_id, - cluster_id, k / 1000, l / 1000); + fprintf(outf, "Uncore Frequency package%d domain%d cluster%d: %d - %d MHz ", package_id, domain_id, cluster_id, k / 1000, l / 1000); sprintf(path, "%s/initial_min_freq_khz", path_base); k = read_sysfs_int(path); @@ -7156,21 +7109,17 @@ static void probe_graphics(void) else goto next; - 
set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms", - gt0_is_gt ? GFX_rc6 : SAM_mc6); + set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms", gt0_is_gt ? GFX_rc6 : SAM_mc6); set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq", gt0_is_gt ? GFX_MHz : SAM_MHz); - set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq", - gt0_is_gt ? GFX_ACTMHz : SAM_ACTMHz); + set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq", gt0_is_gt ? GFX_ACTMHz : SAM_ACTMHz); - set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms", - gt0_is_gt ? SAM_mc6 : GFX_rc6); + set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms", gt0_is_gt ? SAM_mc6 : GFX_rc6); set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq", gt0_is_gt ? SAM_MHz : GFX_MHz); - set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq", - gt0_is_gt ? SAM_ACTMHz : GFX_ACTMHz); + set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq", gt0_is_gt ? 
SAM_ACTMHz : GFX_ACTMHz); goto end; } @@ -7425,8 +7374,7 @@ int print_hwp(PER_THREAD_PARAMS) "(high %d guar %d eff %d low %d)\n", cpu, msr, (unsigned int)HWP_HIGHEST_PERF(msr), - (unsigned int)HWP_GUARANTEED_PERF(msr), - (unsigned int)HWP_MOSTEFFICIENT_PERF(msr), (unsigned int)HWP_LOWEST_PERF(msr)); + (unsigned int)HWP_GUARANTEED_PERF(msr), (unsigned int)HWP_MOSTEFFICIENT_PERF(msr), (unsigned int)HWP_LOWEST_PERF(msr)); if (get_msr(cpu, MSR_HWP_REQUEST, &msr)) return 0; @@ -7437,8 +7385,7 @@ int print_hwp(PER_THREAD_PARAMS) (unsigned int)(((msr) >> 0) & 0xff), (unsigned int)(((msr) >> 8) & 0xff), (unsigned int)(((msr) >> 16) & 0xff), - (unsigned int)(((msr) >> 24) & 0xff), - (unsigned int)(((msr) >> 32) & 0xff3), (unsigned int)(((msr) >> 42) & 0x1)); + (unsigned int)(((msr) >> 24) & 0xff), (unsigned int)(((msr) >> 32) & 0xff3), (unsigned int)(((msr) >> 42) & 0x1)); if (has_hwp_pkg) { if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr)) @@ -7449,23 +7396,20 @@ int print_hwp(PER_THREAD_PARAMS) cpu, msr, (unsigned int)(((msr) >> 0) & 0xff), (unsigned int)(((msr) >> 8) & 0xff), - (unsigned int)(((msr) >> 16) & 0xff), - (unsigned int)(((msr) >> 24) & 0xff), (unsigned int)(((msr) >> 32) & 0xff3)); + (unsigned int)(((msr) >> 16) & 0xff), (unsigned int)(((msr) >> 24) & 0xff), (unsigned int)(((msr) >> 32) & 0xff3)); } if (has_hwp_notify) { if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr)) return 0; fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx " - "(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n", - cpu, msr, ((msr) & 0x1) ? "EN" : "Dis", ((msr) & 0x2) ? "EN" : "Dis"); + "(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n", cpu, msr, ((msr) & 0x1) ? "EN" : "Dis", ((msr) & 0x2) ? "EN" : "Dis"); } if (get_msr(cpu, MSR_HWP_STATUS, &msr)) return 0; fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx " - "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n", - cpu, msr, ((msr) & 0x1) ? "" : "No-", ((msr) & 0x4) ? "" : "No-"); + "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n", cpu, msr, ((msr) & 0x1) ? 
"" : "No-", ((msr) & 0x4) ? "" : "No-"); return 0; } @@ -7510,8 +7454,7 @@ int print_perf_limit(PER_THREAD_PARAMS) (msr & 1 << 6) ? "VR-Therm, " : "", (msr & 1 << 5) ? "Auto-HWP, " : "", (msr & 1 << 4) ? "Graphics, " : "", - (msr & 1 << 2) ? "bit2, " : "", - (msr & 1 << 1) ? "ThermStatus, " : "", (msr & 1 << 0) ? "PROCHOT, " : ""); + (msr & 1 << 2) ? "bit2, " : "", (msr & 1 << 1) ? "ThermStatus, " : "", (msr & 1 << 0) ? "PROCHOT, " : ""); fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n", (msr & 1 << 31) ? "bit31, " : "", (msr & 1 << 30) ? "bit30, " : "", @@ -7524,8 +7467,7 @@ int print_perf_limit(PER_THREAD_PARAMS) (msr & 1 << 22) ? "VR-Therm, " : "", (msr & 1 << 21) ? "Auto-HWP, " : "", (msr & 1 << 20) ? "Graphics, " : "", - (msr & 1 << 18) ? "bit18, " : "", - (msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 16) ? "PROCHOT, " : ""); + (msr & 1 << 18) ? "bit18, " : "", (msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 16) ? "PROCHOT, " : ""); } if (platform->plr_msrs & PLR_GFX) { @@ -7537,16 +7479,14 @@ int print_perf_limit(PER_THREAD_PARAMS) (msr & 1 << 4) ? "Graphics, " : "", (msr & 1 << 6) ? "VR-Therm, " : "", (msr & 1 << 8) ? "Amps, " : "", - (msr & 1 << 9) ? "GFXPwr, " : "", - (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : ""); + (msr & 1 << 9) ? "GFXPwr, " : "", (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : ""); fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n", (msr & 1 << 16) ? "PROCHOT, " : "", (msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 20) ? "Graphics, " : "", (msr & 1 << 22) ? "VR-Therm, " : "", (msr & 1 << 24) ? "Amps, " : "", - (msr & 1 << 25) ? "GFXPwr, " : "", - (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : ""); + (msr & 1 << 25) ? "GFXPwr, " : "", (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? 
"PkgPwrL2, " : ""); } if (platform->plr_msrs & PLR_RING) { get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr); @@ -7555,14 +7495,12 @@ int print_perf_limit(PER_THREAD_PARAMS) (msr & 1 << 0) ? "PROCHOT, " : "", (msr & 1 << 1) ? "ThermStatus, " : "", (msr & 1 << 6) ? "VR-Therm, " : "", - (msr & 1 << 8) ? "Amps, " : "", - (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : ""); + (msr & 1 << 8) ? "Amps, " : "", (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : ""); fprintf(outf, " (Logged: %s%s%s%s%s%s)\n", (msr & 1 << 16) ? "PROCHOT, " : "", (msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 22) ? "VR-Therm, " : "", - (msr & 1 << 24) ? "Amps, " : "", - (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : ""); + (msr & 1 << 24) ? "Amps, " : "", (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : ""); } return 0; } @@ -7582,7 +7520,7 @@ double get_tdp_intel(void) { unsigned long long msr; - if (platform->rapl_msrs & RAPL_PKG_POWER_INFO) + if (valid_rapl_msrs & RAPL_PKG_POWER_INFO) if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr)) return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units; return get_quirk_tdp(); @@ -7613,12 +7551,12 @@ void rapl_probe_intel(void) CLR_BIC(BIC_GFX_J, &bic_enabled); } - if (!platform->rapl_msrs || no_msr) + if (!valid_rapl_msrs || no_msr) return; - if (!(platform->rapl_msrs & RAPL_PKG_PERF_STATUS)) + if (!(valid_rapl_msrs & RAPL_PKG_PERF_STATUS)) CLR_BIC(BIC_PKG__, &bic_enabled); - if (!(platform->rapl_msrs & RAPL_DRAM_PERF_STATUS)) + if (!(valid_rapl_msrs & RAPL_DRAM_PERF_STATUS)) CLR_BIC(BIC_RAM__, &bic_enabled); /* units on package 0, verify later other packages match */ @@ -7667,7 +7605,7 @@ void rapl_probe_amd(void) CLR_BIC(BIC_Cor_J, &bic_enabled); } - if (!platform->rapl_msrs || no_msr) + if (!valid_rapl_msrs || no_msr) return; if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr)) @@ -7690,8 +7628,7 @@ void print_power_limit_msr(int cpu, unsigned long long msr, 
char *label) cpu, label, ((msr >> 15) & 1) ? "EN" : "DIS", ((msr >> 0) & 0x7FFF) * rapl_power_units, - (1.0 + (((msr >> 22) & 0x3) / 4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units, - (((msr >> 16) & 1) ? "EN" : "DIS")); + (1.0 + (((msr >> 22) & 0x3) / 4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units, (((msr >> 16) & 1) ? "EN" : "DIS")); return; } @@ -7857,7 +7794,7 @@ int print_rapl(PER_THREAD_PARAMS) UNUSED(c); UNUSED(p); - if (!platform->rapl_msrs) + if (!valid_rapl_msrs) return 0; /* RAPL counters are per package, so print only for 1st thread/package */ @@ -7870,7 +7807,7 @@ int print_rapl(PER_THREAD_PARAMS) return -1; } - if (platform->rapl_msrs & RAPL_AMD_F17H) { + if (valid_rapl_msrs & RAPL_AMD_F17H) { msr_name = "MSR_RAPL_PWR_UNIT"; if (get_msr(cpu, MSR_RAPL_PWR_UNIT, &msr)) return -1; @@ -7883,7 +7820,7 @@ int print_rapl(PER_THREAD_PARAMS) fprintf(outf, "cpu%d: %s: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr_name, msr, rapl_power_units, rapl_energy_units, rapl_time_units); - if (platform->rapl_msrs & RAPL_PKG_POWER_INFO) { + if (valid_rapl_msrs & RAPL_PKG_POWER_INFO) { if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr)) return -5; @@ -7892,25 +7829,22 @@ int print_rapl(PER_THREAD_PARAMS) cpu, msr, ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, - ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, - ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); + ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); } - if (platform->rapl_msrs & RAPL_PKG) { + if (valid_rapl_msrs & RAPL_PKG) { if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr)) return -9; - fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n", - cpu, msr, (msr >> 63) & 1 ? "" : "UN"); + fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n", cpu, msr, (msr >> 63) & 1 ? 
"" : "UN"); print_power_limit_msr(cpu, msr, "PKG Limit #1"); fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%0.3f Watts, %f* sec, clamp %sabled)\n", cpu, ((msr >> 47) & 1) ? "EN" : "DIS", ((msr >> 32) & 0x7FFF) * rapl_power_units, - (1.0 + (((msr >> 54) & 0x3) / 4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units, - ((msr >> 48) & 1) ? "EN" : "DIS"); + (1.0 + (((msr >> 54) & 0x3) / 4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units, ((msr >> 48) & 1) ? "EN" : "DIS"); if (get_msr(cpu, MSR_VR_CURRENT_CONFIG, &msr)) return -9; @@ -7920,7 +7854,7 @@ int print_rapl(PER_THREAD_PARAMS) cpu, ((msr >> 0) & 0x1FFF) * rapl_power_units, (msr >> 31) & 1 ? "" : "UN"); } - if (platform->rapl_msrs & RAPL_DRAM_POWER_INFO) { + if (valid_rapl_msrs & RAPL_DRAM_POWER_INFO) { if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr)) return -6; @@ -7928,31 +7862,28 @@ int print_rapl(PER_THREAD_PARAMS) cpu, msr, ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, - ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, - ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); + ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); } - if (platform->rapl_msrs & RAPL_DRAM) { + if (valid_rapl_msrs & RAPL_DRAM) { if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr)) return -9; - fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", - cpu, msr, (msr >> 31) & 1 ? "" : "UN"); + fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", cpu, msr, (msr >> 31) & 1 ? 
"" : "UN"); print_power_limit_msr(cpu, msr, "DRAM Limit"); } - if (platform->rapl_msrs & RAPL_CORE_POLICY) { + if (valid_rapl_msrs & RAPL_CORE_POLICY) { if (get_msr(cpu, MSR_PP0_POLICY, &msr)) return -7; fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF); } - if (platform->rapl_msrs & RAPL_CORE_POWER_LIMIT) { + if (valid_rapl_msrs & RAPL_CORE_POWER_LIMIT) { if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr)) return -9; - fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", - cpu, msr, (msr >> 31) & 1 ? "" : "UN"); + fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", cpu, msr, (msr >> 31) & 1 ? "" : "UN"); print_power_limit_msr(cpu, msr, "Cores Limit"); } - if (platform->rapl_msrs & RAPL_GFX) { + if (valid_rapl_msrs & RAPL_GFX) { if (get_msr(cpu, MSR_PP1_POLICY, &msr)) return -8; @@ -7960,20 +7891,58 @@ int print_rapl(PER_THREAD_PARAMS) if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr)) return -9; - fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n", - cpu, msr, (msr >> 31) & 1 ? "" : "UN"); + fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n", cpu, msr, (msr >> 31) & 1 ? 
"" : "UN"); print_power_limit_msr(cpu, msr, "GFX Limit"); } return 0; } /* + * probe_rapl_msrs + * + * initialize global valid_rapl_msrs to platform->plat_rapl_msrs + * only if PKG_ENERGY counter is enumerated and reads non-zero + */ +void probe_rapl_msrs(void) +{ + int ret; + off_t offset; + unsigned long long msr_value; + + if (no_msr) + return; + + if ((platform->plat_rapl_msrs & (RAPL_PKG | RAPL_AMD_F17H)) == 0) + return; + + offset = idx_to_offset(IDX_PKG_ENERGY); + if (offset < 0) + return; + + ret = get_msr(base_cpu, offset, &msr_value); + if (ret) { + if (debug) + fprintf(outf, "Can not read RAPL_PKG_ENERGY MSR(0x%llx)\n", (unsigned long long)offset); + return; + } + if (msr_value == 0) { + if (debug) + fprintf(outf, "RAPL_PKG_ENERGY MSR(0x%llx) == ZERO: disabling all RAPL MSRs\n", (unsigned long long)offset); + return; + } + + valid_rapl_msrs = platform->plat_rapl_msrs; /* success */ +} + +/* * probe_rapl() * * sets rapl_power_units, rapl_energy_units, rapl_time_units */ void probe_rapl(void) { + probe_rapl_msrs(); + if (genuine_intel) rapl_probe_intel(); if (authentic_amd || hygon_genuine) @@ -7984,7 +7953,7 @@ void probe_rapl(void) print_rapl_sysfs(); - if (!platform->rapl_msrs || no_msr) + if (!valid_rapl_msrs || no_msr) return; for_all_cpus(print_rapl, ODD_COUNTERS); @@ -8088,7 +8057,7 @@ int print_thermal(PER_THREAD_PARAMS) cpu = t->cpu_id; /* DTS is per-core, no need to print for each thread */ - if (!is_cpu_first_thread_in_core(t, c, p)) + if (!is_cpu_first_thread_in_core(t, c)) return 0; if (cpu_migrate(cpu)) { @@ -8096,7 +8065,7 @@ int print_thermal(PER_THREAD_PARAMS) return -1; } - if (do_ptm && is_cpu_first_core_in_package(t, c, p)) { + if (do_ptm && is_cpu_first_core_in_package(t, p)) { if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) return 0; @@ -8108,8 +8077,7 @@ int print_thermal(PER_THREAD_PARAMS) dts = (msr >> 16) & 0x7F; dts2 = (msr >> 8) & 0x7F; - fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", - 
cpu, msr, tj_max - dts, tj_max - dts2); + fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", cpu, msr, tj_max - dts, tj_max - dts2); } if (do_dts && debug) { @@ -8120,16 +8088,14 @@ int print_thermal(PER_THREAD_PARAMS) dts = (msr >> 16) & 0x7F; resolution = (msr >> 27) & 0xF; - fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n", - cpu, msr, tj_max - dts, resolution); + fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n", cpu, msr, tj_max - dts, resolution); if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr)) return 0; dts = (msr >> 16) & 0x7F; dts2 = (msr >> 8) & 0x7F; - fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", - cpu, msr, tj_max - dts, tj_max - dts2); + fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", cpu, msr, tj_max - dts, tj_max - dts2); } return 0; @@ -8203,8 +8169,7 @@ void decode_misc_enable_msr(void) msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-", msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-", msr & MSR_IA32_MISC_ENABLE_MWAIT ? "" : "No-", - msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "", - msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : ""); + msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "", msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : ""); } void decode_misc_feature_control(void) @@ -8243,8 +8208,7 @@ void decode_misc_pwr_mgmt_msr(void) if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr)) fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n", - base_cpu, msr, - msr & (1 << 0) ? "DIS" : "EN", msr & (1 << 1) ? "EN" : "DIS", msr & (1 << 8) ? "EN" : "DIS"); + base_cpu, msr, msr & (1 << 0) ? "DIS" : "EN", msr & (1 << 1) ? "EN" : "DIS", msr & (1 << 8) ? 
"EN" : "DIS"); } /* @@ -8297,30 +8261,26 @@ void print_dev_latency(void) close(fd); } -static int has_instr_count_access(void) +static int has_perf_instr_count_access(void) { int fd; - int has_access; if (no_perf) return 0; fd = open_perf_counter(base_cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0); - has_access = fd != -1; - if (fd != -1) close(fd); - if (!has_access) + if (fd == -1) warnx("Failed to access %s. Some of the counters may not be available\n" - "\tRun as root to enable them or use %s to disable the access explicitly", - "instructions retired perf counter", "--no-perf"); + "\tRun as root to enable them or use %s to disable the access explicitly", "perf instructions retired counter", + "'--hide IPC' or '--no-perf'"); - return has_access; + return (fd != -1); } -int add_rapl_perf_counter(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai, - double *scale_, enum rapl_unit *unit_) +int add_rapl_perf_counter(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai, double *scale_, enum rapl_unit *unit_) { int ret = -1; @@ -8370,11 +8330,16 @@ void linux_perf_init(void) if (access("/proc/sys/kernel/perf_event_paranoid", F_OK)) return; - if (BIC_IS_ENABLED(BIC_IPC) && has_aperf) { + if (BIC_IS_ENABLED(BIC_IPC) && cpuid_has_aperf_mperf) { fd_instr_count_percpu = calloc(topo.max_cpu_num + 1, sizeof(int)); if (fd_instr_count_percpu == NULL) err(-1, "calloc fd_instr_count_percpu"); } + if (BIC_IS_ENABLED(BIC_LLC_RPS)) { + fd_llc_percpu = calloc(topo.max_cpu_num + 1, sizeof(int)); + if (fd_llc_percpu == NULL) + err(-1, "calloc fd_llc_percpu"); + } } void rapl_perf_init(void) @@ -8485,7 +8450,7 @@ void rapl_perf_init(void) /* Assumes msr_counter_info is populated */ static int has_amperf_access(void) { - return msr_counter_arch_infos[MSR_ARCH_INFO_APERF_INDEX].present && + return cpuid_has_aperf_mperf && msr_counter_arch_infos[MSR_ARCH_INFO_APERF_INDEX].present && 
msr_counter_arch_infos[MSR_ARCH_INFO_MPERF_INDEX].present; } @@ -8708,8 +8673,7 @@ void cstate_perf_init_(bool soft_c1) cci->source[cai->rci_index] = COUNTER_SOURCE_PERF; /* User MSR for this counter */ - } else if (pkg_cstate_limit >= cai->pkg_cstate_limit - && add_msr_counter(cpu, cai->msr) >= 0) { + } else if (pkg_cstate_limit >= cai->pkg_cstate_limit && add_msr_counter(cpu, cai->msr) >= 0) { cci->source[cai->rci_index] = COUNTER_SOURCE_MSR; cci->msr[cai->rci_index] = cai->msr; } @@ -8827,8 +8791,7 @@ void process_cpuid() hygon_genuine = 1; if (!quiet) - fprintf(outf, "CPUID(0): %.4s%.4s%.4s 0x%x CPUID levels\n", - (char *)&ebx, (char *)&edx, (char *)&ecx, max_level); + fprintf(outf, "CPUID(0): %.4s%.4s%.4s 0x%x CPUID levels\n", (char *)&ebx, (char *)&edx, (char *)&ecx, max_level); __cpuid(1, fms, ebx, ecx, edx); family = (fms >> 8) & 0xf; @@ -8857,8 +8820,7 @@ void process_cpuid() __cpuid(0x80000000, max_extended_level, ebx, ecx, edx); if (!quiet) { - fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d)", - family, model, stepping, family, model, stepping); + fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d)", family, model, stepping, family, model, stepping); if (ucode_patch_valid) fprintf(outf, " microcode 0x%x", (unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF)); fputc('\n', outf); @@ -8872,8 +8834,7 @@ void process_cpuid() ecx_flags & (1 << 8) ? "TM2" : "-", edx_flags & (1 << 4) ? "TSC" : "-", edx_flags & (1 << 5) ? "MSR" : "-", - edx_flags & (1 << 22) ? "ACPI-TM" : "-", - edx_flags & (1 << 28) ? "HT" : "-", edx_flags & (1 << 29) ? "TM" : "-"); + edx_flags & (1 << 22) ? "ACPI-TM" : "-", edx_flags & (1 << 28) ? "HT" : "-", edx_flags & (1 << 29) ? 
"TM" : "-"); } probe_platform_features(family, model); @@ -8897,7 +8858,7 @@ void process_cpuid() */ __cpuid(0x6, eax, ebx, ecx, edx); - has_aperf = ecx & (1 << 0); + cpuid_has_aperf_mperf = ecx & (1 << 0); do_dts = eax & (1 << 0); if (do_dts) BIC_PRESENT(BIC_CoreTmp); @@ -8915,14 +8876,13 @@ void process_cpuid() if (!quiet) fprintf(outf, "CPUID(6): %sAPERF, %sTURBO, %sDTS, %sPTM, %sHWP, " "%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n", - has_aperf ? "" : "No-", + cpuid_has_aperf_mperf ? "" : "No-", has_turbo ? "" : "No-", do_dts ? "" : "No-", do_ptm ? "" : "No-", has_hwp ? "" : "No-", has_hwp_notify ? "" : "No-", - has_hwp_activity_window ? "" : "No-", - has_hwp_epp ? "" : "No-", has_hwp_pkg ? "" : "No-", has_epb ? "" : "No-"); + has_hwp_activity_window ? "" : "No-", has_hwp_epp ? "" : "No-", has_hwp_pkg ? "" : "No-", has_epb ? "" : "No-"); if (!quiet) decode_misc_enable_msr(); @@ -8956,8 +8916,7 @@ void process_cpuid() if (ebx_tsc != 0) { if (!quiet && (ebx != 0)) - fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n", - eax_crystal, ebx_tsc, crystal_hz); + fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n", eax_crystal, ebx_tsc, crystal_hz); if (crystal_hz == 0) crystal_hz = platform->crystal_freq; @@ -8989,11 +8948,10 @@ void process_cpuid() tsc_tweak = base_hz / tsc_hz; if (!quiet) - fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n", - base_mhz, max_mhz, bus_mhz); + fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n", base_mhz, max_mhz, bus_mhz); } - if (has_aperf) + if (cpuid_has_aperf_mperf) aperf_mperf_multiplier = platform->need_perf_multiplier ? 
1024 : 1; BIC_PRESENT(BIC_IRQ); @@ -9045,6 +9003,62 @@ void probe_pm_features(void) decode_misc_feature_control(); } +/* perf_llc_probe + * + * return 1 on success, else 0 + */ +int has_perf_llc_access(void) +{ + int fd; + + if (no_perf) + return 0; + + fd = open_perf_counter(base_cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES, -1, PERF_FORMAT_GROUP); + if (fd != -1) + close(fd); + + if (fd == -1) + warnx("Failed to access %s. Some of the counters may not be available\n" + "\tRun as root to enable them or use %s to disable the access explicitly", "perf LLC counters", "'--hide LLC' or '--no-perf'"); + + return (fd != -1); +} + +void perf_llc_init(void) +{ + int cpu; + int retval; + + if (no_perf) + return; + if (!(BIC_IS_ENABLED(BIC_LLC_RPS) && BIC_IS_ENABLED(BIC_LLC_HIT))) + return; + + for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) { + + if (cpu_is_not_allowed(cpu)) + continue; + + assert(fd_llc_percpu != 0); + fd_llc_percpu[cpu] = open_perf_counter(cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES, -1, PERF_FORMAT_GROUP); + if (fd_llc_percpu[cpu] == -1) { + warnx("%s: perf REFS: failed to open counter on cpu%d", __func__, cpu); + free_fd_llc_percpu(); + return; + } + assert(fd_llc_percpu != 0); + retval = open_perf_counter(cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES, fd_llc_percpu[cpu], PERF_FORMAT_GROUP); + if (retval == -1) { + warnx("%s: perf MISS: failed to open counter on cpu%d", __func__, cpu); + free_fd_llc_percpu(); + return; + } + } + BIC_PRESENT(BIC_LLC_RPS); + BIC_PRESENT(BIC_LLC_HIT); +} + /* * in /dev/cpu/ return success for names that are numbers * ie. filter out ".", "..", "microcode". 
@@ -9351,6 +9365,7 @@ void init_counter(struct thread_data *thread_base, struct core_data *core_base, t->cpu_id = cpu_id; if (!cpu_is_not_allowed(cpu_id)) { + if (c->base_cpu < 0) c->base_cpu = t->cpu_id; if (pkg_base[pkg_id].base_cpu < 0) @@ -9456,7 +9471,7 @@ bool has_added_counters(void) void check_msr_access(void) { - check_dev_msr(); + check_msr_driver(); check_msr_permission(); if (no_msr) @@ -9465,8 +9480,16 @@ void check_msr_access(void) void check_perf_access(void) { - if (no_perf || !BIC_IS_ENABLED(BIC_IPC) || !has_instr_count_access()) - CLR_BIC(BIC_IPC, &bic_enabled); + if (BIC_IS_ENABLED(BIC_IPC)) + if (!has_perf_instr_count_access()) + no_perf = 1; + + if (BIC_IS_ENABLED(BIC_LLC_RPS) || BIC_IS_ENABLED(BIC_LLC_HIT)) + if (!has_perf_llc_access()) + no_perf = 1; + + if (no_perf) + bic_disable_perf_access(); } bool perf_has_hybrid_devices(void) @@ -9589,8 +9612,7 @@ int added_perf_counters_init_(struct perf_counter_info *pinfo) perf_config = read_perf_config(perf_device, pinfo->event); if (perf_config == (unsigned int)-1) { - warnx("%s: perf/%s/%s: failed to read %s", - __func__, perf_device, pinfo->event, "config"); + warnx("%s: perf/%s/%s: failed to read %s", __func__, perf_device, pinfo->event, "config"); continue; } @@ -9601,8 +9623,7 @@ int added_perf_counters_init_(struct perf_counter_info *pinfo) fd_perf = open_perf_counter(cpu, perf_type, perf_config, -1, 0); if (fd_perf == -1) { - warnx("%s: perf/%s/%s: failed to open counter on cpu%d", - __func__, perf_device, pinfo->event, cpu); + warnx("%s: perf/%s/%s: failed to open counter on cpu%d", __func__, perf_device, pinfo->event, cpu); continue; } @@ -9611,8 +9632,7 @@ int added_perf_counters_init_(struct perf_counter_info *pinfo) pinfo->scale = perf_scale; if (debug) - fprintf(stderr, "Add perf/%s/%s cpu%d: %d\n", - perf_device, pinfo->event, cpu, pinfo->fd_perf_per_domain[next_domain]); + fprintf(stderr, "Add perf/%s/%s cpu%d: %d\n", perf_device, pinfo->event, cpu, 
pinfo->fd_perf_per_domain[next_domain]); } pinfo = pinfo->next; @@ -9926,8 +9946,7 @@ int pmt_add_counter(unsigned int guid, unsigned int seq, const char *name, enum } if (conflict) { - fprintf(stderr, "%s: conflicting parameters for the PMT counter with the same name %s\n", - __func__, name); + fprintf(stderr, "%s: conflicting parameters for the PMT counter with the same name %s\n", __func__, name); exit(1); } @@ -9970,8 +9989,7 @@ void pmt_init(void) * CWF with newer firmware might require a PMT_TYPE_XTAL_TIME intead of PMT_TYPE_TCORE_CLOCK. */ pmt_add_counter(PMT_CWF_MC1E_GUID, seq, "CPU%c1e", PMT_TYPE_TCORE_CLOCK, - PMT_COUNTER_CWF_MC1E_LSB, PMT_COUNTER_CWF_MC1E_MSB, offset, SCOPE_CPU, - FORMAT_DELTA, cpu_num, PMT_OPEN_TRY); + PMT_COUNTER_CWF_MC1E_LSB, PMT_COUNTER_CWF_MC1E_MSB, offset, SCOPE_CPU, FORMAT_DELTA, cpu_num, PMT_OPEN_TRY); /* * Rather complex logic for each time we go to the next loop iteration, @@ -10021,6 +10039,7 @@ void turbostat_init() linux_perf_init(); rapl_perf_init(); cstate_perf_init(); + perf_llc_init(); added_perf_counters_init(); pmt_init(); @@ -10126,7 +10145,7 @@ int get_and_dump_counters(void) void print_version() { - fprintf(outf, "turbostat version 2025.09.09 - Len Brown <lenb@kernel.org>\n"); + fprintf(outf, "turbostat version 2025.12.02 - Len Brown <lenb@kernel.org>\n"); } #define COMMAND_LINE_SIZE 2048 @@ -10166,8 +10185,7 @@ struct msr_counter *find_msrp_by_name(struct msr_counter *head, char *name) } int add_counter(unsigned int msr_num, char *path, char *name, - unsigned int width, enum counter_scope scope, - enum counter_type type, enum counter_format format, int flags, int id) + unsigned int width, enum counter_scope scope, enum counter_type type, enum counter_format format, int flags, int id) { struct msr_counter *msrp; @@ -10276,9 +10294,7 @@ int add_counter(unsigned int msr_num, char *path, char *name, struct perf_counter_info *make_perf_counter_info(const char *perf_device, const char *perf_event, const char *name, - 
unsigned int width, - enum counter_scope scope, - enum counter_type type, enum counter_format format) + unsigned int width, enum counter_scope scope, enum counter_type type, enum counter_format format) { struct perf_counter_info *pinfo; @@ -10353,8 +10369,7 @@ int add_perf_counter(const char *perf_device, const char *perf_event, const char // FIXME: we might not have debug here yet if (debug) - fprintf(stderr, "%s: %s/%s, name: %s, scope%d\n", - __func__, pinfo->device, pinfo->event, pinfo->name, pinfo->scope); + fprintf(stderr, "%s: %s/%s, name: %s, scope%d\n", __func__, pinfo->device, pinfo->event, pinfo->name, pinfo->scope); return 0; } @@ -10523,8 +10538,7 @@ int pmt_parse_from_path(const char *target_path, unsigned int *out_guid, unsigne pmt_diriter_init(&pmt_iter); - for (dirname = pmt_diriter_begin(&pmt_iter, SYSFS_TELEM_PATH); dirname != NULL; - dirname = pmt_diriter_next(&pmt_iter)) { + for (dirname = pmt_diriter_begin(&pmt_iter, SYSFS_TELEM_PATH); dirname != NULL; dirname = pmt_diriter_next(&pmt_iter)) { fd_telem_dir = openat(dirfd(pmt_iter.dir), dirname->d_name, O_RDONLY | O_DIRECTORY); if (fd_telem_dir == -1) @@ -10536,8 +10550,7 @@ int pmt_parse_from_path(const char *target_path, unsigned int *out_guid, unsigne } if (fstat(fd_telem_dir, &stat) == -1) { - fprintf(stderr, "%s: Failed to stat %s directory: %s", __func__, - dirname->d_name, strerror(errno)); + fprintf(stderr, "%s: Failed to stat %s directory: %s", __func__, dirname->d_name, strerror(errno)); continue; } @@ -10633,8 +10646,7 @@ void parse_add_command_pmt(char *add_command) } if (!has_scope) { - printf("%s: invalid value for scope. Expected cpu%%u, core%%u or package%%u.\n", - __func__); + printf("%s: invalid value for scope. Expected cpu%%u, core%%u or package%%u.\n", __func__); exit(1); } @@ -10710,8 +10722,7 @@ next: } if (!has_format) { - fprintf(stderr, "%s: Invalid format %s. Expected raw, average or delta\n", - __func__, format_name); + fprintf(stderr, "%s: Invalid format %s. 
Expected raw, average or delta\n", __func__, format_name); exit(1); } } @@ -10878,7 +10889,7 @@ void probe_cpuidle_residency(void) if (is_deferred_skip(name_buf)) continue; - add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC, FORMAT_PERCENT, SYSFS_PERCPU, 0); + add_counter(0, path, name_buf, 32, SCOPE_CPU, COUNTER_USEC, FORMAT_PERCENT, SYSFS_PERCPU, 0); if (state > max_state) max_state = state; |
