diff options
Diffstat (limited to 'tools/perf/util')
99 files changed, 1980 insertions, 1814 deletions
diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 4be313cd115a..1c2a43e1dc68 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -129,22 +129,22 @@ perf-util-y += iostat.o perf-util-y += stream.o perf-util-y += kvm-stat.o perf-util-y += lock-contention.o -perf-util-$(CONFIG_AUXTRACE) += auxtrace.o +perf-util-y += auxtrace.o perf-util-y += intel-pt-decoder/ -perf-util-$(CONFIG_AUXTRACE) += intel-pt.o -perf-util-$(CONFIG_AUXTRACE) += intel-bts.o -perf-util-$(CONFIG_AUXTRACE) += arm-spe.o -perf-util-$(CONFIG_AUXTRACE) += arm-spe-decoder/ -perf-util-$(CONFIG_AUXTRACE) += hisi-ptt.o -perf-util-$(CONFIG_AUXTRACE) += hisi-ptt-decoder/ -perf-util-$(CONFIG_AUXTRACE) += s390-cpumsf.o -perf-util-$(CONFIG_AUXTRACE) += powerpc-vpadtl.o +perf-util-y += intel-pt.o +perf-util-y += intel-bts.o +perf-util-y += arm-spe.o +perf-util-y += arm-spe-decoder/ +perf-util-y += hisi-ptt.o +perf-util-y += hisi-ptt-decoder/ +perf-util-y += s390-cpumsf.o +perf-util-y += powerpc-vpadtl.o ifdef CONFIG_LIBOPENCSD -perf-util-$(CONFIG_AUXTRACE) += cs-etm.o -perf-util-$(CONFIG_AUXTRACE) += cs-etm-decoder/ +perf-util-y += cs-etm.o +perf-util-y += cs-etm-decoder/ endif -perf-util-$(CONFIG_AUXTRACE) += cs-etm-base.o +perf-util-y += cs-etm-base.o perf-util-y += parse-branch-options.o perf-util-y += dump-insn.o diff --git a/tools/perf/util/annotate-data.c b/tools/perf/util/annotate-data.c index 903027a6fb7d..07cf9c334be0 100644 --- a/tools/perf/util/annotate-data.c +++ b/tools/perf/util/annotate-data.c @@ -4,7 +4,7 @@ * * Written by Namhyung Kim <namhyung@kernel.org> */ - +#include <errno.h> #include <stdio.h> #include <stdlib.h> #include <inttypes.h> @@ -59,6 +59,10 @@ void pr_debug_type_name(Dwarf_Die *die, enum type_state_kind kind) pr_info(" constant\n"); return; case TSR_KIND_PERCPU_POINTER: + pr_info(" percpu pointer"); + /* it also prints the type info */ + break; + case TSR_KIND_POINTER: pr_info(" pointer"); /* it also prints the type info */ break; @@ -573,21 +577,31 
@@ struct type_state_stack *find_stack_state(struct type_state *state, } void set_stack_state(struct type_state_stack *stack, int offset, u8 kind, - Dwarf_Die *type_die) + Dwarf_Die *type_die, int ptr_offset) { int tag; Dwarf_Word size; - if (dwarf_aggregate_size(type_die, &size) < 0) + if (kind == TSR_KIND_POINTER) { + /* TODO: arch-dependent pointer size */ + size = sizeof(void *); + } + else if (dwarf_aggregate_size(type_die, &size) < 0) size = 0; - tag = dwarf_tag(type_die); - stack->type = *type_die; stack->size = size; stack->offset = offset; + stack->ptr_offset = ptr_offset; stack->kind = kind; + if (kind == TSR_KIND_POINTER) { + stack->compound = false; + return; + } + + tag = dwarf_tag(type_die); + switch (tag) { case DW_TAG_structure_type: case DW_TAG_union_type: @@ -601,18 +615,19 @@ void set_stack_state(struct type_state_stack *stack, int offset, u8 kind, struct type_state_stack *findnew_stack_state(struct type_state *state, int offset, u8 kind, - Dwarf_Die *type_die) + Dwarf_Die *type_die, + int ptr_offset) { struct type_state_stack *stack = find_stack_state(state, offset); if (stack) { - set_stack_state(stack, offset, kind, type_die); + set_stack_state(stack, offset, kind, type_die, ptr_offset); return stack; } stack = malloc(sizeof(*stack)); if (stack) { - set_stack_state(stack, offset, kind, type_die); + set_stack_state(stack, offset, kind, type_die, ptr_offset); list_add(&stack->list, &state->stack_vars); } return stack; @@ -882,7 +897,7 @@ static void update_var_state(struct type_state *state, struct data_loc_info *dlo continue; findnew_stack_state(state, offset, TSR_KIND_TYPE, - &mem_die); + &mem_die, /*ptr_offset=*/0); if (var->reg == state->stack_reg) { pr_debug_dtp("var [%"PRIx64"] %#x(reg%d)", @@ -892,28 +907,45 @@ static void update_var_state(struct type_state *state, struct data_loc_info *dlo insn_offset, -offset); } pr_debug_type_name(&mem_die, TSR_KIND_TYPE); - } else if (has_reg_type(state, var->reg) && var->offset == 0) { + } else if 
(has_reg_type(state, var->reg)) { struct type_state_reg *reg; Dwarf_Die orig_type; reg = &state->regs[var->reg]; - /* For gp registers, skip the address registers for now */ - if (var->is_reg_var_addr) + if (reg->ok && reg->kind == TSR_KIND_TYPE && + (!is_better_type(&reg->type, &mem_die) || var->is_reg_var_addr)) continue; - if (reg->ok && reg->kind == TSR_KIND_TYPE && - !is_better_type(&reg->type, &mem_die)) + /* Handle address registers with TSR_KIND_POINTER */ + if (var->is_reg_var_addr) { + if (reg->ok && reg->kind == TSR_KIND_POINTER && + !is_better_type(&reg->type, &mem_die)) + continue; + + reg->offset = -var->offset; + reg->type = mem_die; + reg->kind = TSR_KIND_POINTER; + reg->ok = true; + + pr_debug_dtp("var [%"PRIx64"] reg%d addr offset %x", + insn_offset, var->reg, var->offset); + pr_debug_type_name(&mem_die, TSR_KIND_POINTER); continue; + } orig_type = reg->type; - + /* + * var->offset + reg value is the beginning of the struct + * reg->offset is the offset the reg points + */ + reg->offset = -var->offset; reg->type = mem_die; reg->kind = TSR_KIND_TYPE; reg->ok = true; - pr_debug_dtp("var [%"PRIx64"] reg%d", - insn_offset, var->reg); + pr_debug_dtp("var [%"PRIx64"] reg%d offset %x", + insn_offset, var->reg, var->offset); pr_debug_type_name(&mem_die, TSR_KIND_TYPE); /* @@ -1101,7 +1133,7 @@ again: if (__die_get_real_type(&state->regs[reg].type, type_die) == NULL) return PERF_TMR_NO_POINTER; - dloc->type_offset = dloc->op->offset; + dloc->type_offset = dloc->op->offset + state->regs[reg].offset; if (dwarf_tag(type_die) == DW_TAG_typedef) die_get_real_type(type_die, &sized_type); @@ -1116,6 +1148,30 @@ again: return PERF_TMR_OK; } + if (state->regs[reg].kind == TSR_KIND_POINTER) { + struct strbuf sb; + + strbuf_init(&sb, 32); + die_get_typename_from_type(&state->regs[reg].type, &sb); + pr_debug_dtp("(ptr->%s)", sb.buf); + strbuf_release(&sb); + + /* + * Register holds a pointer (address) to the target variable. 
+ * The type is the type of the variable it points to. + */ + *type_die = state->regs[reg].type; + + dloc->type_offset = dloc->op->offset + state->regs[reg].offset; + + /* Get the size of the actual type */ + if (dwarf_aggregate_size(type_die, &size) < 0 || + (unsigned)dloc->type_offset >= size) + return PERF_TMR_BAD_OFFSET; + + return PERF_TMR_OK; + } + if (state->regs[reg].kind == TSR_KIND_PERCPU_POINTER) { pr_debug_dtp("percpu ptr"); diff --git a/tools/perf/util/annotate-data.h b/tools/perf/util/annotate-data.h index df52a0a1f496..869307c7f130 100644 --- a/tools/perf/util/annotate-data.h +++ b/tools/perf/util/annotate-data.h @@ -35,6 +35,7 @@ enum type_state_kind { TSR_KIND_PERCPU_BASE, TSR_KIND_CONST, TSR_KIND_PERCPU_POINTER, + TSR_KIND_POINTER, TSR_KIND_CANARY, }; @@ -173,6 +174,12 @@ extern struct annotated_data_stat ann_data_stat; struct type_state_reg { Dwarf_Die type; u32 imm_value; + /* + * The offset within the struct that the register points to. + * A value of 0 means the register points to the beginning. 
+ * type_offset = op->offset + reg->offset + */ + s32 offset; bool ok; bool caller_saved; u8 kind; @@ -184,6 +191,8 @@ struct type_state_stack { struct list_head list; Dwarf_Die type; int offset; + /* pointer offset, saves tsr->offset on the stack state */ + int ptr_offset; int size; bool compound; u8 kind; @@ -240,9 +249,10 @@ int annotated_data_type__get_member_name(struct annotated_data_type *adt, bool has_reg_type(struct type_state *state, int reg); struct type_state_stack *findnew_stack_state(struct type_state *state, int offset, u8 kind, - Dwarf_Die *type_die); + Dwarf_Die *type_die, + int ptr_offset); void set_stack_state(struct type_state_stack *stack, int offset, u8 kind, - Dwarf_Die *type_die); + Dwarf_Die *type_die, int ptr_offset); struct type_state_stack *find_stack_state(struct type_state *state, int offset); bool get_global_var_type(Dwarf_Die *cu_die, struct data_loc_info *dloc, diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index a2e34f149a07..cc7764455faf 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -980,7 +980,7 @@ void symbol__calc_percent(struct symbol *sym, struct evsel *evsel) annotation__calc_percent(notes, evsel, symbol__size(sym)); } -static int evsel__get_arch(struct evsel *evsel, struct arch **parch) +int evsel__get_arch(struct evsel *evsel, struct arch **parch) { struct perf_env *env = evsel__env(evsel); const char *arch_name = perf_env__arch(env); @@ -1021,7 +1021,7 @@ int symbol__annotate(struct map_symbol *ms, struct evsel *evsel, int err, nr; err = evsel__get_arch(evsel, &arch); - if (err < 0) + if (err) return err; if (parch) @@ -2698,6 +2698,20 @@ static bool is_stack_canary(struct arch *arch, struct annotated_op_loc *loc) return false; } +/** + * Returns true if the instruction has a memory operand without + * performing a load/store + */ +static bool is_address_gen_insn(struct arch *arch, struct disasm_line *dl) +{ + if (arch__is(arch, "x86")) { + if (!strncmp(dl->ins.name, 
"lea", 3)) + return true; + } + + return false; +} + static struct disasm_line * annotation__prev_asm_line(struct annotation *notes, struct disasm_line *curr) { @@ -2806,6 +2820,12 @@ __hist_entry__get_data_type(struct hist_entry *he, struct arch *arch, return &stackop_type; } + if (is_address_gen_insn(arch, dl)) { + istat->bad++; + ann_data_stat.no_mem_ops++; + return NO_TYPE; + } + for_each_insn_op_loc(&loc, i, op_loc) { struct data_loc_info dloc = { .arch = arch, diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index eaf6c8aa7f47..d4990bff29a7 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -585,4 +585,6 @@ void debuginfo_cache__delete(void); int annotation_br_cntr_entry(char **str, int br_cntr_nr, u64 *br_cntr, int num_aggr, struct evsel *evsel); int annotation_br_cntr_abbr_list(char **str, struct evsel *evsel, bool header); + +int evsel__get_arch(struct evsel *evsel, struct arch **parch); #endif /* __PERF_ANNOTATE_H */ diff --git a/tools/perf/util/arm-spe-decoder/Build b/tools/perf/util/arm-spe-decoder/Build index 960062b3cb9e..ab500e0efe24 100644 --- a/tools/perf/util/arm-spe-decoder/Build +++ b/tools/perf/util/arm-spe-decoder/Build @@ -1 +1 @@ -perf-util-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o arm-spe-decoder.o +perf-util-y += arm-spe-pkt-decoder.o arm-spe-decoder.o diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c index 96eb7cced6fd..9e02b2bdd117 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c @@ -200,13 +200,61 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder) decoder->record.op |= ARM_SPE_OP_ST; else decoder->record.op |= ARM_SPE_OP_LD; - if (SPE_OP_PKT_IS_LDST_SVE(payload)) - decoder->record.op |= ARM_SPE_OP_SVE_LDST; + + if (SPE_OP_PKT_LDST_SUBCLASS_GP_REG(payload)) { + decoder->record.op |= ARM_SPE_OP_GP_REG; + } else if 
(SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP(payload)) { + decoder->record.op |= ARM_SPE_OP_SIMD_FP; + } else if (SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG(payload)) { + decoder->record.op |= ARM_SPE_OP_UNSPEC_REG; + } else if (SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG(payload)) { + decoder->record.op |= ARM_SPE_OP_NV_SYSREG; + } else if (SPE_OP_PKT_LDST_SUBCLASS_MTE_TAG(payload)) { + decoder->record.op |= ARM_SPE_OP_MTE_TAG; + } else if (SPE_OP_PKT_LDST_SUBCLASS_EXTENDED(payload)) { + if (payload & SPE_OP_PKT_AR) + decoder->record.op |= ARM_SPE_OP_AR; + if (payload & SPE_OP_PKT_EXCL) + decoder->record.op |= ARM_SPE_OP_EXCL; + if (payload & SPE_OP_PKT_AT) + decoder->record.op |= ARM_SPE_OP_ATOMIC; + } else if (SPE_OP_PKT_LDST_SUBCLASS_SVE_SME_REG(payload)) { + decoder->record.op |= ARM_SPE_OP_SVE; + if (payload & SPE_OP_PKT_SVE_PRED) + decoder->record.op |= ARM_SPE_OP_PRED; + if (payload & SPE_OP_PKT_SVE_SG) + decoder->record.op |= ARM_SPE_OP_SG; + } else if (SPE_OP_PKT_LDST_SUBCLASS_MEMCPY(payload)) { + decoder->record.op |= ARM_SPE_OP_MEMCPY; + } else if (SPE_OP_PKT_LDST_SUBCLASS_MEMSET(payload)) { + decoder->record.op |= ARM_SPE_OP_MEMSET; + } else if (SPE_OP_PKT_LDST_SUBCLASS_GCS(payload)) { + decoder->record.op |= ARM_SPE_OP_GCS; + if (payload & SPE_OP_PKT_GCS_COMM) + decoder->record.op |= ARM_SPE_OP_COMM; + } + break; case SPE_OP_PKT_HDR_CLASS_OTHER: decoder->record.op |= ARM_SPE_OP_OTHER; - if (SPE_OP_PKT_IS_OTHER_SVE_OP(payload)) - decoder->record.op |= ARM_SPE_OP_SVE_OTHER; + if (SPE_OP_PKT_OTHER_SUBCLASS_SVE(payload)) { + decoder->record.op |= ARM_SPE_OP_SVE | ARM_SPE_OP_DP; + if (payload & SPE_OP_PKT_OTHER_FP) + decoder->record.op |= ARM_SPE_OP_FP; + if (payload & SPE_OP_PKT_SVE_PRED) + decoder->record.op |= ARM_SPE_OP_PRED; + } else if (SPE_OP_PKT_OTHER_SUBCLASS_SME(payload)) { + decoder->record.op |= ARM_SPE_OP_SME; + if (payload & SPE_OP_PKT_OTHER_FP) + decoder->record.op |= ARM_SPE_OP_FP; + } else if (SPE_OP_PKT_OTHER_SUBCLASS_OTHER(payload)) { + if (payload & 
SPE_OP_PKT_OTHER_ASE) + decoder->record.op |= ARM_SPE_OP_ASE; + if (payload & SPE_OP_PKT_OTHER_FP) + decoder->record.op |= ARM_SPE_OP_FP; + if (payload & SPE_OP_PKT_COND) + decoder->record.op |= ARM_SPE_OP_COND; + } break; case SPE_OP_PKT_HDR_CLASS_BR_ERET: decoder->record.op |= ARM_SPE_OP_BRANCH_ERET; diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h index fbb57f805237..3310e05122f0 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h @@ -36,33 +36,42 @@ enum arm_spe_op_type { ARM_SPE_OP_OTHER = 1 << 0, ARM_SPE_OP_LDST = 1 << 1, ARM_SPE_OP_BRANCH_ERET = 1 << 2, +}; + +enum arm_spe_2nd_op_ldst { + ARM_SPE_OP_GP_REG = 1 << 8, + ARM_SPE_OP_UNSPEC_REG = 1 << 9, + ARM_SPE_OP_NV_SYSREG = 1 << 10, + ARM_SPE_OP_SIMD_FP = 1 << 11, + ARM_SPE_OP_SVE = 1 << 12, + ARM_SPE_OP_MTE_TAG = 1 << 13, + ARM_SPE_OP_MEMCPY = 1 << 14, + ARM_SPE_OP_MEMSET = 1 << 15, + ARM_SPE_OP_GCS = 1 << 16, + ARM_SPE_OP_SME = 1 << 17, + ARM_SPE_OP_ASE = 1 << 18, + + /* Assisted information for memory / SIMD */ + ARM_SPE_OP_LD = 1 << 20, + ARM_SPE_OP_ST = 1 << 21, + ARM_SPE_OP_ATOMIC = 1 << 22, + ARM_SPE_OP_EXCL = 1 << 23, + ARM_SPE_OP_AR = 1 << 24, + ARM_SPE_OP_DP = 1 << 25, /* Data processing */ + ARM_SPE_OP_PRED = 1 << 26, /* Predicated */ + ARM_SPE_OP_SG = 1 << 27, /* Gather/Scatter */ + ARM_SPE_OP_COMM = 1 << 28, /* Common */ + ARM_SPE_OP_FP = 1 << 29, /* Floating-point */ + ARM_SPE_OP_COND = 1 << 30, /* Conditional */ +}; - /* Second level operation type for OTHER */ - ARM_SPE_OP_SVE_OTHER = 1 << 16, - ARM_SPE_OP_SVE_FP = 1 << 17, - ARM_SPE_OP_SVE_PRED_OTHER = 1 << 18, - - /* Second level operation type for LDST */ - ARM_SPE_OP_LD = 1 << 16, - ARM_SPE_OP_ST = 1 << 17, - ARM_SPE_OP_ATOMIC = 1 << 18, - ARM_SPE_OP_EXCL = 1 << 19, - ARM_SPE_OP_AR = 1 << 20, - ARM_SPE_OP_SIMD_FP = 1 << 21, - ARM_SPE_OP_GP_REG = 1 << 22, - ARM_SPE_OP_UNSPEC_REG = 1 << 23, - 
ARM_SPE_OP_NV_SYSREG = 1 << 24, - ARM_SPE_OP_SVE_LDST = 1 << 25, - ARM_SPE_OP_SVE_PRED_LDST = 1 << 26, - ARM_SPE_OP_SVE_SG = 1 << 27, - - /* Second level operation type for BRANCH_ERET */ - ARM_SPE_OP_BR_COND = 1 << 16, - ARM_SPE_OP_BR_INDIRECT = 1 << 17, - ARM_SPE_OP_BR_GCS = 1 << 18, - ARM_SPE_OP_BR_CR_BL = 1 << 19, - ARM_SPE_OP_BR_CR_RET = 1 << 20, - ARM_SPE_OP_BR_CR_NON_BL_RET = 1 << 21, +enum arm_spe_2nd_op_branch { + ARM_SPE_OP_BR_COND = 1 << 8, + ARM_SPE_OP_BR_INDIRECT = 1 << 9, + ARM_SPE_OP_BR_GCS = 1 << 10, + ARM_SPE_OP_BR_CR_BL = 1 << 11, + ARM_SPE_OP_BR_CR_RET = 1 << 12, + ARM_SPE_OP_BR_CR_NON_BL_RET = 1 << 13, }; enum arm_spe_common_data_source { diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 80561630253d..5769ba2f4140 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -340,7 +340,7 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet, switch (packet->index) { case SPE_OP_PKT_HDR_CLASS_OTHER: - if (SPE_OP_PKT_IS_OTHER_SVE_OP(payload)) { + if (SPE_OP_PKT_OTHER_SUBCLASS_SVE(payload)) { arm_spe_pkt_out_string(&err, &buf, &buf_len, "SVE-OTHER"); /* SVE effective vector length */ @@ -351,8 +351,21 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet, arm_spe_pkt_out_string(&err, &buf, &buf_len, " FP"); if (payload & SPE_OP_PKT_SVE_PRED) arm_spe_pkt_out_string(&err, &buf, &buf_len, " PRED"); - } else { + } else if (SPE_OP_PKT_OTHER_SUBCLASS_SME(payload)) { + arm_spe_pkt_out_string(&err, &buf, &buf_len, "SME-OTHER"); + + /* SME effective vector length or tile size */ + arm_spe_pkt_out_string(&err, &buf, &buf_len, " ETS %d", + SPE_OP_PKG_SME_ETS(payload)); + + if (payload & SPE_OP_PKT_OTHER_FP) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " FP"); + } else if (SPE_OP_PKT_OTHER_SUBCLASS_OTHER(payload)) { arm_spe_pkt_out_string(&err, &buf, &buf_len, "OTHER"); + if 
(payload & SPE_OP_PKT_OTHER_ASE) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " ASE"); + if (payload & SPE_OP_PKT_OTHER_FP) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " FP"); arm_spe_pkt_out_string(&err, &buf, &buf_len, " %s", payload & SPE_OP_PKT_COND ? "COND-SELECT" : "INSN-OTHER"); @@ -362,42 +375,30 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet, arm_spe_pkt_out_string(&err, &buf, &buf_len, payload & 0x1 ? "ST" : "LD"); - if (SPE_OP_PKT_IS_LDST_ATOMIC(payload)) { + if (SPE_OP_PKT_LDST_SUBCLASS_EXTENDED(payload)) { if (payload & SPE_OP_PKT_AT) arm_spe_pkt_out_string(&err, &buf, &buf_len, " AT"); if (payload & SPE_OP_PKT_EXCL) arm_spe_pkt_out_string(&err, &buf, &buf_len, " EXCL"); if (payload & SPE_OP_PKT_AR) arm_spe_pkt_out_string(&err, &buf, &buf_len, " AR"); - } - - switch (SPE_OP_PKT_LDST_SUBCLASS_GET(payload)) { - case SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP: + } else if (SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP(payload)) { arm_spe_pkt_out_string(&err, &buf, &buf_len, " SIMD-FP"); - break; - case SPE_OP_PKT_LDST_SUBCLASS_GP_REG: + } else if (SPE_OP_PKT_LDST_SUBCLASS_GP_REG(payload)) { arm_spe_pkt_out_string(&err, &buf, &buf_len, " GP-REG"); - break; - case SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG: + } else if (SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG(payload)) { arm_spe_pkt_out_string(&err, &buf, &buf_len, " UNSPEC-REG"); - break; - case SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG: + } else if (SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG(payload)) { arm_spe_pkt_out_string(&err, &buf, &buf_len, " NV-SYSREG"); - break; - case SPE_OP_PKT_LDST_SUBCLASS_MTE_TAG: + } else if (SPE_OP_PKT_LDST_SUBCLASS_MTE_TAG(payload)) { arm_spe_pkt_out_string(&err, &buf, &buf_len, " MTE-TAG"); - break; - case SPE_OP_PKT_LDST_SUBCLASS_MEMCPY: + } else if (SPE_OP_PKT_LDST_SUBCLASS_MEMCPY(payload)) { arm_spe_pkt_out_string(&err, &buf, &buf_len, " MEMCPY"); - break; - case SPE_OP_PKT_LDST_SUBCLASS_MEMSET: + } else if (SPE_OP_PKT_LDST_SUBCLASS_MEMSET(payload)) { arm_spe_pkt_out_string(&err, &buf, 
&buf_len, " MEMSET"); - break; - default: - break; - } + } else if (SPE_OP_PKT_LDST_SUBCLASS_SVE_SME_REG(payload)) { + arm_spe_pkt_out_string(&err, &buf, &buf_len, " SVE-SME-REG"); - if (SPE_OP_PKT_IS_LDST_SVE(payload)) { /* SVE effective vector length */ arm_spe_pkt_out_string(&err, &buf, &buf_len, " EVLEN %d", SPE_OP_PKG_SVE_EVL(payload)); @@ -406,6 +407,10 @@ static int arm_spe_pkt_desc_op_type(const struct arm_spe_pkt *packet, arm_spe_pkt_out_string(&err, &buf, &buf_len, " PRED"); if (payload & SPE_OP_PKT_SVE_SG) arm_spe_pkt_out_string(&err, &buf, &buf_len, " SG"); + } else if (SPE_OP_PKT_LDST_SUBCLASS_GCS(payload)) { + arm_spe_pkt_out_string(&err, &buf, &buf_len, " GCS"); + if (payload & SPE_OP_PKT_GCS_COMM) + arm_spe_pkt_out_string(&err, &buf, &buf_len, " COMM"); } break; case SPE_OP_PKT_HDR_CLASS_BR_ERET: diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h index d00c2481712d..adf4cde320aa 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h @@ -123,25 +123,39 @@ enum arm_spe_events { #define SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC 0x1 #define SPE_OP_PKT_HDR_CLASS_BR_ERET 0x2 -#define SPE_OP_PKT_IS_OTHER_SVE_OP(v) (((v) & (BIT(7) | BIT(3) | BIT(0))) == 0x8) +#define SPE_OP_PKT_OTHER_SUBCLASS_OTHER(v) (((v) & GENMASK_ULL(7, 3)) == 0x0) +#define SPE_OP_PKT_OTHER_SUBCLASS_SVE(v) (((v) & (BIT(7) | BIT(3) | BIT(0))) == 0x8) +#define SPE_OP_PKT_OTHER_SUBCLASS_SME(v) (((v) & (BIT(7) | BIT(3) | BIT(0))) == 0x88) -#define SPE_OP_PKT_LDST_SUBCLASS_GET(v) ((v) & GENMASK_ULL(7, 1)) -#define SPE_OP_PKT_LDST_SUBCLASS_GP_REG 0x0 -#define SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP 0x4 -#define SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG 0x10 -#define SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG 0x30 -#define SPE_OP_PKT_LDST_SUBCLASS_MTE_TAG 0x14 -#define SPE_OP_PKT_LDST_SUBCLASS_MEMCPY 0x20 -#define SPE_OP_PKT_LDST_SUBCLASS_MEMSET 0x25 +#define SPE_OP_PKT_OTHER_ASE 
BIT(2) +#define SPE_OP_PKT_OTHER_FP BIT(1) -#define SPE_OP_PKT_IS_LDST_ATOMIC(v) (((v) & (GENMASK_ULL(7, 5) | BIT(1))) == 0x2) +/* + * SME effective vector length or tile size (ETS) is stored in byte 0 + * bits [6:4,2]; the length is rounded up to a power of two and use 128 + * as one step, so ETS calculation is: + * + * 128 * (2 ^ bits [6:4,2]) = 128 << (bits [6:4,2]) + */ +#define SPE_OP_PKG_SME_ETS(v) (128 << (FIELD_GET(GENMASK_ULL(6, 4), (v)) << 1 | \ + (FIELD_GET(BIT(2), (v))))) + +#define SPE_OP_PKT_LDST_SUBCLASS_GP_REG(v) (((v) & GENMASK_ULL(7, 1)) == 0x0) +#define SPE_OP_PKT_LDST_SUBCLASS_SIMD_FP(v) (((v) & GENMASK_ULL(7, 1)) == 0x4) +#define SPE_OP_PKT_LDST_SUBCLASS_UNSPEC_REG(v) (((v) & GENMASK_ULL(7, 1)) == 0x10) +#define SPE_OP_PKT_LDST_SUBCLASS_NV_SYSREG(v) (((v) & GENMASK_ULL(7, 1)) == 0x30) +#define SPE_OP_PKT_LDST_SUBCLASS_MTE_TAG(v) (((v) & GENMASK_ULL(7, 1)) == 0x14) +#define SPE_OP_PKT_LDST_SUBCLASS_MEMCPY(v) (((v) & GENMASK_ULL(7, 1)) == 0x20) +#define SPE_OP_PKT_LDST_SUBCLASS_MEMSET(v) (((v) & GENMASK_ULL(7, 0)) == 0x25) + +#define SPE_OP_PKT_LDST_SUBCLASS_EXTENDED(v) (((v) & (GENMASK_ULL(7, 5) | BIT(1))) == 0x2) #define SPE_OP_PKT_AR BIT(4) #define SPE_OP_PKT_EXCL BIT(3) #define SPE_OP_PKT_AT BIT(2) #define SPE_OP_PKT_ST BIT(0) -#define SPE_OP_PKT_IS_LDST_SVE(v) (((v) & (BIT(3) | BIT(1))) == 0x8) +#define SPE_OP_PKT_LDST_SUBCLASS_SVE_SME_REG(v) (((v) & (BIT(3) | BIT(1))) == 0x8) #define SPE_OP_PKT_SVE_SG BIT(7) /* @@ -155,6 +169,10 @@ enum arm_spe_events { #define SPE_OP_PKT_SVE_PRED BIT(2) #define SPE_OP_PKT_SVE_FP BIT(1) +#define SPE_OP_PKT_LDST_SUBCLASS_GCS(v) (((v) & (GENMASK_ULL(7, 3) | BIT(1))) == 0x40) + +#define SPE_OP_PKT_GCS_COMM BIT(2) + #define SPE_OP_PKT_CR_MASK GENMASK_ULL(4, 3) #define SPE_OP_PKT_CR_BL(v) (FIELD_GET(SPE_OP_PKT_CR_MASK, (v)) == 1) #define SPE_OP_PKT_CR_RET(v) (FIELD_GET(SPE_OP_PKT_CR_MASK, (v)) == 2) diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 71be979f5077..dc19e72258f3 100644 --- 
a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -39,6 +39,11 @@ #define is_ldst_op(op) (!!((op) & ARM_SPE_OP_LDST)) +#define is_simd_op(op) (!!((op) & (ARM_SPE_OP_SIMD_FP | ARM_SPE_OP_SVE | \ + ARM_SPE_OP_SME | ARM_SPE_OP_ASE))) + +#define is_mem_op(op) (is_ldst_op(op) || is_simd_op(op)) + #define ARM_SPE_CACHE_EVENT(lvl) \ (ARM_SPE_##lvl##_ACCESS | ARM_SPE_##lvl##_MISS) @@ -346,10 +351,7 @@ static struct simd_flags arm_spe__synth_simd_flags(const struct arm_spe_record * { struct simd_flags simd_flags = {}; - if ((record->op & ARM_SPE_OP_LDST) && (record->op & ARM_SPE_OP_SVE_LDST)) - simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE; - - if ((record->op & ARM_SPE_OP_OTHER) && (record->op & ARM_SPE_OP_SVE_OTHER)) + if (record->op & ARM_SPE_OP_SVE) simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE; if (record->type & ARM_SPE_SVE_PARTIAL_PRED) @@ -570,15 +572,21 @@ static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq, } static const struct midr_range common_ds_encoding_cpus[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A715), MIDR_ALL_VERSIONS(MIDR_CORTEX_A720), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A720AE), MIDR_ALL_VERSIONS(MIDR_CORTEX_A725), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X1), MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C), MIDR_ALL_VERSIONS(MIDR_CORTEX_X3), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X4), MIDR_ALL_VERSIONS(MIDR_CORTEX_X925), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V3), {}, }; @@ -988,8 +996,7 @@ arm_spe__synth_data_source(struct arm_spe_queue *speq, { union perf_mem_data_src data_src = {}; - /* Only synthesize data source for LDST operations */ - if (!is_ldst_op(record->op)) + if (!is_mem_op(record->op)) return data_src; if (record->op & ARM_SPE_OP_LD) @@ -997,7 +1004,7 @@ arm_spe__synth_data_source(struct arm_spe_queue *speq, else if (record->op & ARM_SPE_OP_ST) data_src.mem_op = 
PERF_MEM_OP_STORE; else - return data_src; + data_src.mem_op = PERF_MEM_OP_NA; arm_spe__synth_ds(speq, record, &data_src); arm_spe__synth_memory_level(speq, record, &data_src); @@ -1098,11 +1105,7 @@ static int arm_spe_sample(struct arm_spe_queue *speq) return err; } - /* - * When data_src is zero it means the record is not a memory operation, - * skip to synthesize memory sample for this case. - */ - if (spe->sample_memory && is_ldst_op(record->op)) { + if (spe->sample_memory && is_mem_op(record->op)) { err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src); if (err) return err; @@ -1732,10 +1735,7 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) attr.sample_period = spe->synth_opts.period; /* create new id val to be a fixed offset from evsel id */ - id = evsel->core.id[0] + 1000000000; - - if (!id) - id = 1; + id = auxtrace_synth_id_range_start(evsel); if (spe->synth_opts.flc) { spe->sample_flc = true; diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 1539c1dc823c..a224687ffbc1 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -62,6 +62,22 @@ #include <internal/lib.h> #include "util/sample.h" +#define AUXTRACE_SYNTH_EVENT_ID_OFFSET 1000000000ULL + +/* + * Event IDs are allocated sequentially, so a big offset from any + * existing ID will reach a unused range. + */ +u64 auxtrace_synth_id_range_start(struct evsel *evsel) +{ + u64 id = evsel->core.id[0] + AUXTRACE_SYNTH_EVENT_ID_OFFSET; + + if (!id) + id = 1; + + return id; +} + /* * Make a group from 'leader' to 'last', requiring that the events were not * already grouped to a different leader. 
@@ -1363,7 +1379,8 @@ static void unleader_auxtrace(struct perf_session *session) } } -int perf_event__process_auxtrace_info(struct perf_session *session, +int perf_event__process_auxtrace_info(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, union perf_event *event) { enum auxtrace_type type = event->auxtrace_info.type; @@ -1407,7 +1424,8 @@ int perf_event__process_auxtrace_info(struct perf_session *session, return 0; } -s64 perf_event__process_auxtrace(struct perf_session *session, +s64 perf_event__process_auxtrace(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, union perf_event *event) { s64 err; @@ -1804,7 +1822,8 @@ void events_stats__auxtrace_error_warn(const struct events_stats *stats) } } -int perf_event__process_auxtrace_error(struct perf_session *session, +int perf_event__process_auxtrace_error(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, union perf_event *event) { if (auxtrace__dont_decode(session)) diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index e0a5b39fed12..6947f3f284c0 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -8,16 +8,11 @@ #define __PERF_AUXTRACE_H #include <sys/types.h> -#include <errno.h> -#include <stdbool.h> -#include <stddef.h> #include <stdio.h> // FILE -#include <linux/list.h> #include <linux/perf_event.h> #include <linux/types.h> -#include <perf/cpumap.h> -#include <asm/bitsperlong.h> #include <asm/barrier.h> +#include <perf/cpumap.h> union perf_event; struct perf_session; @@ -459,8 +454,6 @@ struct addr_filters { struct auxtrace_cache; -#ifdef HAVE_AUXTRACE_SUPPORT - u64 compat_auxtrace_mmap__read_head(struct auxtrace_mmap *mm); int compat_auxtrace_mmap__write_tail(struct auxtrace_mmap *mm, u64 tail); @@ -615,11 +608,14 @@ void auxtrace_synth_error(struct perf_record_auxtrace_error *auxtrace_error, int int code, int cpu, pid_t pid, pid_t tid, u64 ip, const char *msg, u64 timestamp); -int 
perf_event__process_auxtrace_info(struct perf_session *session, +int perf_event__process_auxtrace_info(const struct perf_tool *tool, + struct perf_session *session, union perf_event *event); -s64 perf_event__process_auxtrace(struct perf_session *session, +s64 perf_event__process_auxtrace(const struct perf_tool *tool, + struct perf_session *session, union perf_event *event); -int perf_event__process_auxtrace_error(struct perf_session *session, +int perf_event__process_auxtrace_error(const struct perf_tool *tool, + struct perf_session *session, union perf_event *event); int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts, const char *str, int unset); @@ -648,6 +644,7 @@ void auxtrace__free_events(struct perf_session *session); void auxtrace__free(struct perf_session *session); bool auxtrace__evsel_is_auxtrace(struct perf_session *session, struct evsel *evsel); +u64 auxtrace_synth_id_range_start(struct evsel *evsel); #define ITRACE_HELP \ " i[period]: synthesize instructions events\n" \ @@ -702,212 +699,4 @@ void itrace_synth_opts__clear_time_range(struct itrace_synth_opts *opts) opts->range_num = 0; } -#else -#include "debug.h" - -static inline struct auxtrace_record * -auxtrace_record__init(struct evlist *evlist __maybe_unused, - int *err) -{ - *err = 0; - return NULL; -} - -static inline -void auxtrace_record__free(struct auxtrace_record *itr __maybe_unused) -{ -} - -static inline -int auxtrace_record__options(struct auxtrace_record *itr __maybe_unused, - struct evlist *evlist __maybe_unused, - struct record_opts *opts __maybe_unused) -{ - return 0; -} - -static inline -int perf_event__process_auxtrace_info(struct perf_session *session __maybe_unused, - union perf_event *event __maybe_unused) -{ - return 0; -} - -static inline -s64 perf_event__process_auxtrace(struct perf_session *session __maybe_unused, - union perf_event *event __maybe_unused) -{ - return 0; -} - -static inline -int perf_event__process_auxtrace_error(struct perf_session *session 
__maybe_unused, - union perf_event *event __maybe_unused) -{ - return 0; -} - -static inline -void perf_session__auxtrace_error_inc(struct perf_session *session - __maybe_unused, - union perf_event *event - __maybe_unused) -{ -} - -static inline -void events_stats__auxtrace_error_warn(const struct events_stats *stats - __maybe_unused) -{ -} - -static inline -int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts __maybe_unused, - const char *str __maybe_unused, int unset __maybe_unused) -{ - pr_err("AUX area tracing not supported\n"); - return -EINVAL; -} - -static inline -int itrace_parse_synth_opts(const struct option *opt __maybe_unused, - const char *str __maybe_unused, - int unset __maybe_unused) -{ - pr_err("AUX area tracing not supported\n"); - return -EINVAL; -} - -static inline -int auxtrace_parse_snapshot_options(struct auxtrace_record *itr __maybe_unused, - struct record_opts *opts __maybe_unused, - const char *str) -{ - if (!str) - return 0; - pr_err("AUX area tracing not supported\n"); - return -EINVAL; -} - -static inline -int auxtrace_parse_sample_options(struct auxtrace_record *itr __maybe_unused, - struct evlist *evlist __maybe_unused, - struct record_opts *opts __maybe_unused, - const char *str) -{ - if (!str) - return 0; - pr_err("AUX area tracing not supported\n"); - return -EINVAL; -} - -static inline -int auxtrace_parse_aux_action(struct evlist *evlist __maybe_unused) -{ - pr_err("AUX area tracing not supported\n"); - return -EINVAL; -} - -static inline -int auxtrace__process_event(struct perf_session *session __maybe_unused, - union perf_event *event __maybe_unused, - struct perf_sample *sample __maybe_unused, - const struct perf_tool *tool __maybe_unused) -{ - return 0; -} - -static inline -void auxtrace__dump_auxtrace_sample(struct perf_session *session __maybe_unused, - struct perf_sample *sample __maybe_unused) -{ -} - -static inline -int auxtrace__flush_events(struct perf_session *session __maybe_unused, - const struct 
perf_tool *tool __maybe_unused) -{ - return 0; -} - -static inline -void auxtrace__free_events(struct perf_session *session __maybe_unused) -{ -} - -static inline -void auxtrace_cache__free(struct auxtrace_cache *auxtrace_cache __maybe_unused) -{ -} - -static inline -void auxtrace__free(struct perf_session *session __maybe_unused) -{ -} - -static inline -int auxtrace_index__write(int fd __maybe_unused, - struct list_head *head __maybe_unused) -{ - return -EINVAL; -} - -static inline -int auxtrace_index__process(int fd __maybe_unused, - u64 size __maybe_unused, - struct perf_session *session __maybe_unused, - bool needs_swap __maybe_unused) -{ - return -EINVAL; -} - -static inline -void auxtrace_index__free(struct list_head *head __maybe_unused) -{ -} - -static inline -bool auxtrace__evsel_is_auxtrace(struct perf_session *session __maybe_unused, - struct evsel *evsel __maybe_unused) -{ - return false; -} - -static inline -int auxtrace_parse_filters(struct evlist *evlist __maybe_unused) -{ - return 0; -} - -int auxtrace_mmap__mmap(struct auxtrace_mmap *mm, - struct auxtrace_mmap_params *mp, - void *userpg, int fd); -void auxtrace_mmap__munmap(struct auxtrace_mmap *mm); -void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp, - off_t auxtrace_offset, - unsigned int auxtrace_pages, - bool auxtrace_overwrite); -void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, - struct evlist *evlist, - struct evsel *evsel, int idx); - -#define ITRACE_HELP "" - -static inline -void itrace_synth_opts__set_time_range(struct itrace_synth_opts *opts - __maybe_unused, - struct perf_time_interval *ptime_range - __maybe_unused, - int range_num __maybe_unused) -{ -} - -static inline -void itrace_synth_opts__clear_time_range(struct itrace_synth_opts *opts - __maybe_unused) -{ -} - -#endif - #endif diff --git a/tools/perf/util/bpf-filter.h b/tools/perf/util/bpf-filter.h index 122477f2de44..818c554b91b2 100644 --- a/tools/perf/util/bpf-filter.h +++ 
b/tools/perf/util/bpf-filter.h @@ -36,6 +36,8 @@ int perf_bpf_filter__unpin(void); #else /* !HAVE_BPF_SKEL */ +#include <errno.h> + static inline int perf_bpf_filter__parse(struct list_head *expr_head __maybe_unused, const char *str __maybe_unused) { diff --git a/tools/perf/util/bpf-trace-summary.c b/tools/perf/util/bpf-trace-summary.c index 8dfe7e678941..cf6e1e4402d5 100644 --- a/tools/perf/util/bpf-trace-summary.c +++ b/tools/perf/util/bpf-trace-summary.c @@ -1,4 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#include <errno.h> #include <inttypes.h> #include <math.h> #include <stdio.h> diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c index ca5d01b9017d..a5882b582205 100644 --- a/tools/perf/util/bpf_counter.c +++ b/tools/perf/util/bpf_counter.c @@ -460,6 +460,7 @@ static int bperf_reload_leader_program(struct evsel *evsel, int attr_map_fd, struct bperf_leader_bpf *skel = bperf_leader_bpf__open(); int link_fd, diff_map_fd, err; struct bpf_link *link = NULL; + struct perf_thread_map *threads; if (!skel) { pr_err("Failed to open leader skeleton\n"); @@ -495,7 +496,11 @@ static int bperf_reload_leader_program(struct evsel *evsel, int attr_map_fd, * following evsel__open_per_cpu call */ evsel->leader_skel = skel; - evsel__open(evsel, evsel->core.cpus, evsel->core.threads); + assert(!perf_cpu_map__has_any_cpu_or_is_empty(evsel->core.cpus)); + /* Always open system wide. 
*/ + threads = thread_map__new_by_tid(-1); + evsel__open(evsel, evsel->core.cpus, threads); + perf_thread_map__put(threads); out: bperf_leader_bpf__destroy(skel); diff --git a/tools/perf/util/bpf_counter_cgroup.c b/tools/perf/util/bpf_counter_cgroup.c index 690be3ce3e11..17d7196c6589 100644 --- a/tools/perf/util/bpf_counter_cgroup.c +++ b/tools/perf/util/bpf_counter_cgroup.c @@ -4,6 +4,7 @@ /* Copyright (c) 2021 Google */ #include <assert.h> +#include <errno.h> #include <limits.h> #include <unistd.h> #include <sys/file.h> @@ -27,6 +28,7 @@ #include "cpumap.h" #include "thread_map.h" +#include "bpf_skel/bperf_cgroup.h" #include "bpf_skel/bperf_cgroup.skel.h" static struct perf_event_attr cgrp_switch_attr = { @@ -42,6 +44,55 @@ static struct bperf_cgroup_bpf *skel; #define FD(evt, cpu) (*(int *)xyarray__entry(evt->core.fd, cpu, 0)) +static void setup_rodata(struct bperf_cgroup_bpf *sk, int evlist_size) +{ + int map_size, total_cpus = cpu__max_cpu().cpu; + + sk->rodata->num_cpus = total_cpus; + sk->rodata->num_events = evlist_size / nr_cgroups; + + if (cgroup_is_v2("perf_event") > 0) + sk->rodata->use_cgroup_v2 = 1; + + BUG_ON(evlist_size % nr_cgroups != 0); + + /* we need one copy of events per cpu for reading */ + map_size = total_cpus * evlist_size / nr_cgroups; + bpf_map__set_max_entries(sk->maps.events, map_size); + bpf_map__set_max_entries(sk->maps.cgrp_idx, nr_cgroups); + /* previous result is saved in a per-cpu array */ + map_size = evlist_size / nr_cgroups; + bpf_map__set_max_entries(sk->maps.prev_readings, map_size); + /* cgroup result needs all events (per-cpu) */ + map_size = evlist_size; + bpf_map__set_max_entries(sk->maps.cgrp_readings, map_size); +} + +static void test_max_events_program_load(void) +{ +#ifndef NDEBUG + /* + * Test that the program verifies with the maximum number of events. If + * this test fails unfortunately perf needs recompiling with a lower + * BPERF_CGROUP__MAX_EVENTS to avoid BPF verifier issues. 
+ */ + int err, max_events = BPERF_CGROUP__MAX_EVENTS * nr_cgroups; + struct bperf_cgroup_bpf *test_skel = bperf_cgroup_bpf__open(); + + if (!test_skel) { + pr_err("Failed to open cgroup skeleton\n"); + return; + } + setup_rodata(test_skel, max_events); + err = bperf_cgroup_bpf__load(test_skel); + if (err) { + pr_err("Failed to load cgroup skeleton with max events %d.\n", + BPERF_CGROUP__MAX_EVENTS); + } + bperf_cgroup_bpf__destroy(test_skel); +#endif +} + static int bperf_load_program(struct evlist *evlist) { struct bpf_link *link; @@ -50,35 +101,18 @@ static int bperf_load_program(struct evlist *evlist) int i, j; struct perf_cpu cpu; int total_cpus = cpu__max_cpu().cpu; - int map_size, map_fd; - int prog_fd, err; + int map_fd, prog_fd, err; + + set_max_rlimit(); + + test_max_events_program_load(); skel = bperf_cgroup_bpf__open(); if (!skel) { pr_err("Failed to open cgroup skeleton\n"); return -1; } - - skel->rodata->num_cpus = total_cpus; - skel->rodata->num_events = evlist->core.nr_entries / nr_cgroups; - - if (cgroup_is_v2("perf_event") > 0) - skel->rodata->use_cgroup_v2 = 1; - - BUG_ON(evlist->core.nr_entries % nr_cgroups != 0); - - /* we need one copy of events per cpu for reading */ - map_size = total_cpus * evlist->core.nr_entries / nr_cgroups; - bpf_map__set_max_entries(skel->maps.events, map_size); - bpf_map__set_max_entries(skel->maps.cgrp_idx, nr_cgroups); - /* previous result is saved in a per-cpu array */ - map_size = evlist->core.nr_entries / nr_cgroups; - bpf_map__set_max_entries(skel->maps.prev_readings, map_size); - /* cgroup result needs all events (per-cpu) */ - map_size = evlist->core.nr_entries; - bpf_map__set_max_entries(skel->maps.cgrp_readings, map_size); - - set_max_rlimit(); + setup_rodata(skel, evlist->core.nr_entries); err = bperf_cgroup_bpf__load(skel); if (err) { diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c index e61a3b20be0a..c456d24efa30 100644 --- a/tools/perf/util/bpf_ftrace.c +++ 
b/tools/perf/util/bpf_ftrace.c @@ -1,6 +1,7 @@ -#include <stdio.h> +#include <errno.h> #include <fcntl.h> #include <stdint.h> +#include <stdio.h> #include <stdlib.h> #include <bpf/bpf.h> diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c index 60b81d586323..7b5671f13c53 100644 --- a/tools/perf/util/bpf_lock_contention.c +++ b/tools/perf/util/bpf_lock_contention.c @@ -184,6 +184,9 @@ int lock_contention_prepare(struct lock_contention *con) struct evlist *evlist = con->evlist; struct target *target = con->target; + /* make sure it loads the kernel map before lookup */ + map__load(machine__kernel_map(con->machine)); + skel = lock_contention_bpf__open(); if (!skel) { pr_err("Failed to open lock-contention BPF skeleton\n"); @@ -749,9 +752,6 @@ int lock_contention_read(struct lock_contention *con) bpf_prog_test_run_opts(prog_fd, &opts); } - /* make sure it loads the kernel map */ - maps__load_first(machine->kmaps); - prev_key = NULL; while (!bpf_map_get_next_key(fd, prev_key, &key)) { s64 ls_key; diff --git a/tools/perf/util/bpf_map.c b/tools/perf/util/bpf_map.c index 578f27d2d6b4..442f91b4e8e1 100644 --- a/tools/perf/util/bpf_map.c +++ b/tools/perf/util/bpf_map.c @@ -5,6 +5,7 @@ #include <bpf/libbpf.h> #include <linux/err.h> #include <linux/kernel.h> +#include <errno.h> #include <stdbool.h> #include <stdlib.h> #include <unistd.h> diff --git a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c index cb86e261b4de..2a6e61864ee0 100644 --- a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c +++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c @@ -45,7 +45,7 @@ struct syscalls_sys_enter { __uint(type, BPF_MAP_TYPE_PROG_ARRAY); __type(key, __u32); __type(value, __u32); - __uint(max_entries, 512); + __uint(max_entries, 1024); } syscalls_sys_enter SEC(".maps"); /* @@ -57,7 +57,7 @@ struct syscalls_sys_exit { __uint(type, BPF_MAP_TYPE_PROG_ARRAY); __type(key, __u32); 
__type(value, __u32); - __uint(max_entries, 512); + __uint(max_entries, 1024); } syscalls_sys_exit SEC(".maps"); struct syscall_enter_args { diff --git a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c index 57cab7647a9a..c2298a2decc9 100644 --- a/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c +++ b/tools/perf/util/bpf_skel/bperf_cgroup.bpf.c @@ -1,14 +1,12 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) // Copyright (c) 2021 Facebook // Copyright (c) 2021 Google +#include "bperf_cgroup.h" #include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> #include <bpf/bpf_core_read.h> -#define MAX_LEVELS 10 // max cgroup hierarchy level: arbitrary -#define MAX_EVENTS 32 // max events per cgroup: arbitrary - // NOTE: many of map and global data will be modified before loading // from the userspace (perf tool) using the skeleton helpers. @@ -97,7 +95,7 @@ static inline int get_cgroup_v1_idx(__u32 *cgrps, int size) cgrp = BPF_CORE_READ(p, cgroups, subsys[perf_subsys_id], cgroup); level = BPF_CORE_READ(cgrp, level); - for (cnt = 0; i < MAX_LEVELS; i++) { + for (cnt = 0; i < BPERF_CGROUP__MAX_LEVELS; i++) { __u64 cgrp_id; if (i > level) @@ -123,7 +121,7 @@ static inline int get_cgroup_v2_idx(__u32 *cgrps, int size) __u32 *elem; int cnt; - for (cnt = 0; i < MAX_LEVELS; i++) { + for (cnt = 0; i < BPERF_CGROUP__MAX_LEVELS; i++) { __u64 cgrp_id = bpf_get_current_ancestor_cgroup_id(i); if (cgrp_id == 0) @@ -148,17 +146,17 @@ static int bperf_cgroup_count(void) register int c = 0; struct bpf_perf_event_value val, delta, *prev_val, *cgrp_val; __u32 cpu = bpf_get_smp_processor_id(); - __u32 cgrp_idx[MAX_LEVELS]; + __u32 cgrp_idx[BPERF_CGROUP__MAX_LEVELS]; int cgrp_cnt; __u32 key, cgrp; long err; if (use_cgroup_v2) - cgrp_cnt = get_cgroup_v2_idx(cgrp_idx, MAX_LEVELS); + cgrp_cnt = get_cgroup_v2_idx(cgrp_idx, BPERF_CGROUP__MAX_LEVELS); else - cgrp_cnt = get_cgroup_v1_idx(cgrp_idx, MAX_LEVELS); + cgrp_cnt = 
get_cgroup_v1_idx(cgrp_idx, BPERF_CGROUP__MAX_LEVELS); - for ( ; idx < MAX_EVENTS; idx++) { + for ( ; idx < BPERF_CGROUP__MAX_EVENTS; idx++) { if (idx == num_events) break; @@ -186,7 +184,7 @@ static int bperf_cgroup_count(void) delta.enabled = val.enabled - prev_val->enabled; delta.running = val.running - prev_val->running; - for (c = 0; c < MAX_LEVELS; c++) { + for (c = 0; c < BPERF_CGROUP__MAX_LEVELS; c++) { if (c == cgrp_cnt) break; diff --git a/tools/perf/util/bpf_skel/bperf_cgroup.h b/tools/perf/util/bpf_skel/bperf_cgroup.h new file mode 100644 index 000000000000..3fb84b19d39a --- /dev/null +++ b/tools/perf/util/bpf_skel/bperf_cgroup.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Data structures shared between BPF and tools. */ +#ifndef __BPERF_CGROUP_H +#define __BPERF_CGROUP_H + +// These constants impact code size of bperf_cgroup.bpf.c that may result in BPF +// verifier issues. They are exposed to control the size and also to disable BPF +// counters when the number of user events is too large. 
+ +// max cgroup hierarchy level: arbitrary +#define BPERF_CGROUP__MAX_LEVELS 10 +// max events per cgroup: arbitrary +#define BPERF_CGROUP__MAX_EVENTS 128 + +#endif /* __BPERF_CGROUP_H */ diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 35505a1ffd11..fdb35133fde4 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -122,7 +122,7 @@ int filename__snprintf_build_id(const char *pathname, char *sbuild_id, size_t sb struct build_id bid = { .size = 0, }; int ret; - ret = filename__read_build_id(pathname, &bid, /*block=*/true); + ret = filename__read_build_id(pathname, &bid); if (ret < 0) return ret; @@ -848,7 +848,7 @@ static int filename__read_build_id_ns(const char *filename, int ret; nsinfo__mountns_enter(nsi, &nsc); - ret = filename__read_build_id(filename, bid, /*block=*/true); + ret = filename__read_build_id(filename, bid); nsinfo__mountns_exit(&nsc); return ret; diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index d7b7eef740b9..428e5350d7a2 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -275,9 +275,13 @@ int parse_callchain_record(const char *arg, struct callchain_param *param) if (tok) { unsigned long size; - size = strtoul(tok, &name, 0); - if (size < (unsigned) sysctl__max_stack()) - param->max_stack = size; + if (!strncmp(tok, "defer", sizeof("defer"))) { + param->defer = true; + } else { + size = strtoul(tok, &name, 0); + if (size < (unsigned) sysctl__max_stack()) + param->max_stack = size; + } } break; @@ -314,6 +318,12 @@ int parse_callchain_record(const char *arg, struct callchain_param *param) } while (0); free(buf); + + if (param->defer && param->record_mode != CALLCHAIN_FP) { + pr_err("callchain: deferred callchain only works with FP\n"); + return -EINVAL; + } + return ret; } @@ -1828,3 +1838,38 @@ int sample__for_each_callchain_node(struct thread *thread, struct evsel *evsel, } return 0; } + +/* + * This function merges earlier samples 
(@sample_orig) waiting for deferred + * user callchains with the matching callchain record (@sample_callchain) + * which is delivered now. The @sample_orig->callchain should be released + * after use if ->deferred_callchain is set. + */ +int sample__merge_deferred_callchain(struct perf_sample *sample_orig, + struct perf_sample *sample_callchain) +{ + u64 nr_orig = sample_orig->callchain->nr - 1; + u64 nr_deferred = sample_callchain->callchain->nr; + struct ip_callchain *callchain; + + if (sample_orig->callchain->nr < 2) { + sample_orig->deferred_callchain = false; + return -EINVAL; + } + + callchain = calloc(1 + nr_orig + nr_deferred, sizeof(u64)); + if (callchain == NULL) { + sample_orig->deferred_callchain = false; + return -ENOMEM; + } + + callchain->nr = nr_orig + nr_deferred; + /* copy original including PERF_CONTEXT_USER_DEFERRED (but the cookie) */ + memcpy(callchain->ips, sample_orig->callchain->ips, nr_orig * sizeof(u64)); + /* copy deferred user callchains */ + memcpy(&callchain->ips[nr_orig], sample_callchain->callchain->ips, + nr_deferred * sizeof(u64)); + + sample_orig->callchain = callchain; + return 0; +} diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 86ed9e4d04f9..2a52af8c80ac 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -98,6 +98,7 @@ extern bool dwarf_callchain_users; struct callchain_param { bool enabled; + bool defer; enum perf_call_graph_mode record_mode; u32 dump_size; enum chain_mode mode; @@ -317,4 +318,7 @@ int sample__for_each_callchain_node(struct thread *thread, struct evsel *evsel, struct perf_sample *sample, int max_stack, bool symbols, callchain_iter_fn cb, void *data); +int sample__merge_deferred_callchain(struct perf_sample *sample_orig, + struct perf_sample *sample_callchain); + #endif /* __PERF_CALLCHAIN_H */ diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index 25e2769b5e74..040eb75f0804 100644 --- a/tools/perf/util/cgroup.c +++ 
b/tools/perf/util/cgroup.c @@ -10,6 +10,7 @@ #include <sys/types.h> #include <sys/stat.h> #include <sys/statfs.h> +#include <errno.h> #include <fcntl.h> #include <stdlib.h> #include <string.h> diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 6f914620c6ff..e0219bc6330a 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -37,6 +37,8 @@ #define METRIC_ONLY_LEN 20 +static struct stats walltime_nsecs_stats; + struct perf_stat_config stat_config = { .aggr_mode = AGGR_GLOBAL, .aggr_level = MAX_CACHE_LVL + 1, @@ -45,7 +47,6 @@ struct perf_stat_config stat_config = { .run_count = 1, .metric_only_len = METRIC_ONLY_LEN, .walltime_nsecs_stats = &walltime_nsecs_stats, - .ru_stats = &ru_stats, .big_num = true, .ctl_fd = -1, .ctl_fd_ack = -1, diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 89570397a4b3..a80845038a5e 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -684,16 +684,21 @@ size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size) unsigned char *bitmap; struct perf_cpu c, last_cpu = perf_cpu_map__max(map); - if (buf == NULL) + if (buf == NULL || size == 0) return 0; + if (last_cpu.cpu < 0) { + buf[0] = '\0'; + return 0; + } + bitmap = zalloc(last_cpu.cpu / 8 + 1); if (bitmap == NULL) { buf[0] = '\0'; return 0; } - perf_cpu_map__for_each_cpu(c, idx, map) + perf_cpu_map__for_each_cpu_skip_any(c, idx, map) bitmap[c.cpu / 8] |= 1 << (c.cpu % 8); for (int cpu = last_cpu.cpu / 4 * 4; cpu >= 0; cpu -= 4) { diff --git a/tools/perf/util/cs-etm-decoder/Build b/tools/perf/util/cs-etm-decoder/Build index 056d665f7f88..27550db2aa4c 100644 --- a/tools/perf/util/cs-etm-decoder/Build +++ b/tools/perf/util/cs-etm-decoder/Build @@ -1 +1 @@ -perf-util-$(CONFIG_AUXTRACE) += cs-etm-decoder.o +perf-util-y += cs-etm-decoder.o diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index b85a8837bddc..3050fe212666 100644 --- 
a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -588,6 +588,7 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( const ocsd_generic_trace_elem *elem) { ocsd_datapath_resp_t resp = OCSD_RESP_CONT; + ocsd_gen_trc_elem_t type; struct cs_etm_decoder *decoder = (struct cs_etm_decoder *) context; struct cs_etm_queue *etmq = decoder->data; struct cs_etm_packet_queue *packet_queue; @@ -597,52 +598,29 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( if (!packet_queue) return OCSD_RESP_FATAL_SYS_ERR; - switch (elem->elem_type) { - case OCSD_GEN_TRC_ELEM_UNKNOWN: - break; - case OCSD_GEN_TRC_ELEM_EO_TRACE: - case OCSD_GEN_TRC_ELEM_NO_SYNC: - case OCSD_GEN_TRC_ELEM_TRACE_ON: + type = elem->elem_type; + + if (type == OCSD_GEN_TRC_ELEM_EO_TRACE || + type == OCSD_GEN_TRC_ELEM_NO_SYNC || + type == OCSD_GEN_TRC_ELEM_TRACE_ON) resp = cs_etm_decoder__buffer_discontinuity(etmq, packet_queue, trace_chan_id); - break; - case OCSD_GEN_TRC_ELEM_INSTR_RANGE: + else if (type == OCSD_GEN_TRC_ELEM_INSTR_RANGE) resp = cs_etm_decoder__buffer_range(etmq, packet_queue, elem, trace_chan_id); - break; - case OCSD_GEN_TRC_ELEM_EXCEPTION: + else if (type == OCSD_GEN_TRC_ELEM_EXCEPTION) resp = cs_etm_decoder__buffer_exception(etmq, packet_queue, elem, trace_chan_id); - break; - case OCSD_GEN_TRC_ELEM_EXCEPTION_RET: + else if (type == OCSD_GEN_TRC_ELEM_EXCEPTION_RET) resp = cs_etm_decoder__buffer_exception_ret(etmq, packet_queue, trace_chan_id); - break; - case OCSD_GEN_TRC_ELEM_TIMESTAMP: + else if (type == OCSD_GEN_TRC_ELEM_TIMESTAMP) resp = cs_etm_decoder__do_hard_timestamp(etmq, elem, trace_chan_id, indx); - break; - case OCSD_GEN_TRC_ELEM_PE_CONTEXT: + else if (type == OCSD_GEN_TRC_ELEM_PE_CONTEXT) resp = cs_etm_decoder__set_tid(etmq, packet_queue, elem, trace_chan_id); - break; - /* Unused packet types */ - case OCSD_GEN_TRC_ELEM_I_RANGE_NOPATH: - case OCSD_GEN_TRC_ELEM_ADDR_NACC: - case 
OCSD_GEN_TRC_ELEM_CYCLE_COUNT: - case OCSD_GEN_TRC_ELEM_ADDR_UNKNOWN: - case OCSD_GEN_TRC_ELEM_EVENT: - case OCSD_GEN_TRC_ELEM_SWTRACE: - case OCSD_GEN_TRC_ELEM_CUSTOM: - case OCSD_GEN_TRC_ELEM_SYNC_MARKER: - case OCSD_GEN_TRC_ELEM_MEMTRANS: -#if (OCSD_VER_NUM >= 0x010400) - case OCSD_GEN_TRC_ELEM_INSTRUMENTATION: -#endif - default: - break; - } return resp; } diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 30f4bb3e7fa3..25d56e0f1c07 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -777,7 +777,7 @@ static void cs_etm__packet_dump(const char *pkt_string, void *data) char queue_nr[64]; if (verbose) - snprintf(queue_nr, sizeof(queue_nr), "Qnr:%d; ", etmq->queue_nr); + snprintf(queue_nr, sizeof(queue_nr), "Qnr:%u; ", etmq->queue_nr); else queue_nr[0] = '\0'; @@ -1726,10 +1726,7 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, attr.read_format = evsel->core.attr.read_format; /* create new id val to be a fixed offset from evsel id */ - id = evsel->core.id[0] + 1000000000; - - if (!id) - id = 1; + id = auxtrace_synth_id_range_start(evsel); if (etm->synth_opts.branches) { attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS; diff --git a/tools/perf/util/debuginfo.c b/tools/perf/util/debuginfo.c index bb9ebd84ec2d..4a559b3e8cdc 100644 --- a/tools/perf/util/debuginfo.c +++ b/tools/perf/util/debuginfo.c @@ -115,7 +115,7 @@ struct debuginfo *debuginfo__new(const char *path) * incase the path isn't for a regular file. 
*/ assert(!dso__has_build_id(dso)); - if (filename__read_build_id(path, &bid, /*block=*/false) > 0) + if (filename__read_build_id(path, &bid) > 0) dso__set_build_id(dso, &bid); for (type = distro_dwarf_types; diff --git a/tools/perf/util/drm_pmu.c b/tools/perf/util/drm_pmu.c index 98d4d2b556d4..b48a375e4584 100644 --- a/tools/perf/util/drm_pmu.c +++ b/tools/perf/util/drm_pmu.c @@ -10,6 +10,7 @@ #include <api/io.h> #include <ctype.h> #include <dirent.h> +#include <errno.h> #include <fcntl.h> #include <unistd.h> #include <linux/unistd.h> @@ -119,7 +120,7 @@ static struct drm_pmu *add_drm_pmu(struct list_head *pmus, char *line, size_t li return NULL; } - drm->pmu.cpus = perf_cpu_map__new("0"); + drm->pmu.cpus = perf_cpu_map__new_int(0); if (!drm->pmu.cpus) { perf_pmu__delete(&drm->pmu); return NULL; diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c index 64c1d65b0149..0a7645c7fae7 100644 --- a/tools/perf/util/dsos.c +++ b/tools/perf/util/dsos.c @@ -81,13 +81,13 @@ static int dsos__read_build_ids_cb(struct dso *dso, void *data) return 0; } nsinfo__mountns_enter(dso__nsinfo(dso), &nsc); - if (filename__read_build_id(dso__long_name(dso), &bid, /*block=*/true) > 0) { + if (filename__read_build_id(dso__long_name(dso), &bid) > 0) { dso__set_build_id(dso, &bid); args->have_build_id = true; } else if (errno == ENOENT && dso__nsinfo(dso)) { char *new_name = dso__filename_with_chroot(dso, dso__long_name(dso)); - if (new_name && filename__read_build_id(new_name, &bid, /*block=*/true) > 0) { + if (new_name && filename__read_build_id(new_name, &bid) > 0) { dso__set_build_id(dso, &bid); args->have_build_id = true; } diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index fcf44149feb2..4c92cc1a952c 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -61,6 +61,7 @@ static const char *perf_event__names[] = { [PERF_RECORD_CGROUP] = "CGROUP", [PERF_RECORD_TEXT_POKE] = "TEXT_POKE", [PERF_RECORD_AUX_OUTPUT_HW_ID] = "AUX_OUTPUT_HW_ID", + 
[PERF_RECORD_CALLCHAIN_DEFERRED] = "CALLCHAIN_DEFERRED", [PERF_RECORD_HEADER_ATTR] = "ATTR", [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE", [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA", diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 80d8387e6b97..03674d2cbd01 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -85,6 +85,7 @@ void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus, evlist->ctl_fd.pos = -1; evlist->nr_br_cntr = -1; metricgroup__rblist_init(&evlist->metric_events); + INIT_LIST_HEAD(&evlist->deferred_samples); } struct evlist *evlist__new(void) @@ -101,16 +102,24 @@ struct evlist *evlist__new_default(void) { struct evlist *evlist = evlist__new(); bool can_profile_kernel; - int err; + struct perf_pmu *pmu = NULL; if (!evlist) return NULL; can_profile_kernel = perf_event_paranoid_check(1); - err = parse_event(evlist, can_profile_kernel ? "cycles:P" : "cycles:Pu"); - if (err) { - evlist__delete(evlist); - return NULL; + + while ((pmu = perf_pmus__scan_core(pmu)) != NULL) { + char buf[256]; + int err; + + snprintf(buf, sizeof(buf), "%s/cycles/%s", pmu->name, + can_profile_kernel ? "P" : "Pu"); + err = parse_event(evlist, buf); + if (err) { + evlist__delete(evlist); + return NULL; + } } if (evlist->core.nr_entries > 1) { diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 5e71e3dc6042..911834ae7c2a 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -92,6 +92,8 @@ struct evlist { * of struct metric_expr. */ struct rblist metric_events; + /* samples with deferred_callchain would wait here. 
*/ + struct list_head deferred_samples; }; struct evsel_str_handler { diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 56ebefd075f2..9cd706f62793 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -402,7 +402,6 @@ void evsel__init(struct evsel *evsel, evsel->sample_size = __evsel__sample_size(attr->sample_type); evsel__calc_id_pos(evsel); evsel->cmdline_group_boundary = false; - evsel->metric_events = NULL; evsel->per_pkg_mask = NULL; evsel->collect_stat = false; evsel->group_pmu_name = NULL; @@ -539,6 +538,7 @@ struct evsel *evsel__clone(struct evsel *dest, struct evsel *orig) #endif evsel->handler = orig->handler; evsel->core.leader = orig->core.leader; + evsel->metric_leader = orig->metric_leader; evsel->max_events = orig->max_events; zfree(&evsel->unit); @@ -1066,6 +1066,9 @@ static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *o pr_info("Disabling user space callchains for function trace event.\n"); attr->exclude_callchain_user = 1; } + + if (param->defer && !attr->exclude_callchain_user) + attr->defer_callchain = 1; } void evsel__config_callchain(struct evsel *evsel, struct record_opts *opts, @@ -1512,6 +1515,7 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, attr->mmap2 = track && !perf_missing_features.mmap2; attr->comm = track; attr->build_id = track && opts->build_id; + attr->defer_output = track && callchain && callchain->defer; /* * ksymbol is tracked separately with text poke because it needs to be @@ -1754,7 +1758,6 @@ void evsel__exit(struct evsel *evsel) evsel__zero_per_pkg(evsel); hashmap__free(evsel->per_pkg_mask); evsel->per_pkg_mask = NULL; - zfree(&evsel->metric_events); if (evsel__priv_destructor) evsel__priv_destructor(evsel->priv); perf_evsel__object.fini(evsel); @@ -1940,16 +1943,19 @@ bool __evsel__match(const struct evsel *evsel, u32 type, u64 config) u32 e_type = evsel->core.attr.type; u64 e_config = evsel->core.attr.config; - if (e_type != type) { - 
return type == PERF_TYPE_HARDWARE && evsel->pmu && evsel->pmu->is_core && - evsel->alternate_hw_config == config; - } - - if ((type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE) && - perf_pmus__supports_extended_type()) + if (e_type == type && e_config == config) + return true; + if (type != PERF_TYPE_HARDWARE && type != PERF_TYPE_HW_CACHE) + return false; + if ((e_type == PERF_TYPE_HARDWARE || e_type == PERF_TYPE_HW_CACHE) && + perf_pmus__supports_extended_type()) e_config &= PERF_HW_EVENT_MASK; - - return e_config == config; + if (e_type == type && e_config == config) + return true; + if (type == PERF_TYPE_HARDWARE && evsel->pmu && evsel->pmu->is_core && + evsel->alternate_hw_config == config) + return true; + return false; } int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread) @@ -2198,6 +2204,10 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus, static void evsel__disable_missing_features(struct evsel *evsel) { + if (perf_missing_features.defer_callchain && evsel->core.attr.defer_callchain) + evsel->core.attr.defer_callchain = 0; + if (perf_missing_features.defer_callchain && evsel->core.attr.defer_output) + evsel->core.attr.defer_output = 0; if (perf_missing_features.inherit_sample_read && evsel->core.attr.inherit && (evsel->core.attr.sample_type & PERF_SAMPLE_READ)) evsel->core.attr.inherit = 0; @@ -2472,8 +2482,15 @@ static bool evsel__detect_missing_features(struct evsel *evsel, struct perf_cpu /* Please add new feature detection here. 
*/ + attr.defer_callchain = true; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.defer_callchain = true; + pr_debug2("switching off deferred callchain support\n"); + attr.defer_callchain = false; + attr.inherit = true; - attr.sample_type = PERF_SAMPLE_READ; + attr.sample_type = PERF_SAMPLE_READ | PERF_SAMPLE_TID; if (has_attr_feature(&attr, /*flags=*/0)) goto found; perf_missing_features.inherit_sample_read = true; @@ -2583,6 +2600,10 @@ found: errno = old_errno; check: + if ((evsel->core.attr.defer_callchain || evsel->core.attr.defer_output) && + perf_missing_features.defer_callchain) + return true; + if (evsel->core.attr.inherit && (evsel->core.attr.sample_type & PERF_SAMPLE_READ) && perf_missing_features.inherit_sample_read) @@ -3088,6 +3109,20 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, data->data_src = PERF_MEM_DATA_SRC_NONE; data->vcpu = -1; + if (event->header.type == PERF_RECORD_CALLCHAIN_DEFERRED) { + const u64 max_callchain_nr = UINT64_MAX / sizeof(u64); + + data->callchain = (struct ip_callchain *)&event->callchain_deferred.nr; + if (data->callchain->nr > max_callchain_nr) + return -EFAULT; + + data->deferred_cookie = event->callchain_deferred.cookie; + + if (evsel->core.attr.sample_id_all) + perf_evsel__parse_id_sample(evsel, event, data); + return 0; + } + if (event->header.type != PERF_RECORD_SAMPLE) { if (!evsel->core.attr.sample_id_all) return 0; @@ -3212,12 +3247,25 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, if (type & PERF_SAMPLE_CALLCHAIN) { const u64 max_callchain_nr = UINT64_MAX / sizeof(u64); + u64 callchain_nr; OVERFLOW_CHECK_u64(array); data->callchain = (struct ip_callchain *)array++; - if (data->callchain->nr > max_callchain_nr) + callchain_nr = data->callchain->nr; + if (callchain_nr > max_callchain_nr) return -EFAULT; - sz = data->callchain->nr * sizeof(u64); + sz = callchain_nr * sizeof(u64); + /* + * Save the cookie for the deferred user 
callchain. The last 2 + * entries in the callchain should be the context marker and the + * cookie. The cookie will be used to match PERF_RECORD_ + * CALLCHAIN_DEFERRED later. + */ + if (evsel->core.attr.defer_callchain && callchain_nr >= 2 && + data->callchain->ips[callchain_nr - 2] == PERF_CONTEXT_USER_DEFERRED) { + data->deferred_cookie = data->callchain->ips[callchain_nr - 1]; + data->deferred_callchain = true; + } OVERFLOW_CHECK(array, sz, max_size); array = (void *)array + sz; } @@ -3973,6 +4021,9 @@ static int store_evsel_ids(struct evsel *evsel, struct evlist *evlist) if (evsel__is_retire_lat(evsel)) return 0; + if (perf_pmu__kind(evsel->pmu) != PERF_PMU_KIND_PE) + return 0; + for (cpu_map_idx = 0; cpu_map_idx < xyarray__max_x(evsel->core.fd); cpu_map_idx++) { for (thread = 0; thread < xyarray__max_y(evsel->core.fd); thread++) { diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index f8de0f9a719b..a08130ff2e47 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -100,7 +100,6 @@ struct evsel { * metric fields are similar, but needs more care as they can have * references to other metric (evsel). 
*/ - struct evsel **metric_events; struct evsel *metric_leader; void *handler; @@ -123,6 +122,7 @@ struct evsel { bool reset_group; bool needs_auxtrace_mmap; bool default_metricgroup; /* A member of the Default metricgroup */ + bool default_show_events; /* If a default group member, show the event */ bool needs_uniquify; struct hashmap *per_pkg_mask; int err; @@ -221,6 +221,7 @@ struct perf_missing_features { bool branch_counters; bool aux_action; bool inherit_sample_read; + bool defer_callchain; }; extern struct perf_missing_features perf_missing_features; diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c index 103984b29b1e..10f1a03c2860 100644 --- a/tools/perf/util/evsel_fprintf.c +++ b/tools/perf/util/evsel_fprintf.c @@ -168,7 +168,10 @@ int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment, node_al.addr = addr; node_al.map = map__get(map); - if (print_symoffset) { + if (sample->deferred_callchain && + sample->deferred_cookie == node->ip) { + printed += fprintf(fp, "(cookie)"); + } else if (print_symoffset) { printed += __symbol__fprintf_symname_offs(sym, &node_al, print_unknown_as_addr, true, fp); diff --git a/tools/perf/util/evswitch.c b/tools/perf/util/evswitch.c index 40cb56a9347d..d4c06a3f825a 100644 --- a/tools/perf/util/evswitch.c +++ b/tools/perf/util/evswitch.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only // Copyright (C) 2019, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> +#include <errno.h> #include "evswitch.h" #include "evlist.h" diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index 7fda0ff89c16..465fe2e9bbbe 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -401,14 +401,12 @@ double expr__get_literal(const char *literal, const struct expr_scanner_ctx *ctx if (ev != TOOL_PMU__EVENT_NONE) { u64 count; - if (tool_pmu__read_event(ev, /*evsel=*/NULL, &count)) + if (tool_pmu__read_event(ev, /*evsel=*/NULL, + ctx->system_wide, 
ctx->user_requested_cpu_list, + &count)) result = count; else pr_err("Failure to read '%s'", literal); - - } else if (!strcmp("#core_wide", literal)) { - result = core_wide(ctx->system_wide, ctx->user_requested_cpu_list) - ? 1.0 : 0.0; } else { pr_err("Unrecognized literal '%s'", literal); } diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c index 591548b10e34..a1cd5196f4ec 100644 --- a/tools/perf/util/genelf.c +++ b/tools/perf/util/genelf.c @@ -173,6 +173,8 @@ jit_write_elf(int fd, uint64_t load_addr __maybe_unused, const char *sym, Elf_Shdr *shdr; uint64_t eh_frame_base_offset; char *strsym = NULL; + void *build_id_data = NULL, *tmp; + int build_id_data_len; int symlen; int retval = -1; @@ -251,6 +253,14 @@ jit_write_elf(int fd, uint64_t load_addr __maybe_unused, const char *sym, shdr->sh_flags = SHF_EXECINSTR | SHF_ALLOC; shdr->sh_entsize = 0; + build_id_data = malloc(csize); + if (build_id_data == NULL) { + warnx("cannot allocate build-id data"); + goto error; + } + memcpy(build_id_data, code, csize); + build_id_data_len = csize; + /* * Setup .eh_frame_hdr and .eh_frame */ @@ -334,6 +344,15 @@ jit_write_elf(int fd, uint64_t load_addr __maybe_unused, const char *sym, shdr->sh_entsize = sizeof(Elf_Sym); shdr->sh_link = unwinding ? 
6 : 4; /* index of .strtab section */ + tmp = realloc(build_id_data, build_id_data_len + sizeof(symtab)); + if (tmp == NULL) { + warnx("cannot allocate build-id data"); + goto error; + } + memcpy(tmp + build_id_data_len, symtab, sizeof(symtab)); + build_id_data = tmp; + build_id_data_len += sizeof(symtab); + /* * setup symbols string table * 2 = 1 for 0 in 1st entry, 1 for the 0 at end of symbol for 2nd entry @@ -376,6 +395,15 @@ jit_write_elf(int fd, uint64_t load_addr __maybe_unused, const char *sym, shdr->sh_flags = 0; shdr->sh_entsize = 0; + tmp = realloc(build_id_data, build_id_data_len + symlen); + if (tmp == NULL) { + warnx("cannot allocate build-id data"); + goto error; + } + memcpy(tmp + build_id_data_len, strsym, symlen); + build_id_data = tmp; + build_id_data_len += symlen; + /* * setup build-id section */ @@ -394,7 +422,7 @@ jit_write_elf(int fd, uint64_t load_addr __maybe_unused, const char *sym, /* * build-id generation */ - sha1(code, csize, bnote.build_id); + sha1(build_id_data, build_id_data_len, bnote.build_id); bnote.desc.namesz = sizeof(bnote.name); /* must include 0 termination */ bnote.desc.descsz = sizeof(bnote.build_id); bnote.desc.type = NT_GNU_BUILD_ID; @@ -439,7 +467,7 @@ error: (void)elf_end(e); free(strsym); - + free(build_id_data); return retval; } diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 4f2a6e10ed5c..f5cad377c99e 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -335,7 +335,6 @@ static int write_build_id(struct feat_fd *ff, pr_debug("failed to write buildid table\n"); return err; } - perf_session__cache_build_ids(session); return 0; } @@ -1022,12 +1021,9 @@ static int write_bpf_prog_info(struct feat_fd *ff, down_read(&env->bpf_progs.lock); - if (env->bpf_progs.infos_cnt == 0) - goto out; - ret = do_write(ff, &env->bpf_progs.infos_cnt, sizeof(env->bpf_progs.infos_cnt)); - if (ret < 0) + if (ret < 0 || env->bpf_progs.infos_cnt == 0) goto out; root = &env->bpf_progs.infos; @@ -1067,13 
+1063,10 @@ static int write_bpf_btf(struct feat_fd *ff, down_read(&env->bpf_progs.lock); - if (env->bpf_progs.btfs_cnt == 0) - goto out; - ret = do_write(ff, &env->bpf_progs.btfs_cnt, sizeof(env->bpf_progs.btfs_cnt)); - if (ret < 0) + if (ret < 0 || env->bpf_progs.btfs_cnt == 0) goto out; root = &env->bpf_progs.btfs; @@ -1561,7 +1554,7 @@ static int __write_pmu_caps(struct feat_fd *ff, struct perf_pmu *pmu, static int write_cpu_pmu_caps(struct feat_fd *ff, struct evlist *evlist __maybe_unused) { - struct perf_pmu *cpu_pmu = perf_pmus__find("cpu"); + struct perf_pmu *cpu_pmu = perf_pmus__find_core_pmu(); int ret; if (!cpu_pmu) @@ -4541,7 +4534,8 @@ int perf_event__process_event_update(const struct perf_tool *tool __maybe_unused } #ifdef HAVE_LIBTRACEEVENT -int perf_event__process_tracing_data(struct perf_session *session, +int perf_event__process_tracing_data(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, union perf_event *event) { ssize_t size_read, padding, size = event->tracing_data.size; @@ -4589,7 +4583,8 @@ int perf_event__process_tracing_data(struct perf_session *session, } #endif -int perf_event__process_build_id(struct perf_session *session, +int perf_event__process_build_id(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, union perf_event *event) { __event_process_build_id(&event->build_id, diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index d16dfceccd74..c058021c3150 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -178,10 +178,12 @@ int perf_event__process_event_update(const struct perf_tool *tool, size_t perf_event__fprintf_attr(union perf_event *event, FILE *fp); size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp); #ifdef HAVE_LIBTRACEEVENT -int perf_event__process_tracing_data(struct perf_session *session, +int perf_event__process_tracing_data(const struct perf_tool *tool, + struct perf_session *session, union perf_event *event); 
#endif -int perf_event__process_build_id(struct perf_session *session, +int perf_event__process_build_id(const struct perf_tool *tool, + struct perf_session *session, union perf_event *event); bool is_perf_magic(u64 magic); diff --git a/tools/perf/util/hisi-ptt-decoder/Build b/tools/perf/util/hisi-ptt-decoder/Build index 3298f7b7e308..2ee0eb731656 100644 --- a/tools/perf/util/hisi-ptt-decoder/Build +++ b/tools/perf/util/hisi-ptt-decoder/Build @@ -1 +1 @@ -perf-util-$(CONFIG_AUXTRACE) += hisi-ptt-pkt-decoder.o +perf-util-y += hisi-ptt-pkt-decoder.o diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 64ff427040c3..ef4b569f7df4 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -608,10 +608,8 @@ err_infos: map_symbol__exit(&he->branch_info->to.ms); zfree(&he->branch_info); } - if (he->mem_info) { - map_symbol__exit(&mem_info__iaddr(he->mem_info)->ms); - map_symbol__exit(&mem_info__daddr(he->mem_info)->ms); - } + if (he->mem_info) + mem_info__zput(he->mem_info); err: map_symbol__exit(&he->ms); zfree(&he->stat_acc); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index c64005278687..1d5ea632ca4e 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -709,16 +709,18 @@ struct block_hist { struct hist_entry he; }; +#define NO_ADDR 0 + #ifdef HAVE_SLANG_SUPPORT #include "../ui/keysyms.h" void attr_to_script(char *buf, struct perf_event_attr *attr); int __hist_entry__tui_annotate(struct hist_entry *he, struct map_symbol *ms, struct evsel *evsel, - struct hist_browser_timer *hbt); + struct hist_browser_timer *hbt, u64 al_addr); int hist_entry__tui_annotate(struct hist_entry *he, struct evsel *evsel, - struct hist_browser_timer *hbt); + struct hist_browser_timer *hbt, u64 al_addr); int evlist__tui_browse_hists(struct evlist *evlist, const char *help, struct hist_browser_timer *hbt, float min_pcnt, struct perf_env *env, bool warn_lost_event); @@ -746,14 +748,16 @@ int evlist__tui_browse_hists(struct evlist *evlist 
__maybe_unused, static inline int __hist_entry__tui_annotate(struct hist_entry *he __maybe_unused, struct map_symbol *ms __maybe_unused, struct evsel *evsel __maybe_unused, - struct hist_browser_timer *hbt __maybe_unused) + struct hist_browser_timer *hbt __maybe_unused, + u64 al_addr __maybe_unused) { return 0; } static inline int hist_entry__tui_annotate(struct hist_entry *he __maybe_unused, struct evsel *evsel __maybe_unused, - struct hist_browser_timer *hbt __maybe_unused) + struct hist_browser_timer *hbt __maybe_unused, + u64 al_addr __maybe_unused) { return 0; } diff --git a/tools/perf/util/hwmon_pmu.c b/tools/perf/util/hwmon_pmu.c index 416dfea9ffff..279d6b1a47f0 100644 --- a/tools/perf/util/hwmon_pmu.c +++ b/tools/perf/util/hwmon_pmu.c @@ -376,7 +376,7 @@ struct perf_pmu *hwmon_pmu__new(struct list_head *pmus, const char *hwmon_dir, perf_pmu__delete(&hwm->pmu); return NULL; } - hwm->pmu.cpus = perf_cpu_map__new("0"); + hwm->pmu.cpus = perf_cpu_map__new_int(0); if (!hwm->pmu.cpus) { perf_pmu__delete(&hwm->pmu); return NULL; @@ -742,8 +742,7 @@ int perf_pmus__read_hwmon_pmus(struct list_head *pmus) continue; } io__init(&io, name_fd, buf2, sizeof(buf2)); - io__getline(&io, &line, &line_len); - if (line_len > 0 && line[line_len - 1] == '\n') + if (io__getline(&io, &line, &line_len) > 0 && line[line_len - 1] == '\n') line[line_len - 1] = '\0'; hwmon_pmu__new(pmus, buf, class_hwmon_ent->d_name, line); close(name_fd); diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index 3625c6224750..382255393fb3 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -777,9 +777,7 @@ static int intel_bts_synth_events(struct intel_bts *bts, attr.sample_id_all = evsel->core.attr.sample_id_all; attr.read_format = evsel->core.attr.read_format; - id = evsel->core.id[0] + 1000000000; - if (!id) - id = 1; + id = auxtrace_synth_id_range_start(evsel); if (bts->synth_opts.branches) { attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS; diff --git 
a/tools/perf/util/intel-pt-decoder/Build b/tools/perf/util/intel-pt-decoder/Build index 5b8f0149167d..8fd7e4330044 100644 --- a/tools/perf/util/intel-pt-decoder/Build +++ b/tools/perf/util/intel-pt-decoder/Build @@ -1,4 +1,4 @@ -perf-util-$(CONFIG_AUXTRACE) += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o intel-pt-log.o intel-pt-decoder.o +perf-util-y += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o intel-pt-log.o intel-pt-decoder.o inat_tables_script = $(srctree)/tools/arch/x86/tools/gen-insn-attr-x86.awk inat_tables_maps = $(srctree)/tools/arch/x86/lib/x86-opcode-map.txt @@ -7,11 +7,7 @@ $(OUTPUT)util/intel-pt-decoder/inat-tables.c: $(inat_tables_script) $(inat_table $(call rule_mkdir) @$(call echo-cmd,gen)$(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ || rm -f $@ -ifeq ($(SRCARCH),x86) - perf-util-y += inat.o insn.o -else - perf-util-$(CONFIG_AUXTRACE) += inat.o insn.o -endif +perf-util-y += inat.o insn.o $(OUTPUT)util/intel-pt-decoder/inat.o: $(srctree)/tools/arch/x86/lib/inat.c $(OUTPUT)util/intel-pt-decoder/inat-tables.c $(call rule_mkdir) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 9b1011fe4826..fc9eec8b54b8 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -3987,9 +3987,7 @@ static int intel_pt_synth_events(struct intel_pt *pt, attr.sample_id_all = evsel->core.attr.sample_id_all; attr.read_format = evsel->core.attr.read_format; - id = evsel->core.id[0] + 1000000000; - if (!id) - id = 1; + id = auxtrace_synth_id_range_start(evsel); if (pt->synth_opts.branches) { attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS; diff --git a/tools/perf/util/intel-tpebs.c b/tools/perf/util/intel-tpebs.c index 8c9aee157ec4..3c958d738ca6 100644 --- a/tools/perf/util/intel-tpebs.c +++ b/tools/perf/util/intel-tpebs.c @@ -25,6 +25,7 @@ #include "stat.h" #include <sys/stat.h> #include <sys/file.h> +#include <errno.h> #include <poll.h> #include <math.h> @@ -216,7 +217,8 @@ static int process_sample_event(const struct 
perf_tool *tool __maybe_unused, return 0; } -static int process_feature_event(struct perf_session *session, +static int process_feature_event(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, union perf_event *event) { if (event->feat.feat_id < HEADER_LAST_FEATURE) diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c index b062b1f234b6..f00814e37de9 100644 --- a/tools/perf/util/jitdump.c +++ b/tools/perf/util/jitdump.c @@ -233,7 +233,8 @@ jit_open(struct jit_buf_desc *jd, const char *name) /* * keep dirname for generating files and mmap records */ - strcpy(jd->dir, name); + strncpy(jd->dir, name, PATH_MAX); + jd->dir[PATH_MAX - 1] = '\0'; dirname(jd->dir); free(buf); @@ -546,6 +547,8 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr) if (dso) dso__set_hit(dso); + + dso__put(dso); } out: perf_sample__exit(&sample); diff --git a/tools/perf/util/kvm-stat.h b/tools/perf/util/kvm-stat.h index 53db3d56108b..a356b839c2ee 100644 --- a/tools/perf/util/kvm-stat.h +++ b/tools/perf/util/kvm-stat.h @@ -10,6 +10,7 @@ #include "symbol.h" #include "record.h" +#include <errno.h> #include <stdlib.h> #include <linux/zalloc.h> diff --git a/tools/perf/util/libbfd.c b/tools/perf/util/libbfd.c index 01147fbf73b3..cc0c474cbfaa 100644 --- a/tools/perf/util/libbfd.c +++ b/tools/perf/util/libbfd.c @@ -38,6 +38,39 @@ struct a2l_data { asymbol **syms; }; +static bool perf_bfd_lock(void *bfd_mutex) +{ + mutex_lock(bfd_mutex); + return true; +} + +static bool perf_bfd_unlock(void *bfd_mutex) +{ + mutex_unlock(bfd_mutex); + return true; +} + +static void perf_bfd_init(void) +{ + static struct mutex bfd_mutex; + + mutex_init_recursive(&bfd_mutex); + + if (bfd_init() != BFD_INIT_MAGIC) { + pr_err("Error initializing libbfd\n"); + return; + } + if (!bfd_thread_init(perf_bfd_lock, perf_bfd_unlock, &bfd_mutex)) + pr_err("Error initializing libbfd threading\n"); +} + +static void ensure_bfd_init(void) +{ + static pthread_once_t 
bfd_init_once = PTHREAD_ONCE_INIT; + + pthread_once(&bfd_init_once, perf_bfd_init); +} + static int bfd_error(const char *string) { const char *errmsg; @@ -132,6 +165,7 @@ static struct a2l_data *addr2line_init(const char *path) bfd *abfd; struct a2l_data *a2l = NULL; + ensure_bfd_init(); abfd = bfd_openr(path, NULL); if (abfd == NULL) return NULL; @@ -288,6 +322,7 @@ int dso__load_bfd_symbols(struct dso *dso, const char *debugfile) bfd *abfd; u64 start, len; + ensure_bfd_init(); abfd = bfd_openr(debugfile, NULL); if (!abfd) return -1; @@ -383,16 +418,22 @@ out_close: return err; } -int libbfd__read_build_id(const char *filename, struct build_id *bid, bool block) +int libbfd__read_build_id(const char *filename, struct build_id *bid) { size_t size = sizeof(bid->data); int err = -1, fd; bfd *abfd; - fd = open(filename, block ? O_RDONLY : (O_RDONLY | O_NONBLOCK)); + if (!filename) + return -EFAULT; + if (!is_regular_file(filename)) + return -EWOULDBLOCK; + + fd = open(filename, O_RDONLY); if (fd < 0) return -1; + ensure_bfd_init(); abfd = bfd_fdopenr(filename, /*target=*/NULL, fd); if (!abfd) return -1; @@ -421,6 +462,7 @@ int libbfd_filename__read_debuglink(const char *filename, char *debuglink, asection *section; bfd *abfd; + ensure_bfd_init(); abfd = bfd_openr(filename, NULL); if (!abfd) return -1; @@ -480,6 +522,7 @@ int symbol__disassemble_bpf_libbfd(struct symbol *sym __maybe_unused, memset(tpath, 0, sizeof(tpath)); perf_exe(tpath, sizeof(tpath)); + ensure_bfd_init(); bfdf = bfd_openr(tpath, NULL); if (bfdf == NULL) abort(); diff --git a/tools/perf/util/libbfd.h b/tools/perf/util/libbfd.h index e300f171d1bd..953886f3d62f 100644 --- a/tools/perf/util/libbfd.h +++ b/tools/perf/util/libbfd.h @@ -25,7 +25,7 @@ void dso__free_a2l_libbfd(struct dso *dso); int symbol__disassemble_libbfd(const char *filename, struct symbol *sym, struct annotate_args *args); -int libbfd__read_build_id(const char *filename, struct build_id *bid, bool block); +int 
libbfd__read_build_id(const char *filename, struct build_id *bid); int libbfd_filename__read_debuglink(const char *filename, char *debuglink, size_t size); @@ -59,8 +59,7 @@ static inline int symbol__disassemble_libbfd(const char *filename __always_unuse } static inline int libbfd__read_build_id(const char *filename __always_unused, - struct build_id *bid __always_unused, - bool block __always_unused) + struct build_id *bid __always_unused) { return -1; } diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index b5dd42588c91..841b711d970e 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2124,6 +2124,7 @@ static int add_callchain_ip(struct thread *thread, *cpumode = PERF_RECORD_MISC_KERNEL; break; case PERF_CONTEXT_USER: + case PERF_CONTEXT_USER_DEFERRED: *cpumode = PERF_RECORD_MISC_USER; break; default: diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c index 779f6230130a..c321d4f4d846 100644 --- a/tools/perf/util/maps.c +++ b/tools/perf/util/maps.c @@ -931,8 +931,9 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new) return err; } else { struct map *next = NULL; + unsigned int nr_maps = maps__nr_maps(maps); - if (i + 1 < maps__nr_maps(maps)) + if (i + 1 < nr_maps) next = maps_by_address[i + 1]; if (!next || map__start(next) >= map__end(new)) { @@ -953,7 +954,24 @@ static int __maps__fixup_overlap_and_insert(struct maps *maps, struct map *new) check_invariants(maps); return err; } - __maps__remove(maps, pos); + /* + * pos fully covers the previous mapping so remove + * it. The following is an inlined version of + * maps__remove that reuses the already computed + * indices. 
+ */ + map__put(maps_by_address[i]); + memmove(&maps_by_address[i], + &maps_by_address[i + 1], + (nr_maps - i - 1) * sizeof(*maps_by_address)); + + if (maps_by_name) { + map__put(maps_by_name[ni]); + memmove(&maps_by_name[ni], + &maps_by_name[ni + 1], + (nr_maps - ni - 1) * sizeof(*maps_by_name)); + } + --RC_CHK_ACCESS(maps)->nr_maps; check_invariants(maps); /* * Maps are ordered but no need to increase `i` as the diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 80b3069427bc..0b49fce251fc 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -303,12 +303,15 @@ int perf_mem_events__record_args(const char **rec_argv, int *argv_nr, char **eve } if (cpu_map) { - if (!perf_cpu_map__equal(cpu_map, cpu_map__online())) { + struct perf_cpu_map *online = cpu_map__online(); + + if (!perf_cpu_map__equal(cpu_map, online)) { char buf[200]; cpu_map__snprint(cpu_map, buf, sizeof(buf)); pr_warning("Memory events are enabled on a subset of CPUs: %s\n", buf); } + perf_cpu_map__put(online); perf_cpu_map__put(cpu_map); } diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 595b83142d2c..25c75fdbfc52 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -152,6 +152,8 @@ struct metric { * Should events of the metric be grouped? */ bool group_events; + /** Show events even if in the Default metric group. */ + bool default_show_events; /** * Parsed events for the metric. 
Optional as events may be taken from a * different metric whose group contains all the IDs necessary for this @@ -255,6 +257,7 @@ static struct metric *metric__new(const struct pmu_metric *pm, m->pctx->sctx.runtime = runtime; m->pctx->sctx.system_wide = system_wide; m->group_events = !metric_no_group && metric__group_events(pm, metric_no_threshold); + m->default_show_events = pm->default_show_events; m->metric_refs = NULL; m->evlist = NULL; @@ -424,10 +427,18 @@ int metricgroup__for_each_metric(const struct pmu_metrics_table *table, pmu_metr .fn = fn, .data = data, }; + const struct pmu_metrics_table *tables[2] = { + table, + pmu_metrics_table__default(), + }; + + for (size_t i = 0; i < ARRAY_SIZE(tables); i++) { + int ret; - if (table) { - int ret = pmu_metrics_table__for_each_metric(table, fn, data); + if (!tables[i]) + continue; + ret = pmu_metrics_table__for_each_metric(tables[i], fn, data); if (ret) return ret; } @@ -1323,6 +1334,51 @@ err_out: return ret; } +/* How many times will a given evsel be used in a set of metrics? */ +static int count_uses(struct list_head *metric_list, struct evsel *evsel) +{ + const char *metric_id = evsel__metric_id(evsel); + struct metric *m; + int uses = 0; + + list_for_each_entry(m, metric_list, nd) { + if (hashmap__find(m->pctx->ids, metric_id, NULL)) + uses++; + } + return uses; +} + +/* + * Select the evsel that stat-display will use to trigger shadow/metric + * printing. Pick the least shared non-tool evsel, encouraging metrics to be + * with a hardware counter that is specific to them. 
+ */ +static struct evsel *pick_display_evsel(struct list_head *metric_list, + struct evsel **metric_events) +{ + struct evsel *selected = metric_events[0]; + size_t selected_uses; + bool selected_is_tool; + + if (!selected) + return NULL; + + selected_uses = count_uses(metric_list, selected); + selected_is_tool = evsel__is_tool(selected); + for (int i = 1; metric_events[i]; i++) { + struct evsel *candidate = metric_events[i]; + size_t candidate_uses = count_uses(metric_list, candidate); + + if ((selected_is_tool && !evsel__is_tool(candidate)) || + (candidate_uses < selected_uses)) { + selected = candidate; + selected_uses = candidate_uses; + selected_is_tool = evsel__is_tool(selected); + } + } + return selected; +} + static int parse_groups(struct evlist *perf_evlist, const char *pmu, const char *str, bool metric_no_group, @@ -1430,7 +1486,8 @@ static int parse_groups(struct evlist *perf_evlist, goto out; } - me = metricgroup__lookup(&perf_evlist->metric_events, metric_events[0], + me = metricgroup__lookup(&perf_evlist->metric_events, + pick_display_evsel(&metric_list, metric_events), /*create=*/true); expr = malloc(sizeof(struct metric_expr)); @@ -1455,9 +1512,20 @@ static int parse_groups(struct evlist *perf_evlist, if (!expr->metric_name) { ret = -ENOMEM; + free(expr); free(metric_events); goto out; } + if (m->default_show_events) { + struct evsel *pos; + + for (int i = 0; metric_events[i]; i++) + metric_events[i]->default_show_events = true; + evlist__for_each_entry(metric_evlist, pos) { + if (pos->metric_leader && pos->metric_leader->default_show_events) + pos->default_show_events = true; + } + } expr->metric_threshold = m->metric_threshold; expr->metric_unit = m->metric_unit; expr->metric_events = metric_events; @@ -1534,19 +1602,22 @@ static int metricgroup__has_metric_or_groups_callback(const struct pmu_metric *p bool metricgroup__has_metric_or_groups(const char *pmu, const char *metric_or_groups) { - const struct pmu_metrics_table *table = 
pmu_metrics_table__find(); + const struct pmu_metrics_table *tables[2] = { + pmu_metrics_table__find(), + pmu_metrics_table__default(), + }; struct metricgroup__has_metric_data data = { .pmu = pmu, .metric_or_groups = metric_or_groups, }; - if (!table) - return false; - - return pmu_metrics_table__for_each_metric(table, - metricgroup__has_metric_or_groups_callback, - &data) - ? true : false; + for (size_t i = 0; i < ARRAY_SIZE(tables); i++) { + if (pmu_metrics_table__for_each_metric(tables[i], + metricgroup__has_metric_or_groups_callback, + &data)) + return true; + } + return false; } static int metricgroup__topdown_max_level_callback(const struct pmu_metric *pm, @@ -1607,6 +1678,7 @@ int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp, pr_debug("copying metric event for cgroup '%s': %s (idx=%d)\n", cgrp ? cgrp->name : "root", evsel->name, evsel->core.idx); + new_me->is_default = old_me->is_default; list_for_each_entry(old_expr, &old_me->head, nd) { new_expr = malloc(sizeof(*new_expr)); if (!new_expr) @@ -1620,6 +1692,7 @@ int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp, new_expr->metric_unit = old_expr->metric_unit; new_expr->runtime = old_expr->runtime; + new_expr->default_metricgroup_name = old_expr->default_metricgroup_name; if (old_expr->metric_refs) { /* calculate number of metric_events */ diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h index 324880b2ed8f..4be6bfc13c46 100644 --- a/tools/perf/util/metricgroup.h +++ b/tools/perf/util/metricgroup.h @@ -16,7 +16,7 @@ struct cgroup; /** * A node in a rblist keyed by the evsel. The global rblist of metric events - * generally exists in perf_stat_config. The evsel is looked up in the rblist + * generally exists in evlist. The evsel is looked up in the rblist * yielding a list of metric_expr. 
*/ struct metric_event { diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index a34726219af3..b69f926d314b 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -7,6 +7,7 @@ */ #include <sys/mman.h> +#include <errno.h> #include <inttypes.h> #include <asm/bug.h> #include <linux/zalloc.h> diff --git a/tools/perf/util/mutex.c b/tools/perf/util/mutex.c index bca7f0717f35..7aa1f3f55a7d 100644 --- a/tools/perf/util/mutex.c +++ b/tools/perf/util/mutex.c @@ -17,7 +17,7 @@ static void check_err(const char *fn, int err) #define CHECK_ERR(err) check_err(__func__, err) -static void __mutex_init(struct mutex *mtx, bool pshared) +static void __mutex_init(struct mutex *mtx, bool pshared, bool recursive) { pthread_mutexattr_t attr; @@ -27,21 +27,27 @@ static void __mutex_init(struct mutex *mtx, bool pshared) /* In normal builds enable error checking, such as recursive usage. */ CHECK_ERR(pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK)); #endif + if (recursive) + CHECK_ERR(pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE)); if (pshared) CHECK_ERR(pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED)); - CHECK_ERR(pthread_mutex_init(&mtx->lock, &attr)); CHECK_ERR(pthread_mutexattr_destroy(&attr)); } void mutex_init(struct mutex *mtx) { - __mutex_init(mtx, /*pshared=*/false); + __mutex_init(mtx, /*pshared=*/false, /*recursive=*/false); } void mutex_init_pshared(struct mutex *mtx) { - __mutex_init(mtx, /*pshared=*/true); + __mutex_init(mtx, /*pshared=*/true, /*recursive=*/false); +} + +void mutex_init_recursive(struct mutex *mtx) +{ + __mutex_init(mtx, /*pshared=*/false, /*recursive=*/true); } void mutex_destroy(struct mutex *mtx) diff --git a/tools/perf/util/mutex.h b/tools/perf/util/mutex.h index 38458f00846f..70232d8d094f 100644 --- a/tools/perf/util/mutex.h +++ b/tools/perf/util/mutex.h @@ -104,6 +104,8 @@ void mutex_init(struct mutex *mtx); * process-private attribute. 
*/ void mutex_init_pshared(struct mutex *mtx); +/* Initializes a mutex that may be recursively held on the same thread. */ +void mutex_init_recursive(struct mutex *mtx); void mutex_destroy(struct mutex *mtx); void mutex_lock(struct mutex *mtx) EXCLUSIVE_LOCK_FUNCTION(*mtx); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index da73d686f6b9..17c1c36a7bf9 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -30,6 +30,7 @@ #include "util/event.h" #include "util/bpf-filter.h" #include "util/stat.h" +#include "util/tool_pmu.h" #include "util/util.h" #include "tracepoint.h" #include <api/fs/tracing_path.h> @@ -40,49 +41,7 @@ static int get_config_terms(const struct parse_events_terms *head_config, struct list_head *head_terms); static int parse_events_terms__copy(const struct parse_events_terms *src, struct parse_events_terms *dest); - -const struct event_symbol event_symbols_hw[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = { - .symbol = "cpu-cycles", - .alias = "cycles", - }, - [PERF_COUNT_HW_INSTRUCTIONS] = { - .symbol = "instructions", - .alias = "", - }, - [PERF_COUNT_HW_CACHE_REFERENCES] = { - .symbol = "cache-references", - .alias = "", - }, - [PERF_COUNT_HW_CACHE_MISSES] = { - .symbol = "cache-misses", - .alias = "", - }, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { - .symbol = "branch-instructions", - .alias = "branches", - }, - [PERF_COUNT_HW_BRANCH_MISSES] = { - .symbol = "branch-misses", - .alias = "", - }, - [PERF_COUNT_HW_BUS_CYCLES] = { - .symbol = "bus-cycles", - .alias = "", - }, - [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = { - .symbol = "stalled-cycles-frontend", - .alias = "idle-cycles-frontend", - }, - [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = { - .symbol = "stalled-cycles-backend", - .alias = "idle-cycles-backend", - }, - [PERF_COUNT_HW_REF_CPU_CYCLES] = { - .symbol = "ref-cycles", - .alias = "", - }, -}; +static int parse_events_terms__to_strbuf(const struct parse_events_terms *terms, 
struct strbuf *sb); static const char *const event_types[] = { [PERF_TYPE_HARDWARE] = "hardware", @@ -257,6 +216,8 @@ __add_event(struct list_head *list, int *idx, PERF_PMU_FORMAT_VALUE_CONFIG2, "config2"); perf_pmu__warn_invalid_config(pmu, attr->config3, name, PERF_PMU_FORMAT_VALUE_CONFIG3, "config3"); + perf_pmu__warn_invalid_config(pmu, attr->config4, name, + PERF_PMU_FORMAT_VALUE_CONFIG4, "config4"); } } /* @@ -269,8 +230,12 @@ __add_event(struct list_head *list, int *idx, if (pmu) { is_pmu_core = pmu->is_core; pmu_cpus = perf_cpu_map__get(pmu->cpus); - if (perf_cpu_map__is_empty(pmu_cpus)) - pmu_cpus = cpu_map__online(); + if (perf_cpu_map__is_empty(pmu_cpus)) { + if (perf_pmu__is_tool(pmu)) + pmu_cpus = tool_pmu__cpus(attr); + else + pmu_cpus = cpu_map__online(); + } } else { is_pmu_core = (attr->type == PERF_TYPE_HARDWARE || attr->type == PERF_TYPE_HW_CACHE); @@ -471,84 +436,7 @@ bool parse_events__filter_pmu(const struct parse_events_state *parse_state, static int parse_events_add_pmu(struct parse_events_state *parse_state, struct list_head *list, struct perf_pmu *pmu, const struct parse_events_terms *const_parsed_terms, - struct evsel *first_wildcard_match, u64 alternate_hw_config); - -int parse_events_add_cache(struct list_head *list, int *idx, const char *name, - struct parse_events_state *parse_state, - struct parse_events_terms *parsed_terms) -{ - struct perf_pmu *pmu = NULL; - bool found_supported = false; - const char *config_name = get_config_name(parsed_terms); - const char *metric_id = get_config_metric_id(parsed_terms); - struct perf_cpu_map *cpus = get_config_cpu(parsed_terms, parse_state->fake_pmu); - int ret = 0; - struct evsel *first_wildcard_match = NULL; - - while ((pmu = perf_pmus__scan_for_event(pmu, name)) != NULL) { - LIST_HEAD(config_terms); - struct perf_event_attr attr; - - if (parse_events__filter_pmu(parse_state, pmu)) - continue; - - if (perf_pmu__have_event(pmu, name)) { - /* - * The PMU has the event so add as not a legacy 
cache - * event. - */ - ret = parse_events_add_pmu(parse_state, list, pmu, - parsed_terms, - first_wildcard_match, - /*alternate_hw_config=*/PERF_COUNT_HW_MAX); - if (ret) - goto out_err; - if (first_wildcard_match == NULL) - first_wildcard_match = - container_of(list->prev, struct evsel, core.node); - continue; - } - - if (!pmu->is_core) { - /* Legacy cache events are only supported by core PMUs. */ - continue; - } - - memset(&attr, 0, sizeof(attr)); - attr.type = PERF_TYPE_HW_CACHE; - - ret = parse_events__decode_legacy_cache(name, pmu->type, &attr.config); - if (ret) - return ret; - - found_supported = true; - - if (parsed_terms) { - if (config_attr(&attr, parsed_terms, parse_state, config_term_common)) { - ret = -EINVAL; - goto out_err; - } - if (get_config_terms(parsed_terms, &config_terms)) { - ret = -ENOMEM; - goto out_err; - } - } - - if (__add_event(list, idx, &attr, /*init_attr*/true, config_name ?: name, - metric_id, pmu, &config_terms, first_wildcard_match, - cpus, /*alternate_hw_config=*/PERF_COUNT_HW_MAX) == NULL) - ret = -ENOMEM; - - if (first_wildcard_match == NULL) - first_wildcard_match = container_of(list->prev, struct evsel, core.node); - free_config_terms(&config_terms); - if (ret) - goto out_err; - } -out_err: - perf_cpu_map__put(cpus); - return found_supported ? 
0 : -EINVAL; -} + struct evsel *first_wildcard_match); static void tracepoint_error(struct parse_events_error *e, int err, const char *sys, const char *name, int column) @@ -819,6 +707,7 @@ const char *parse_events__term_type_str(enum parse_events__term_type term_type) [PARSE_EVENTS__TERM_TYPE_CONFIG1] = "config1", [PARSE_EVENTS__TERM_TYPE_CONFIG2] = "config2", [PARSE_EVENTS__TERM_TYPE_CONFIG3] = "config3", + [PARSE_EVENTS__TERM_TYPE_CONFIG4] = "config4", [PARSE_EVENTS__TERM_TYPE_NAME] = "name", [PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD] = "period", [PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ] = "freq", @@ -839,8 +728,8 @@ const char *parse_events__term_type_str(enum parse_events__term_type term_type) [PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE] = "aux-sample-size", [PARSE_EVENTS__TERM_TYPE_METRIC_ID] = "metric-id", [PARSE_EVENTS__TERM_TYPE_RAW] = "raw", - [PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE] = "legacy-cache", - [PARSE_EVENTS__TERM_TYPE_HARDWARE] = "hardware", + [PARSE_EVENTS__TERM_TYPE_LEGACY_HARDWARE_CONFIG] = "legacy-hardware-config", + [PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE_CONFIG] = "legacy-cache-config", [PARSE_EVENTS__TERM_TYPE_CPU] = "cpu", [PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV] = "ratio-to-prev", }; @@ -868,6 +757,7 @@ config_term_avail(enum parse_events__term_type term_type, struct parse_events_er case PARSE_EVENTS__TERM_TYPE_CONFIG1: case PARSE_EVENTS__TERM_TYPE_CONFIG2: case PARSE_EVENTS__TERM_TYPE_CONFIG3: + case PARSE_EVENTS__TERM_TYPE_CONFIG4: case PARSE_EVENTS__TERM_TYPE_NAME: case PARSE_EVENTS__TERM_TYPE_METRIC_ID: case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD: @@ -891,9 +781,9 @@ config_term_avail(enum parse_events__term_type term_type, struct parse_events_er case PARSE_EVENTS__TERM_TYPE_AUX_ACTION: case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE: case PARSE_EVENTS__TERM_TYPE_RAW: - case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE: - case PARSE_EVENTS__TERM_TYPE_HARDWARE: case PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV: + case PARSE_EVENTS__TERM_TYPE_LEGACY_HARDWARE_CONFIG: + 
case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE_CONFIG: default: if (!err) return false; @@ -938,6 +828,10 @@ do { \ CHECK_TYPE_VAL(NUM); attr->config3 = term->val.num; break; + case PARSE_EVENTS__TERM_TYPE_CONFIG4: + CHECK_TYPE_VAL(NUM); + attr->config4 = term->val.num; + break; case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD: CHECK_TYPE_VAL(NUM); break; @@ -1064,8 +958,8 @@ do { \ break; case PARSE_EVENTS__TERM_TYPE_DRV_CFG: case PARSE_EVENTS__TERM_TYPE_USER: - case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE: - case PARSE_EVENTS__TERM_TYPE_HARDWARE: + case PARSE_EVENTS__TERM_TYPE_LEGACY_HARDWARE_CONFIG: + case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE_CONFIG: default: parse_events_error__handle(parse_state->error, term->err_term, strdup(parse_events__term_type_str(term->type_term)), @@ -1088,61 +982,66 @@ do { \ #undef CHECK_TYPE_VAL } +static bool check_pmu_is_core(__u32 type, const struct parse_events_term *term, + struct parse_events_error *err) +{ + struct perf_pmu *pmu = NULL; + + /* Avoid loading all PMUs with perf_pmus__find_by_type, just scan the core ones. 
*/ + while ((pmu = perf_pmus__scan_core(pmu)) != NULL) { + if (pmu->type == type) + return true; + } + parse_events_error__handle(err, term->err_val, + strdup("needs a core PMU"), + NULL); + return false; +} + static int config_term_pmu(struct perf_event_attr *attr, struct parse_events_term *term, struct parse_events_state *parse_state) { - if (term->type_term == PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE) { - struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type); - - if (!pmu) { - char *err_str; - - if (asprintf(&err_str, "Failed to find PMU for type %d", attr->type) >= 0) - parse_events_error__handle(parse_state->error, term->err_term, - err_str, /*help=*/NULL); + if (term->type_term == PARSE_EVENTS__TERM_TYPE_LEGACY_HARDWARE_CONFIG) { + if (check_type_val(term, parse_state->error, PARSE_EVENTS__TERM_TYPE_NUM)) + return -EINVAL; + if (term->val.num >= PERF_COUNT_HW_MAX) { + parse_events_error__handle(parse_state->error, term->err_val, + strdup("too big"), + NULL); return -EINVAL; } - /* - * Rewrite the PMU event to a legacy cache one unless the PMU - * doesn't support legacy cache events or the event is present - * within the PMU. 
- */ - if (perf_pmu__supports_legacy_cache(pmu) && - !perf_pmu__have_event(pmu, term->config)) { - attr->type = PERF_TYPE_HW_CACHE; - return parse_events__decode_legacy_cache(term->config, pmu->type, - &attr->config); - } else { - term->type_term = PARSE_EVENTS__TERM_TYPE_USER; - term->no_value = true; - } + if (!check_pmu_is_core(attr->type, term, parse_state->error)) + return -EINVAL; + attr->config = term->val.num; + if (perf_pmus__supports_extended_type()) + attr->config |= (__u64)attr->type << PERF_PMU_TYPE_SHIFT; + attr->type = PERF_TYPE_HARDWARE; + return 0; } - if (term->type_term == PARSE_EVENTS__TERM_TYPE_HARDWARE) { - struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type); - - if (!pmu) { - char *err_str; + if (term->type_term == PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE_CONFIG) { + int cache_type, cache_op, cache_result; - if (asprintf(&err_str, "Failed to find PMU for type %d", attr->type) >= 0) - parse_events_error__handle(parse_state->error, term->err_term, - err_str, /*help=*/NULL); + if (check_type_val(term, parse_state->error, PARSE_EVENTS__TERM_TYPE_NUM)) + return -EINVAL; + cache_type = term->val.num & 0xFF; + cache_op = (term->val.num >> 8) & 0xFF; + cache_result = (term->val.num >> 16) & 0xFF; + if ((term->val.num & ~0xFFFFFF) || + cache_type >= PERF_COUNT_HW_CACHE_MAX || + cache_op >= PERF_COUNT_HW_CACHE_OP_MAX || + cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) { + parse_events_error__handle(parse_state->error, term->err_val, + strdup("too big"), + NULL); return -EINVAL; } - /* - * If the PMU has a sysfs or json event prefer it over - * legacy. ARM requires this. 
- */ - if (perf_pmu__have_event(pmu, term->config)) { - term->type_term = PARSE_EVENTS__TERM_TYPE_USER; - term->no_value = true; - term->alternate_hw_config = true; - } else { - attr->type = PERF_TYPE_HARDWARE; - attr->config = term->val.num; - if (perf_pmus__supports_extended_type()) - attr->config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT; - } + if (!check_pmu_is_core(attr->type, term, parse_state->error)) + return -EINVAL; + attr->config = term->val.num; + if (perf_pmus__supports_extended_type()) + attr->config |= (__u64)attr->type << PERF_PMU_TYPE_SHIFT; + attr->type = PERF_TYPE_HW_CACHE; return 0; } if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER || @@ -1178,6 +1077,9 @@ static int config_term_tracepoint(struct perf_event_attr *attr, case PARSE_EVENTS__TERM_TYPE_CONFIG1: case PARSE_EVENTS__TERM_TYPE_CONFIG2: case PARSE_EVENTS__TERM_TYPE_CONFIG3: + case PARSE_EVENTS__TERM_TYPE_CONFIG4: + case PARSE_EVENTS__TERM_TYPE_LEGACY_HARDWARE_CONFIG: + case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE_CONFIG: case PARSE_EVENTS__TERM_TYPE_NAME: case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD: case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ: @@ -1187,8 +1089,6 @@ static int config_term_tracepoint(struct perf_event_attr *attr, case PARSE_EVENTS__TERM_TYPE_PERCORE: case PARSE_EVENTS__TERM_TYPE_METRIC_ID: case PARSE_EVENTS__TERM_TYPE_RAW: - case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE: - case PARSE_EVENTS__TERM_TYPE_HARDWARE: case PARSE_EVENTS__TERM_TYPE_CPU: case PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV: default: @@ -1321,11 +1221,12 @@ do { \ case PARSE_EVENTS__TERM_TYPE_CONFIG1: case PARSE_EVENTS__TERM_TYPE_CONFIG2: case PARSE_EVENTS__TERM_TYPE_CONFIG3: + case PARSE_EVENTS__TERM_TYPE_CONFIG4: + case PARSE_EVENTS__TERM_TYPE_LEGACY_HARDWARE_CONFIG: + case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE_CONFIG: case PARSE_EVENTS__TERM_TYPE_NAME: case PARSE_EVENTS__TERM_TYPE_METRIC_ID: case PARSE_EVENTS__TERM_TYPE_RAW: - case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE: - case PARSE_EVENTS__TERM_TYPE_HARDWARE: case 
PARSE_EVENTS__TERM_TYPE_CPU: default: break; @@ -1359,6 +1260,9 @@ static int get_config_chgs(struct perf_pmu *pmu, struct parse_events_terms *head case PARSE_EVENTS__TERM_TYPE_CONFIG1: case PARSE_EVENTS__TERM_TYPE_CONFIG2: case PARSE_EVENTS__TERM_TYPE_CONFIG3: + case PARSE_EVENTS__TERM_TYPE_CONFIG4: + case PARSE_EVENTS__TERM_TYPE_LEGACY_HARDWARE_CONFIG: + case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE_CONFIG: case PARSE_EVENTS__TERM_TYPE_NAME: case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD: case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ: @@ -1379,8 +1283,6 @@ static int get_config_chgs(struct perf_pmu *pmu, struct parse_events_terms *head case PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE: case PARSE_EVENTS__TERM_TYPE_METRIC_ID: case PARSE_EVENTS__TERM_TYPE_RAW: - case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE: - case PARSE_EVENTS__TERM_TYPE_HARDWARE: case PARSE_EVENTS__TERM_TYPE_CPU: case PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV: default: @@ -1505,8 +1407,9 @@ static bool config_term_percore(struct list_head *config_terms) static int parse_events_add_pmu(struct parse_events_state *parse_state, struct list_head *list, struct perf_pmu *pmu, const struct parse_events_terms *const_parsed_terms, - struct evsel *first_wildcard_match, u64 alternate_hw_config) + struct evsel *first_wildcard_match) { + u64 alternate_hw_config = PERF_COUNT_HW_MAX; struct perf_event_attr attr; struct perf_pmu_info info; struct evsel *evsel; @@ -1639,7 +1542,7 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state, } int parse_events_multi_pmu_add(struct parse_events_state *parse_state, - const char *event_name, u64 hw_config, + const char *event_name, const struct parse_events_terms *const_parsed_terms, struct list_head **listp, void *loc_) { @@ -1691,7 +1594,7 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, continue; if (!parse_events_add_pmu(parse_state, list, pmu, - &parsed_terms, first_wildcard_match, hw_config)) { + &parsed_terms, first_wildcard_match)) { struct strbuf 
sb; strbuf_init(&sb, /*hint=*/ 0); @@ -1706,7 +1609,7 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, if (parse_state->fake_pmu) { if (!parse_events_add_pmu(parse_state, list, perf_pmus__fake_pmu(), &parsed_terms, - first_wildcard_match, hw_config)) { + first_wildcard_match)) { struct strbuf sb; strbuf_init(&sb, /*hint=*/ 0); @@ -1748,15 +1651,13 @@ int parse_events_multi_pmu_add_or_add_pmu(struct parse_events_state *parse_state /* Attempt to add to list assuming event_or_pmu is a PMU name. */ pmu = perf_pmus__find(event_or_pmu); if (pmu && !parse_events_add_pmu(parse_state, *listp, pmu, const_parsed_terms, - first_wildcard_match, - /*alternate_hw_config=*/PERF_COUNT_HW_MAX)) + first_wildcard_match)) return 0; if (parse_state->fake_pmu) { if (!parse_events_add_pmu(parse_state, *listp, perf_pmus__fake_pmu(), const_parsed_terms, - first_wildcard_match, - /*alternate_hw_config=*/PERF_COUNT_HW_MAX)) + first_wildcard_match)) return 0; } @@ -1769,8 +1670,7 @@ int parse_events_multi_pmu_add_or_add_pmu(struct parse_events_state *parse_state if (!parse_events_add_pmu(parse_state, *listp, pmu, const_parsed_terms, - first_wildcard_match, - /*alternate_hw_config=*/PERF_COUNT_HW_MAX)) { + first_wildcard_match)) { ok++; parse_state->wild_card_pmus = true; } @@ -1784,7 +1684,7 @@ int parse_events_multi_pmu_add_or_add_pmu(struct parse_events_state *parse_state /* Failure to add, assume event_or_pmu is an event name. 
*/ zfree(listp); - if (!parse_events_multi_pmu_add(parse_state, event_or_pmu, PERF_COUNT_HW_MAX, + if (!parse_events_multi_pmu_add(parse_state, event_or_pmu, const_parsed_terms, listp, loc)) return 0; @@ -1957,7 +1857,6 @@ int parse_events__set_default_name(struct list_head *list, char *name) } static int parse_events__scanner(const char *str, - FILE *input, struct parse_events_state *parse_state) { YY_BUFFER_STATE buffer; @@ -1968,10 +1867,7 @@ static int parse_events__scanner(const char *str, if (ret) return ret; - if (str) - buffer = parse_events__scan_string(str, scanner); - else - parse_events_set_in(input, scanner); + buffer = parse_events__scan_string(str, scanner); #ifdef PARSER_DEBUG parse_events_debug = 1; @@ -1979,10 +1875,8 @@ static int parse_events__scanner(const char *str, #endif ret = parse_events_parse(parse_state, scanner); - if (str) { - parse_events__flush_buffer(buffer, scanner); - parse_events__delete_buffer(buffer, scanner); - } + parse_events__flush_buffer(buffer, scanner); + parse_events__delete_buffer(buffer, scanner); parse_events_lex_destroy(scanner); return ret; } @@ -1990,7 +1884,7 @@ static int parse_events__scanner(const char *str, /* * parse event config string, return a list of event terms. */ -int parse_events_terms(struct parse_events_terms *terms, const char *str, FILE *input) +int parse_events_terms(struct parse_events_terms *terms, const char *str) { struct parse_events_state parse_state = { .terms = NULL, @@ -1998,7 +1892,7 @@ int parse_events_terms(struct parse_events_terms *terms, const char *str, FILE * }; int ret; - ret = parse_events__scanner(str, input, &parse_state); + ret = parse_events__scanner(str, &parse_state); if (!ret) list_splice(&parse_state.terms->terms, &terms->terms); @@ -2095,14 +1989,18 @@ static int evlist__cmp(void *_fg_idx, const struct list_head *l, const struct li * event's index is used. An index may be forced for events that * must be in the same group, namely Intel topdown events. 
*/ - if (*force_grouped_idx != -1 && arch_evsel__must_be_in_group(lhs)) { + if (lhs->dont_regroup) { + lhs_sort_idx = lhs_core->idx; + } else if (*force_grouped_idx != -1 && arch_evsel__must_be_in_group(lhs)) { lhs_sort_idx = *force_grouped_idx; } else { bool lhs_has_group = lhs_core->leader != lhs_core || lhs_core->nr_members > 1; lhs_sort_idx = lhs_has_group ? lhs_core->leader->idx : lhs_core->idx; } - if (*force_grouped_idx != -1 && arch_evsel__must_be_in_group(rhs)) { + if (rhs->dont_regroup) { + rhs_sort_idx = rhs_core->idx; + } else if (*force_grouped_idx != -1 && arch_evsel__must_be_in_group(rhs)) { rhs_sort_idx = *force_grouped_idx; } else { bool rhs_has_group = rhs_core->leader != rhs_core || rhs_core->nr_members > 1; @@ -2200,10 +2098,10 @@ static int parse_events__sort_events_and_fix_groups(struct list_head *list) */ idx = 0; list_for_each_entry(pos, list, core.node) { - const struct evsel *pos_leader = evsel__leader(pos); + struct evsel *pos_leader = evsel__leader(pos); const char *pos_pmu_name = pos->group_pmu_name; const char *cur_leader_pmu_name; - bool pos_force_grouped = force_grouped_idx != -1 && + bool pos_force_grouped = force_grouped_idx != -1 && !pos->dont_regroup && arch_evsel__must_be_in_group(pos); /* Reset index and nr_members. */ @@ -2217,8 +2115,8 @@ static int parse_events__sort_events_and_fix_groups(struct list_head *list) * groups can't span PMUs. */ if (!cur_leader || pos->dont_regroup) { - cur_leader = pos; - cur_leaders_grp = &pos->core; + cur_leader = pos->dont_regroup ? 
pos_leader : pos; + cur_leaders_grp = &cur_leader->core; if (pos_force_grouped) force_grouped_leader = pos; } @@ -2302,7 +2200,7 @@ int __parse_events(struct evlist *evlist, const char *str, const char *pmu_filte }; int ret, ret2; - ret = parse_events__scanner(str, /*input=*/ NULL, &parse_state); + ret = parse_events__scanner(str, &parse_state); if (!ret && list_empty(&parse_state.list)) { WARN_ONCE(true, "WARNING: event parser found nothing\n"); @@ -2354,6 +2252,8 @@ int parse_event(struct evlist *evlist, const char *str) parse_events_error__init(&err); ret = parse_events(evlist, str, &err); + if (ret && verbose > 0) + parse_events_error__print(&err, str); parse_events_error__exit(&err); return ret; } @@ -2850,7 +2750,7 @@ void parse_events_terms__delete(struct parse_events_terms *terms) free(terms); } -int parse_events_terms__to_strbuf(const struct parse_events_terms *terms, struct strbuf *sb) +static int parse_events_terms__to_strbuf(const struct parse_events_terms *terms, struct strbuf *sb) { struct parse_events_term *term; bool first = true; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 8f8c8e7fbcf1..3577ab213730 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -9,7 +9,6 @@ #include <stdbool.h> #include <linux/types.h> #include <linux/perf_event.h> -#include <stdio.h> #include <string.h> #include <sys/types.h> @@ -60,6 +59,7 @@ enum parse_events__term_type { PARSE_EVENTS__TERM_TYPE_CONFIG1, PARSE_EVENTS__TERM_TYPE_CONFIG2, PARSE_EVENTS__TERM_TYPE_CONFIG3, + PARSE_EVENTS__TERM_TYPE_CONFIG4, PARSE_EVENTS__TERM_TYPE_NAME, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD, PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ, @@ -80,11 +80,11 @@ enum parse_events__term_type { PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE, PARSE_EVENTS__TERM_TYPE_METRIC_ID, PARSE_EVENTS__TERM_TYPE_RAW, - PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE, - PARSE_EVENTS__TERM_TYPE_HARDWARE, PARSE_EVENTS__TERM_TYPE_CPU, PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV, 
-#define __PARSE_EVENTS__TERM_TYPE_NR (PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV + 1) + PARSE_EVENTS__TERM_TYPE_LEGACY_HARDWARE_CONFIG, + PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE_CONFIG, +#define __PARSE_EVENTS__TERM_TYPE_NR (PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE_CONFIG + 1) }; struct parse_events_term { @@ -132,12 +132,6 @@ struct parse_events_term { * value is assumed to be 1. An event name also has no value. */ bool no_value; - /** - * @alternate_hw_config: config is the event name but num is an - * alternate PERF_TYPE_HARDWARE config value which is often nice for the - * sake of quick matching. - */ - bool alternate_hw_config; }; struct parse_events_error { @@ -199,8 +193,7 @@ void parse_events_term__delete(struct parse_events_term *term); void parse_events_terms__delete(struct parse_events_terms *terms); void parse_events_terms__init(struct parse_events_terms *terms); void parse_events_terms__exit(struct parse_events_terms *terms); -int parse_events_terms(struct parse_events_terms *terms, const char *str, FILE *input); -int parse_events_terms__to_strbuf(const struct parse_events_terms *terms, struct strbuf *sb); +int parse_events_terms(struct parse_events_terms *terms, const char *str); struct parse_events_modifier { u8 precise; /* Number of repeated 'p' for precision. 
*/ @@ -235,9 +228,6 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, u32 type, u64 config, const struct parse_events_terms *head_config, bool wildcard); -int parse_events_add_cache(struct list_head *list, int *idx, const char *name, - struct parse_events_state *parse_state, - struct parse_events_terms *parsed_terms); int parse_events__decode_legacy_cache(const char *name, int pmu_type, __u64 *config); int parse_events_add_breakpoint(struct parse_events_state *parse_state, struct list_head *list, @@ -249,7 +239,7 @@ struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr, struct perf_pmu *pmu); int parse_events_multi_pmu_add(struct parse_events_state *parse_state, - const char *event_name, u64 hw_config, + const char *event_name, const struct parse_events_terms *const_parsed_terms, struct list_head **listp, void *loc); @@ -265,7 +255,6 @@ struct event_symbol { const char *symbol; const char *alias; }; -extern const struct event_symbol event_symbols_hw[]; char *parse_events_formats_error_string(char *additional_terms); diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index d65eb32124c8..251ce4321878 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -73,11 +73,6 @@ static int quoted_str(yyscan_t scanner, int token) return token; } -static int lc_str(yyscan_t scanner, const struct parse_events_state *state) -{ - return str(scanner, state->match_legacy_cache_terms ? 
PE_LEGACY_CACHE : PE_NAME); -} - /* * This function is called when the parser gets two kind of input: * @@ -115,14 +110,6 @@ do { \ yyless(0); \ } while (0) -static int sym(yyscan_t scanner, int config) -{ - YYSTYPE *yylval = parse_events_get_lval(scanner); - - yylval->num = config; - return PE_VALUE_SYM_HW; -} - static int term(yyscan_t scanner, enum parse_events__term_type type) { YYSTYPE *yylval = parse_events_get_lval(scanner); @@ -131,16 +118,6 @@ static int term(yyscan_t scanner, enum parse_events__term_type type) return PE_TERM; } -static int hw_term(yyscan_t scanner, int config) -{ - YYSTYPE *yylval = parse_events_get_lval(scanner); - char *text = parse_events_get_text(scanner); - - yylval->hardware_term.str = strdup(text); - yylval->hardware_term.num = PERF_TYPE_HARDWARE + config; - return PE_TERM_HW; -} - static void modifiers_error(struct parse_events_state *parse_state, yyscan_t scanner, int pos, char mod_char, const char *mod_name) { @@ -251,8 +228,6 @@ drv_cfg_term [a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)? 
*/ modifier_event [ukhpPGHSDIWebRX]{1,17} modifier_bp [rwx]{1,3} -lc_type (L1-dcache|l1-d|l1d|L1-data|L1-icache|l1-i|l1i|L1-instruction|LLC|L2|dTLB|d-tlb|Data-TLB|iTLB|i-tlb|Instruction-TLB|branch|branches|bpu|btb|bpc|node) -lc_op_result (load|loads|read|store|stores|write|prefetch|prefetches|speculative-read|speculative-load|refs|Reference|ops|access|misses|miss) digit [0-9] non_digit [^0-9] @@ -312,6 +287,7 @@ config { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG); } config1 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG1); } config2 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG2); } config3 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG3); } +config4 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG4); } name { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NAME); } period { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); } freq { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ); } @@ -332,23 +308,12 @@ aux-sample-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZ metric-id { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_METRIC_ID); } cpu { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CPU); } ratio-to-prev { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV); } -cpu-cycles|cycles { return hw_term(yyscanner, PERF_COUNT_HW_CPU_CYCLES); } -stalled-cycles-frontend|idle-cycles-frontend { return hw_term(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); } -stalled-cycles-backend|idle-cycles-backend { return hw_term(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); } -instructions { return hw_term(yyscanner, PERF_COUNT_HW_INSTRUCTIONS); } -cache-references { return hw_term(yyscanner, PERF_COUNT_HW_CACHE_REFERENCES); } -cache-misses { return hw_term(yyscanner, PERF_COUNT_HW_CACHE_MISSES); } -branch-instructions|branches { return hw_term(yyscanner, PERF_COUNT_HW_BRANCH_INSTRUCTIONS); } -branch-misses { return hw_term(yyscanner, PERF_COUNT_HW_BRANCH_MISSES); } -bus-cycles 
{ return hw_term(yyscanner, PERF_COUNT_HW_BUS_CYCLES); } -ref-cycles { return hw_term(yyscanner, PERF_COUNT_HW_REF_CPU_CYCLES); } +legacy-hardware-config { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_LEGACY_HARDWARE_CONFIG); } +legacy-cache-config { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE_CONFIG); } r{num_raw_hex} { return str(yyscanner, PE_RAW); } r0x{num_raw_hex} { return str(yyscanner, PE_RAW); } , { return ','; } "/" { BEGIN(INITIAL); return '/'; } -{lc_type} { return lc_str(yyscanner, _parse_state); } -{lc_type}-{lc_op_result} { return lc_str(yyscanner, _parse_state); } -{lc_type}-{lc_op_result}-{lc_op_result} { return lc_str(yyscanner, _parse_state); } {num_dec} { return value(_parse_state, yyscanner, 10); } {num_hex} { return value(_parse_state, yyscanner, 16); } {term_name} { return str(yyscanner, PE_NAME); } @@ -387,20 +352,6 @@ r0x{num_raw_hex} { return str(yyscanner, PE_RAW); } <<EOF>> { BEGIN(INITIAL); } } -cpu-cycles|cycles { return sym(yyscanner, PERF_COUNT_HW_CPU_CYCLES); } -stalled-cycles-frontend|idle-cycles-frontend { return sym(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); } -stalled-cycles-backend|idle-cycles-backend { return sym(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); } -instructions { return sym(yyscanner, PERF_COUNT_HW_INSTRUCTIONS); } -cache-references { return sym(yyscanner, PERF_COUNT_HW_CACHE_REFERENCES); } -cache-misses { return sym(yyscanner, PERF_COUNT_HW_CACHE_MISSES); } -branch-instructions|branches { return sym(yyscanner, PERF_COUNT_HW_BRANCH_INSTRUCTIONS); } -branch-misses { return sym(yyscanner, PERF_COUNT_HW_BRANCH_MISSES); } -bus-cycles { return sym(yyscanner, PERF_COUNT_HW_BUS_CYCLES); } -ref-cycles { return sym(yyscanner, PERF_COUNT_HW_REF_CPU_CYCLES); } - -{lc_type} { return str(yyscanner, PE_LEGACY_CACHE); } -{lc_type}-{lc_op_result} { return str(yyscanner, PE_LEGACY_CACHE); } -{lc_type}-{lc_op_result}-{lc_op_result} { return str(yyscanner, PE_LEGACY_CACHE); } mem: { BEGIN(mem); return 
PE_PREFIX_MEM; } r{num_raw_hex} { return str(yyscanner, PE_RAW); } {num_dec} { return value(_parse_state, yyscanner, 10); } diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index a2361c0040d7..c194de5ec1ec 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -55,22 +55,18 @@ static void free_list_evsel(struct list_head* list_evsel) %} %token PE_START_EVENTS PE_START_TERMS -%token PE_VALUE PE_VALUE_SYM_HW PE_TERM +%token PE_VALUE PE_TERM %token PE_EVENT_NAME %token PE_RAW PE_NAME %token PE_MODIFIER_EVENT PE_MODIFIER_BP PE_BP_COLON PE_BP_SLASH -%token PE_LEGACY_CACHE %token PE_PREFIX_MEM %token PE_ERROR %token PE_DRV_CFG_TERM -%token PE_TERM_HW %type <num> PE_VALUE -%type <num> PE_VALUE_SYM_HW %type <mod> PE_MODIFIER_EVENT %type <term_type> PE_TERM %type <str> PE_RAW %type <str> PE_NAME -%type <str> PE_LEGACY_CACHE %type <str> PE_MODIFIER_BP %type <str> PE_EVENT_NAME %type <str> PE_DRV_CFG_TERM @@ -83,8 +79,6 @@ static void free_list_evsel(struct list_head* list_evsel) %type <list_terms> opt_pmu_config %destructor { parse_events_terms__delete ($$); } <list_terms> %type <list_evsel> event_pmu -%type <list_evsel> event_legacy_symbol -%type <list_evsel> event_legacy_cache %type <list_evsel> event_legacy_mem %type <list_evsel> event_legacy_tracepoint %type <list_evsel> event_legacy_numeric @@ -100,8 +94,6 @@ static void free_list_evsel(struct list_head* list_evsel) %destructor { free_list_evsel ($$); } <list_evsel> %type <tracepoint_name> tracepoint_name %destructor { free ($$.sys); free ($$.event); } <tracepoint_name> -%type <hardware_term> PE_TERM_HW -%destructor { free ($$.str); } <hardware_term> %union { @@ -116,10 +108,6 @@ static void free_list_evsel(struct list_head* list_evsel) char *sys; char *event; } tracepoint_name; - struct hardware_term { - char *str; - u64 num; - } hardware_term; } %% @@ -262,8 +250,6 @@ PE_EVENT_NAME event_def event_def event_def: event_pmu | - event_legacy_symbol | - 
event_legacy_cache sep_dc | event_legacy_mem sep_dc | event_legacy_tracepoint sep_dc | event_legacy_numeric sep_dc | @@ -288,7 +274,7 @@ PE_NAME sep_dc struct list_head *list; int err; - err = parse_events_multi_pmu_add(_parse_state, $1, PERF_COUNT_HW_MAX, NULL, &list, &@1); + err = parse_events_multi_pmu_add(_parse_state, $1, /*const_parsed_terms*/NULL, &list, &@1); if (err < 0) { struct parse_events_state *parse_state = _parse_state; struct parse_events_error *error = parse_state->error; @@ -304,66 +290,6 @@ PE_NAME sep_dc $$ = list; } -event_legacy_symbol: -PE_VALUE_SYM_HW '/' event_config '/' -{ - struct list_head *list; - int err; - - list = alloc_list(); - if (!list) - YYNOMEM; - err = parse_events_add_numeric(_parse_state, list, - PERF_TYPE_HARDWARE, $1, - $3, - /*wildcard=*/true); - parse_events_terms__delete($3); - if (err) { - free_list_evsel(list); - PE_ABORT(err); - } - $$ = list; -} -| -PE_VALUE_SYM_HW sep_slash_slash_dc -{ - struct list_head *list; - int err; - - list = alloc_list(); - if (!list) - YYNOMEM; - err = parse_events_add_numeric(_parse_state, list, - PERF_TYPE_HARDWARE, $1, - /*head_config=*/NULL, - /*wildcard=*/true); - if (err) - PE_ABORT(err); - $$ = list; -} - -event_legacy_cache: -PE_LEGACY_CACHE opt_event_config -{ - struct parse_events_state *parse_state = _parse_state; - struct list_head *list; - int err; - - list = alloc_list(); - if (!list) - YYNOMEM; - - err = parse_events_add_cache(list, &parse_state->idx, $1, parse_state, $2); - - parse_events_terms__delete($2); - free($1); - if (err) { - free_list_evsel(list); - PE_ABORT(err); - } - $$ = list; -} - event_legacy_mem: PE_PREFIX_MEM PE_VALUE PE_BP_SLASH PE_VALUE PE_BP_COLON PE_MODIFIER_BP opt_event_config { @@ -582,12 +508,7 @@ event_term $$ = head; } -name_or_raw: PE_RAW | PE_NAME | PE_LEGACY_CACHE -| -PE_TERM_HW -{ - $$ = $1.str; -} +name_or_raw: PE_RAW | PE_NAME event_term: PE_RAW @@ -629,19 +550,6 @@ name_or_raw '=' PE_VALUE $$ = term; } | -PE_LEGACY_CACHE -{ - struct 
parse_events_term *term; - int err = parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE, - $1, /*num=*/1, /*novalue=*/true, &@1, /*loc_val=*/NULL); - - if (err) { - free($1); - PE_ABORT(err); - } - $$ = term; -} -| PE_NAME { struct parse_events_term *term; @@ -655,20 +563,6 @@ PE_NAME $$ = term; } | -PE_TERM_HW -{ - struct parse_events_term *term; - int err = parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_HARDWARE, - $1.str, $1.num & 255, /*novalue=*/false, - &@1, /*loc_val=*/NULL); - - if (err) { - free($1.str); - PE_ABORT(err); - } - $$ = term; -} -| PE_TERM '=' name_or_raw { struct parse_events_term *term; @@ -737,8 +631,6 @@ PE_DRV_CFG_TERM sep_dc: ':' | -sep_slash_slash_dc: '/' '/' | ':' | - %% void parse_events_error(YYLTYPE *loc, void *_parse_state, diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c index 1de3b69cdf4a..6ecf38314f01 100644 --- a/tools/perf/util/perf_api_probe.c +++ b/tools/perf/util/perf_api_probe.c @@ -59,10 +59,10 @@ out_delete: static bool perf_probe_api(setup_probe_fn_t fn) { - const char *try[] = {"cycles:u", "instructions:u", "cpu-clock:u", NULL}; + struct perf_pmu *pmu; struct perf_cpu_map *cpus; struct perf_cpu cpu; - int ret, i = 0; + int ret = 0; cpus = perf_cpu_map__new_online_cpus(); if (!cpus) @@ -70,12 +70,23 @@ static bool perf_probe_api(setup_probe_fn_t fn) cpu = perf_cpu_map__cpu(cpus, 0); perf_cpu_map__put(cpus); - do { - ret = perf_do_probe_api(fn, cpu, try[i++]); - if (!ret) - return true; - } while (ret == -EAGAIN && try[i]); - + ret = perf_do_probe_api(fn, cpu, "software/cpu-clock/u"); + if (!ret) + return true; + + pmu = perf_pmus__scan_core(/*pmu=*/NULL); + if (pmu) { + const char *try[] = {"cycles", "instructions", NULL}; + char buf[256]; + int i = 0; + + while (ret == -EAGAIN && try[i]) { + snprintf(buf, sizeof(buf), "%s/%s/u", pmu->name, try[i++]); + ret = perf_do_probe_api(fn, cpu, buf); + if (!ret) + return true; + } + } return false; } diff --git 
a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c index 66b666d9ce64..741c3d657a8b 100644 --- a/tools/perf/util/perf_event_attr_fprintf.c +++ b/tools/perf/util/perf_event_attr_fprintf.c @@ -343,6 +343,8 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, PRINT_ATTRf(inherit_thread, p_unsigned); PRINT_ATTRf(remove_on_exec, p_unsigned); PRINT_ATTRf(sigtrap, p_unsigned); + PRINT_ATTRf(defer_callchain, p_unsigned); + PRINT_ATTRf(defer_output, p_unsigned); PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned, false); PRINT_ATTRf(bp_type, p_unsigned); diff --git a/tools/perf/util/pfm.c b/tools/perf/util/pfm.c index e5b3a2a5ddef..d9043f4afbe7 100644 --- a/tools/perf/util/pfm.c +++ b/tools/perf/util/pfm.c @@ -15,6 +15,7 @@ #include "util/strbuf.h" #include "util/thread_map.h" +#include <errno.h> #include <string.h> #include <linux/kernel.h> #include <perfmon/pfmlib_perf_event.h> diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 3d1f975e8db9..956ea273c2c7 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -67,8 +67,13 @@ struct perf_pmu_alias { * json events. */ char *topic; - /** @terms: Owned list of the original parsed parameters. */ - struct parse_events_terms terms; + /** @terms: Owned copy of the event terms. */ + char *terms; + /** + * @legacy_terms: If the event aliases a legacy event, holds a copy + * ofthe legacy event string. + */ + char *legacy_terms; /** * @pmu_name: The name copied from the json struct pmu_event. This can * differ from the PMU name as it won't have suffixes. @@ -101,6 +106,12 @@ struct perf_pmu_alias { * default. */ bool deprecated; + /** + * @legacy_deprecated_checked: Legacy events may not be supported by the + * PMU need to be checked. If they aren't supported they are marked + * deprecated. + */ + bool legacy_deprecated_checked; /** @from_sysfs: Was the alias from sysfs or a json event? 
*/ bool from_sysfs; /** @info_loaded: Have the scale, unit and other values been read from disk? */ @@ -429,7 +440,8 @@ static void perf_pmu_free_alias(struct perf_pmu_alias *alias) zfree(&alias->long_desc); zfree(&alias->topic); zfree(&alias->pmu_name); - parse_events_terms__exit(&alias->terms); + zfree(&alias->terms); + zfree(&alias->legacy_terms); free(alias); } @@ -522,6 +534,7 @@ static void read_alias_info(struct perf_pmu *pmu, struct perf_pmu_alias *alias) struct update_alias_data { struct perf_pmu *pmu; struct perf_pmu_alias *alias; + bool legacy; }; static int update_alias(const struct pmu_event *pe, @@ -537,8 +550,13 @@ static int update_alias(const struct pmu_event *pe, assign_str(pe->name, "topic", &data->alias->topic, pe->topic); data->alias->per_pkg = pe->perpkg; if (pe->event) { - parse_events_terms__exit(&data->alias->terms); - ret = parse_events_terms(&data->alias->terms, pe->event, /*input=*/NULL); + if (data->legacy) { + zfree(&data->alias->legacy_terms); + data->alias->legacy_terms = strdup(pe->event); + } else { + zfree(&data->alias->terms); + data->alias->terms = strdup(pe->event); + } } if (!ret && pe->unit) { char *unit; @@ -563,7 +581,7 @@ static int update_alias(const struct pmu_event *pe, } static int perf_pmu__new_alias(struct perf_pmu *pmu, const char *name, - const char *desc, const char *val, FILE *val_fd, + const char *desc, const char *val, int val_fd, const struct pmu_event *pe, enum event_source src) { struct perf_pmu_alias *alias, *old_alias; @@ -590,7 +608,6 @@ static int perf_pmu__new_alias(struct perf_pmu *pmu, const char *name, if (!alias) return -ENOMEM; - parse_events_terms__init(&alias->terms); alias->scale = 1.0; alias->unit[0] = '\0'; alias->per_pkg = perpkg; @@ -615,13 +632,22 @@ static int perf_pmu__new_alias(struct perf_pmu *pmu, const char *name, if (ret) return ret; - ret = parse_events_terms(&alias->terms, val, val_fd); - if (ret) { - pr_err("Cannot parse alias %s: %d\n", val, ret); - free(alias); - return ret; - } 
+ if (val_fd < 0) { + alias->terms = strdup(val); + } else { + char buf[256]; + struct io io; + size_t line_len; + io__init(&io, val_fd, buf, sizeof(buf)); + ret = io__getline(&io, &alias->terms, &line_len) < 0 ? -errno : 0; + if (ret) { + pr_err("Failed to read alias %s\n", name); + return ret; + } + if (line_len >= 1 && alias->terms[line_len - 1] == '\n') + alias->terms[line_len - 1] = '\0'; + } alias->name = strdup(name); alias->desc = desc ? strdup(desc) : NULL; alias->long_desc = long_desc ? strdup(long_desc) : NULL; @@ -638,15 +664,29 @@ static int perf_pmu__new_alias(struct perf_pmu *pmu, const char *name, default: case EVENT_SRC_SYSFS: alias->from_sysfs = true; - if (pmu->events_table) { + if (pmu->events_table || pmu->is_core) { /* Update an event from sysfs with json data. */ struct update_alias_data data = { .pmu = pmu, .alias = alias, + .legacy = false, }; - if (pmu_events_table__find_event(pmu->events_table, pmu, name, - update_alias, &data) == 0) + if ((pmu_events_table__find_event(pmu->events_table, pmu, name, + update_alias, &data) == 0)) { + /* + * Override sysfs encodings with json encodings + * specific to the cpuid. + */ pmu->cpu_common_json_aliases++; + } + if (pmu->is_core) { + /* Add in legacy encodings. 
*/ + data.legacy = true; + if (pmu_events_table__find_event( + perf_pmu__default_core_events_table(), + pmu, name, update_alias, &data) == 0) + pmu->cpu_common_json_aliases++; + } } pmu->sysfs_aliases++; break; @@ -694,7 +734,6 @@ static int __pmu_aliases_parse(struct perf_pmu *pmu, int events_dir_fd) while ((evt_ent = io_dir__readdir(&event_dir))) { char *name = evt_ent->d_name; int fd; - FILE *file; if (!strcmp(name, ".") || !strcmp(name, "..")) continue; @@ -710,17 +749,12 @@ static int __pmu_aliases_parse(struct perf_pmu *pmu, int events_dir_fd) pr_debug("Cannot open %s\n", name); continue; } - file = fdopen(fd, "r"); - if (!file) { - close(fd); - continue; - } if (perf_pmu__new_alias(pmu, name, /*desc=*/ NULL, - /*val=*/ NULL, file, /*pe=*/ NULL, + /*val=*/ NULL, fd, /*pe=*/ NULL, EVENT_SRC_SYSFS) < 0) pr_debug("Cannot set up %s\n", name); - fclose(file); + close(fd); } pmu->sysfs_aliases_loaded = true; @@ -767,29 +801,29 @@ static int pmu_aliases_parse_eager(struct perf_pmu *pmu, int sysfs_fd) return ret; } -static int pmu_alias_terms(struct perf_pmu_alias *alias, int err_loc, struct list_head *terms) +static int pmu_alias_terms(struct perf_pmu_alias *alias, struct list_head *terms) { - struct parse_events_term *term, *cloned; - struct parse_events_terms clone_terms; - - parse_events_terms__init(&clone_terms); - list_for_each_entry(term, &alias->terms.terms, list) { - int ret = parse_events_term__clone(&cloned, term); + struct parse_events_terms alias_terms; + struct parse_events_term *term; + int ret; - if (ret) { - parse_events_terms__exit(&clone_terms); - return ret; - } + parse_events_terms__init(&alias_terms); + ret = parse_events_terms(&alias_terms, alias->terms); + if (ret) { + pr_err("Cannot parse '%s' terms '%s': %d\n", + alias->name, alias->terms, ret); + parse_events_terms__exit(&alias_terms); + return ret; + } + list_for_each_entry(term, &alias_terms.terms, list) { /* * Weak terms don't override command line options, * which we don't want for 
implicit terms in aliases. */ - cloned->weak = true; - cloned->err_term = cloned->err_val = err_loc; - list_add_tail(&cloned->list, &clone_terms.terms); + term->weak = true; } - list_splice_init(&clone_terms.terms, terms); - parse_events_terms__exit(&clone_terms); + list_splice_init(&alias_terms.terms, terms); + parse_events_terms__exit(&alias_terms); return 0; } @@ -1045,7 +1079,7 @@ static int pmu_add_cpu_aliases_map_callback(const struct pmu_event *pe, { struct perf_pmu *pmu = vdata; - perf_pmu__new_alias(pmu, pe->name, pe->desc, pe->event, /*val_fd=*/ NULL, + perf_pmu__new_alias(pmu, pe->name, pe->desc, pe->event, /*val_fd=*/ -1, pe, EVENT_SRC_CPU_JSON); return 0; } @@ -1061,13 +1095,16 @@ void pmu_add_cpu_aliases_table(struct perf_pmu *pmu, const struct pmu_events_tab static void pmu_add_cpu_aliases(struct perf_pmu *pmu) { - if (!pmu->events_table) + if (!pmu->events_table && !pmu->is_core) return; if (pmu->cpu_aliases_added) return; pmu_add_cpu_aliases_table(pmu, pmu->events_table); + if (pmu->is_core) + pmu_add_cpu_aliases_table(pmu, perf_pmu__default_core_events_table()); + pmu->cpu_aliases_added = true; } @@ -1094,7 +1131,7 @@ static int pmu_add_sys_aliases_iter_fn(const struct pmu_event *pe, pe->name, pe->desc, pe->event, - /*val_fd=*/ NULL, + /*val_fd=*/ -1, pe, EVENT_SRC_SYS_JSON); } @@ -1539,6 +1576,38 @@ static int pmu_config_term(const struct perf_pmu *pmu, assert(term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); pmu_format_value(bits, term->val.num, &attr->config3, zero); break; + case PARSE_EVENTS__TERM_TYPE_CONFIG4: + assert(term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); + pmu_format_value(bits, term->val.num, &attr->config4, zero); + break; + case PARSE_EVENTS__TERM_TYPE_LEGACY_HARDWARE_CONFIG: + assert(term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); + assert(term->val.num < PERF_COUNT_HW_MAX); + assert(pmu->is_core); + attr->config = term->val.num; + if (perf_pmus__supports_extended_type()) + attr->config |= (__u64)pmu->type << 
PERF_PMU_TYPE_SHIFT; + attr->type = PERF_TYPE_HARDWARE; + break; + case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE_CONFIG: { +#ifndef NDEBUG + int cache_type = term->val.num & 0xFF; + int cache_op = (term->val.num >> 8) & 0xFF; + int cache_result = (term->val.num >> 16) & 0xFF; + + assert(cache_type < PERF_COUNT_HW_CACHE_MAX); + assert(cache_op < PERF_COUNT_HW_CACHE_OP_MAX); + assert(cache_result < PERF_COUNT_HW_CACHE_RESULT_MAX); +#endif + assert(term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); + assert((term->val.num & ~0xFFFFFF) == 0); + assert(pmu->is_core); + attr->config = term->val.num; + if (perf_pmus__supports_extended_type()) + attr->config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT; + attr->type = PERF_TYPE_HW_CACHE; + break; + } case PARSE_EVENTS__TERM_TYPE_USER: /* Not hardcoded. */ return -EINVAL; case PARSE_EVENTS__TERM_TYPE_NAME ... PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV: @@ -1586,6 +1655,9 @@ static int pmu_config_term(const struct perf_pmu *pmu, case PERF_PMU_FORMAT_VALUE_CONFIG3: vp = &attr->config3; break; + case PERF_PMU_FORMAT_VALUE_CONFIG4: + vp = &attr->config4; + break; default: return -EINVAL; } @@ -1717,10 +1789,14 @@ static struct perf_pmu_alias *pmu_find_alias(struct perf_pmu *pmu, return alias; /* Alias doesn't exist, try to get it from the json events. 
*/ - if (pmu->events_table && - pmu_events_table__find_event(pmu->events_table, pmu, name, - pmu_add_cpu_aliases_map_callback, - pmu) == 0) { + if ((pmu_events_table__find_event(pmu->events_table, pmu, name, + pmu_add_cpu_aliases_map_callback, + pmu) == 0) || + (pmu->is_core && + pmu_events_table__find_event(perf_pmu__default_core_events_table(), + pmu, name, + pmu_add_cpu_aliases_map_callback, + pmu) == 0)) { alias = perf_pmu__find_alias(pmu, name, /*load=*/ false); } return alias; @@ -1770,6 +1846,24 @@ static int check_info_data(struct perf_pmu *pmu, return 0; } +static int perf_pmu__parse_terms_to_attr(struct perf_pmu *pmu, const char *terms_str, + struct perf_event_attr *attr) +{ + struct parse_events_terms terms; + int ret; + + parse_events_terms__init(&terms); + ret = parse_events_terms(&terms, terms_str); + if (ret) { + pr_debug("Failed to parse terms '%s': %d\n", terms_str, ret); + parse_events_terms__exit(&terms); + return ret; + } + ret = perf_pmu__config(pmu, attr, &terms, /*apply_hardcoded=*/true, /*err=*/NULL); + parse_events_terms__exit(&terms); + return ret; +} + /* * Find alias in the terms list and replace it with the terms * defined for the alias @@ -1813,10 +1907,10 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_ alias = pmu_find_alias(pmu, term); if (!alias) continue; - ret = pmu_alias_terms(alias, term->err_term, &term->list); + ret = pmu_alias_terms(alias, &term->list); if (ret) { parse_events_error__handle(err, term->err_term, - strdup("Failure to duplicate terms"), + strdup("Failed to parse terms"), NULL); return ret; } @@ -1826,12 +1920,23 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_ if (ret) return ret; + if (alias->legacy_terms) { + struct perf_event_attr attr = {.config = 0,}; + + ret = perf_pmu__parse_terms_to_attr(pmu, alias->legacy_terms, &attr); + if (ret) { + parse_events_error__handle(err, term->err_term, + strdup("Error evaluating legacy terms"), + NULL); 
+ return ret; + } + if (attr.type == PERF_TYPE_HARDWARE) + *alternate_hw_config = attr.config & PERF_HW_EVENT_MASK; + } + if (alias->per_pkg) info->per_pkg = true; - if (term->alternate_hw_config) - *alternate_hw_config = term->val.num; - info->retirement_latency_mean = alias->retirement_latency_mean; info->retirement_latency_min = alias->retirement_latency_min; info->retirement_latency_max = alias->retirement_latency_max; @@ -1912,6 +2017,9 @@ int perf_pmu__for_each_format(struct perf_pmu *pmu, void *state, pmu_format_call "config1=0..0xffffffffffffffff", "config2=0..0xffffffffffffffff", "config3=0..0xffffffffffffffff", + "config4=0..0xffffffffffffffff", + "legacy-hardware-config=0..9,", + "legacy-cache-config=0..0xffffff,", "name=string", "period=number", "freq=number", @@ -1937,10 +2045,10 @@ int perf_pmu__for_each_format(struct perf_pmu *pmu, void *state, pmu_format_call /* * max-events and driver-config are missing above as are the internal - * types user, metric-id, raw, legacy cache and hardware. Assert against - * the enum parse_events__term_type so they are kept in sync. + * types user, metric-id, and raw. Assert against the enum + * parse_events__term_type so they are kept in sync. 
*/ - _Static_assert(ARRAY_SIZE(terms) == __PARSE_EVENTS__TERM_TYPE_NR - 6, + _Static_assert(ARRAY_SIZE(terms) == __PARSE_EVENTS__TERM_TYPE_NR - 4, "perf_pmu__for_each_format()'s terms must be kept in sync with enum parse_events__term_type"); list_for_each_entry(format, &pmu->format, list) { perf_pmu_format__load(pmu, format); @@ -1993,9 +2101,13 @@ bool perf_pmu__have_event(struct perf_pmu *pmu, const char *name) return drm_pmu__have_event(pmu, name); if (perf_pmu__find_alias(pmu, name, /*load=*/ true) != NULL) return true; - if (pmu->cpu_aliases_added || !pmu->events_table) + if (pmu->cpu_aliases_added || (!pmu->events_table && !pmu->is_core)) return false; - return pmu_events_table__find_event(pmu->events_table, pmu, name, NULL, NULL) == 0; + if (pmu_events_table__find_event(pmu->events_table, pmu, name, NULL, NULL) == 0) + return true; + return pmu->is_core && + pmu_events_table__find_event(perf_pmu__default_core_events_table(), + pmu, name, NULL, NULL) == 0; } size_t perf_pmu__num_events(struct perf_pmu *pmu) @@ -2012,13 +2124,18 @@ size_t perf_pmu__num_events(struct perf_pmu *pmu) pmu_aliases_parse(pmu); nr = pmu->sysfs_aliases + pmu->sys_json_aliases; - if (pmu->cpu_aliases_added) - nr += pmu->cpu_json_aliases; - else if (pmu->events_table) - nr += pmu_events_table__num_events(pmu->events_table, pmu) - - pmu->cpu_common_json_aliases; - else + if (pmu->cpu_aliases_added) { + nr += pmu->cpu_json_aliases; + } else if (pmu->events_table || pmu->is_core) { + nr += pmu_events_table__num_events(pmu->events_table, pmu); + if (pmu->is_core) { + nr += pmu_events_table__num_events( + perf_pmu__default_core_events_table(), pmu); + } + nr -= pmu->cpu_common_json_aliases; + } else { assert(pmu->cpu_json_aliases == 0 && pmu->cpu_common_json_aliases == 0); + } if (perf_pmu__is_tool(pmu)) nr -= tool_pmu__num_skip_events(); @@ -2036,18 +2153,37 @@ static int sub_non_neg(int a, int b) static char *format_alias(char *buf, int len, const struct perf_pmu *pmu, const struct 
perf_pmu_alias *alias, bool skip_duplicate_pmus) { + struct parse_events_terms terms; struct parse_events_term *term; + int ret, used; size_t pmu_name_len = pmu_deduped_name_len(pmu, pmu->name, skip_duplicate_pmus); - int used = snprintf(buf, len, "%.*s/%s", (int)pmu_name_len, pmu->name, alias->name); - list_for_each_entry(term, &alias->terms.terms, list) { + /* Paramemterized events have the parameters shown. */ + if (strstr(alias->terms, "=?")) { + /* No parameters. */ + snprintf(buf, len, "%.*s/%s/", (int)pmu_name_len, pmu->name, alias->name); + return buf; + } + + parse_events_terms__init(&terms); + ret = parse_events_terms(&terms, alias->terms); + if (ret) { + pr_err("Failure to parse '%s' terms '%s': %d\n", + alias->name, alias->terms, ret); + parse_events_terms__exit(&terms); + snprintf(buf, len, "%.*s/%s/", (int)pmu_name_len, pmu->name, alias->name); + return buf; + } + used = snprintf(buf, len, "%.*s/%s", (int)pmu_name_len, pmu->name, alias->name); + + list_for_each_entry(term, &terms.terms, list) { if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) used += snprintf(buf + used, sub_non_neg(len, used), ",%s=%s", term->config, term->val.str); } - + parse_events_terms__exit(&terms); if (sub_non_neg(len, used) > 0) { buf[used] = '/'; used++; @@ -2061,6 +2197,42 @@ static char *format_alias(char *buf, int len, const struct perf_pmu *pmu, return buf; } +static bool perf_pmu_alias__check_deprecated(struct perf_pmu *pmu, struct perf_pmu_alias *alias) +{ + struct perf_event_attr attr = {.config = 0,}; + const char *check_terms; + bool has_legacy_config; + + if (alias->legacy_deprecated_checked) + return alias->deprecated; + + alias->legacy_deprecated_checked = true; + if (alias->deprecated) + return true; + + check_terms = alias->terms; + has_legacy_config = + strstr(check_terms, "legacy-hardware-config=") != NULL || + strstr(check_terms, "legacy-cache-config=") != NULL; + if (!has_legacy_config && alias->legacy_terms) { + check_terms = alias->legacy_terms; + 
has_legacy_config = + strstr(check_terms, "legacy-hardware-config=") != NULL || + strstr(check_terms, "legacy-cache-config=") != NULL; + } + if (!has_legacy_config) + return false; + + if (perf_pmu__parse_terms_to_attr(pmu, check_terms, &attr) != 0) { + /* Parsing failed, set as deprecated. */ + alias->deprecated = true; + } else if (attr.type < PERF_TYPE_MAX) { + /* Flag unsupported legacy events as deprecated. */ + alias->deprecated = !is_event_supported(attr.type, attr.config); + } + return alias->deprecated; +} + int perf_pmu__for_each_event(struct perf_pmu *pmu, bool skip_duplicate_pmus, void *state, pmu_event_callback cb) { @@ -2070,7 +2242,6 @@ int perf_pmu__for_each_event(struct perf_pmu *pmu, bool skip_duplicate_pmus, .event_type_desc = "Kernel PMU event", }; int ret = 0; - struct strbuf sb; struct hashmap_entry *entry; size_t bkt; @@ -2081,7 +2252,6 @@ int perf_pmu__for_each_event(struct perf_pmu *pmu, bool skip_duplicate_pmus, if (perf_pmu__is_drm(pmu)) return drm_pmu__for_each_event(pmu, state, cb); - strbuf_init(&sb, /*hint=*/ 0); pmu_aliases_parse(pmu); pmu_add_cpu_aliases(pmu); hashmap__for_each_entry(pmu->aliases, entry, bkt) { @@ -2116,16 +2286,14 @@ int perf_pmu__for_each_event(struct perf_pmu *pmu, bool skip_duplicate_pmus, info.desc = event->desc; info.long_desc = event->long_desc; info.encoding_desc = buf + buf_used; - parse_events_terms__to_strbuf(&event->terms, &sb); buf_used += snprintf(buf + buf_used, sizeof(buf) - buf_used, - "%.*s/%s/", (int)pmu_name_len, info.pmu_name, sb.buf) + 1; + "%.*s/%s/", (int)pmu_name_len, info.pmu_name, event->terms) + 1; + info.str = event->terms; info.topic = event->topic; - info.str = sb.buf; - info.deprecated = event->deprecated; + info.deprecated = perf_pmu_alias__check_deprecated(pmu, event); ret = cb(state, &info); if (ret) goto out; - strbuf_setlen(&sb, /*len=*/ 0); } if (pmu->selectable) { info.name = buf; @@ -2141,7 +2309,6 @@ int perf_pmu__for_each_event(struct perf_pmu *pmu, bool skip_duplicate_pmus, 
ret = cb(state, &info); } out: - strbuf_release(&sb); return ret; } @@ -2589,9 +2756,7 @@ const char *perf_pmu__name_from_config(struct perf_pmu *pmu, u64 config) hashmap__for_each_entry(pmu->aliases, entry, bkt) { struct perf_pmu_alias *event = entry->pvalue; struct perf_event_attr attr = {.config = 0,}; - - int ret = perf_pmu__config(pmu, &attr, &event->terms, /*apply_hardcoded=*/true, - /*err=*/NULL); + int ret = perf_pmu__parse_terms_to_attr(pmu, event->terms, &attr); if (ret == 0 && config == attr.config) return event->name; diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 1ebcf0242af8..8f11bfe8ed6d 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -23,6 +23,7 @@ enum { PERF_PMU_FORMAT_VALUE_CONFIG1, PERF_PMU_FORMAT_VALUE_CONFIG2, PERF_PMU_FORMAT_VALUE_CONFIG3, + PERF_PMU_FORMAT_VALUE_CONFIG4, PERF_PMU_FORMAT_VALUE_CONFIG_END, }; @@ -37,6 +38,19 @@ struct perf_pmu_caps { struct list_head list; }; +enum pmu_kind { + /* A perf event syscall PMU. */ + PERF_PMU_KIND_PE, + /* A perf tool provided DRM PMU. */ + PERF_PMU_KIND_DRM, + /* A perf tool provided HWMON PMU. */ + PERF_PMU_KIND_HWMON, + /* Perf tool provided PMU for tool events like time. */ + PERF_PMU_KIND_TOOL, + /* A testing PMU kind. 
*/ + PERF_PMU_KIND_FAKE +}; + enum { PERF_PMU_TYPE_PE_START = 0, PERF_PMU_TYPE_PE_END = 0xFFFDFFFF, @@ -306,4 +320,23 @@ void perf_pmu__delete(struct perf_pmu *pmu); const char *perf_pmu__name_from_config(struct perf_pmu *pmu, u64 config); bool perf_pmu__is_fake(const struct perf_pmu *pmu); +static inline enum pmu_kind perf_pmu__kind(const struct perf_pmu *pmu) +{ + __u32 type; + + if (!pmu) + return PERF_PMU_KIND_PE; + + type = pmu->type; + if (type <= PERF_PMU_TYPE_PE_END) + return PERF_PMU_KIND_PE; + if (type <= PERF_PMU_TYPE_DRM_END) + return PERF_PMU_KIND_DRM; + if (type <= PERF_PMU_TYPE_HWMON_END) + return PERF_PMU_KIND_HWMON; + if (type == PERF_PMU_TYPE_TOOL) + return PERF_PMU_KIND_TOOL; + return PERF_PMU_KIND_FAKE; +} + #endif /* __PMU_H */ diff --git a/tools/perf/util/powerpc-vpadtl.c b/tools/perf/util/powerpc-vpadtl.c index 39a3fb3f1330..d1c3396f182f 100644 --- a/tools/perf/util/powerpc-vpadtl.c +++ b/tools/perf/util/powerpc-vpadtl.c @@ -4,6 +4,7 @@ */ #include <linux/string.h> +#include <errno.h> #include <inttypes.h> #include "color.h" #include "evlist.h" @@ -656,9 +657,7 @@ powerpc_vpadtl_synth_events(struct powerpc_vpadtl *vpa, struct perf_session *ses attr.config = PERF_SYNTH_POWERPC_VPA_DTL; /* create new id val to be a fixed offset from evsel id */ - id = evsel->core.id[0] + 1000000000; - if (!id) - id = 1; + id = auxtrace_synth_id_range_start(evsel); err = perf_session__deliver_synth_attr_event(session, &attr, id); if (err) diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c index 4153124a9948..8f3ed83853a9 100644 --- a/tools/perf/util/print-events.c +++ b/tools/perf/util/print-events.c @@ -186,113 +186,6 @@ bool is_event_supported(u8 type, u64 config) return ret; } -int print_hwcache_events(const struct print_callbacks *print_cb, void *print_state) -{ - struct perf_pmu *pmu = NULL; - const char *event_type_descriptor = event_type_descriptors[PERF_TYPE_HW_CACHE]; - - /* - * Only print core PMUs, skipping uncore for 
performance and - * PERF_TYPE_SOFTWARE that can succeed in opening legacy cache evenst. - */ - while ((pmu = perf_pmus__scan_core(pmu)) != NULL) { - if (pmu->is_uncore || pmu->type == PERF_TYPE_SOFTWARE) - continue; - - for (int type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { - for (int op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { - /* skip invalid cache type */ - if (!evsel__is_cache_op_valid(type, op)) - continue; - - for (int res = 0; res < PERF_COUNT_HW_CACHE_RESULT_MAX; res++) { - char name[64]; - char alias_name[128]; - __u64 config; - int ret; - - __evsel__hw_cache_type_op_res_name(type, op, res, - name, sizeof(name)); - - ret = parse_events__decode_legacy_cache(name, pmu->type, - &config); - if (ret || !is_event_supported(PERF_TYPE_HW_CACHE, config)) - continue; - snprintf(alias_name, sizeof(alias_name), "%s/%s/", - pmu->name, name); - print_cb->print_event(print_state, - "cache", - pmu->name, - pmu->type, - name, - alias_name, - /*scale_unit=*/NULL, - /*deprecated=*/false, - event_type_descriptor, - /*desc=*/NULL, - /*long_desc=*/NULL, - /*encoding_desc=*/NULL); - } - } - } - } - return 0; -} - -void print_symbol_events(const struct print_callbacks *print_cb, void *print_state, - unsigned int type, const struct event_symbol *syms, - unsigned int max) -{ - struct strlist *evt_name_list = strlist__new(NULL, NULL); - struct str_node *nd; - - if (!evt_name_list) { - pr_debug("Failed to allocate new strlist for symbol events\n"); - return; - } - for (unsigned int i = 0; i < max; i++) { - /* - * New attr.config still not supported here, the latest - * example was PERF_COUNT_SW_CGROUP_SWITCHES - */ - if (syms[i].symbol == NULL) - continue; - - if (!is_event_supported(type, i)) - continue; - - if (strlen(syms[i].alias)) { - char name[MAX_NAME_LEN]; - - snprintf(name, MAX_NAME_LEN, "%s OR %s", syms[i].symbol, syms[i].alias); - strlist__add(evt_name_list, name); - } else - strlist__add(evt_name_list, syms[i].symbol); - } - - strlist__for_each_entry(nd, 
evt_name_list) { - char *alias = strstr(nd->s, " OR "); - - if (alias) { - *alias = '\0'; - alias += 4; - } - print_cb->print_event(print_state, - /*topic=*/NULL, - /*pmu_name=*/NULL, - type, - nd->s, - alias, - /*scale_unit=*/NULL, - /*deprecated=*/false, - event_type_descriptors[type], - /*desc=*/NULL, - /*long_desc=*/NULL, - /*encoding_desc=*/NULL); - } - strlist__delete(evt_name_list); -} - /** struct mep - RB-tree node for building printing information. */ struct mep { /** nd - RB-tree element. */ @@ -431,11 +324,6 @@ void metricgroup__print(const struct print_callbacks *print_cb, void *print_stat */ void print_events(const struct print_callbacks *print_cb, void *print_state) { - print_symbol_events(print_cb, print_state, PERF_TYPE_HARDWARE, - event_symbols_hw, PERF_COUNT_HW_MAX); - - print_hwcache_events(print_cb, print_state); - perf_pmus__print_pmu_events(print_cb, print_state); print_cb->print_event(print_state, diff --git a/tools/perf/util/print-events.h b/tools/perf/util/print-events.h index d6ba384f0c66..eabba5d4a1fd 100644 --- a/tools/perf/util/print-events.h +++ b/tools/perf/util/print-events.h @@ -32,11 +32,7 @@ struct print_callbacks { /** Print all events, the default when no options are specified. 
*/ void print_events(const struct print_callbacks *print_cb, void *print_state); -int print_hwcache_events(const struct print_callbacks *print_cb, void *print_state); void print_sdt_events(const struct print_callbacks *print_cb, void *print_state); -void print_symbol_events(const struct print_callbacks *print_cb, void *print_state, - unsigned int type, const struct event_symbol *syms, - unsigned int max); void metricgroup__print(const struct print_callbacks *print_cb, void *print_state); bool is_event_supported(u8 type, u64 config); diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 6ab2eb551b6c..710e4620923e 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -2419,6 +2419,7 @@ void clear_perf_probe_event(struct perf_probe_event *pev) } pev->nargs = 0; zfree(&pev->args); + nsinfo__zput(pev->nsi); } #define strdup_or_goto(str, label) \ @@ -3767,12 +3768,11 @@ void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs) /* Loop 3: cleanup and free trace events */ for (i = 0; i < npevs; i++) { pev = &pevs[i]; - for (j = 0; j < pevs[i].ntevs; j++) - clear_probe_trace_event(&pevs[i].tevs[j]); - zfree(&pevs[i].tevs); - pevs[i].ntevs = 0; - nsinfo__zput(pev->nsi); - clear_perf_probe_event(&pevs[i]); + for (j = 0; j < pev->ntevs; j++) + clear_probe_trace_event(&pev->tevs[j]); + zfree(&pev->tevs); + pev->ntevs = 0; + clear_perf_probe_event(pev); } } diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 779fe1280a56..cc1019d29a5d 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -1340,27 +1340,48 @@ static int prepare_metric(const struct metric_expr *mexp, struct metric_ref *metric_refs = mexp->metric_refs; for (int i = 0; metric_events[i]; i++) { - char *n = strdup(evsel__metric_id(metric_events[i])); + struct evsel *cur = metric_events[i]; double val, ena, run; - int source_count = evsel__source_count(metric_events[i]); - int ret; + int ret, source_count = 0; 
struct perf_counts_values *old_count, *new_count; + char *n = strdup(evsel__metric_id(cur)); if (!n) return -ENOMEM; + /* + * If there are multiple uncore PMUs and we're not reading the + * leader's stats, determine the stats for the appropriate + * uncore PMU. + */ + if (evsel && evsel->metric_leader && + evsel->pmu != evsel->metric_leader->pmu && + cur->pmu == evsel->metric_leader->pmu) { + struct evsel *pos; + + evlist__for_each_entry(evsel->evlist, pos) { + if (pos->pmu != evsel->pmu) + continue; + if (pos->metric_leader != cur) + continue; + cur = pos; + source_count = 1; + break; + } + } + if (source_count == 0) - source_count = 1; + source_count = evsel__source_count(cur); - ret = evsel__ensure_counts(metric_events[i]); + ret = evsel__ensure_counts(cur); if (ret) return ret; /* Set up pointers to the old and newly read counter values. */ - old_count = perf_counts(metric_events[i]->prev_raw_counts, cpu_idx, thread_idx); - new_count = perf_counts(metric_events[i]->counts, cpu_idx, thread_idx); - /* Update the value in metric_events[i]->counts. */ - evsel__read_counter(metric_events[i], cpu_idx, thread_idx); + old_count = perf_counts(cur->prev_raw_counts, cpu_idx, thread_idx); + new_count = perf_counts(cur->counts, cpu_idx, thread_idx); + /* Update the value in cur->counts. 
*/ + evsel__read_counter(cur, cpu_idx, thread_idx); val = new_count->val - old_count->val; ena = new_count->ena - old_count->ena; @@ -1392,6 +1413,7 @@ static PyObject *pyrf_evlist__compute_metric(struct pyrf_evlist *pevlist, struct metric_expr *mexp = NULL; struct expr_parse_ctx *pctx; double result = 0; + struct evsel *metric_evsel = NULL; if (!PyArg_ParseTuple(args, "sii", &metric, &cpu, &thread)) return NULL; @@ -1404,6 +1426,7 @@ static PyObject *pyrf_evlist__compute_metric(struct pyrf_evlist *pevlist, list_for_each(pos, &me->head) { struct metric_expr *e = container_of(pos, struct metric_expr, nd); + struct evsel *pos2; if (strcmp(e->metric_name, metric)) continue; @@ -1411,20 +1434,24 @@ static PyObject *pyrf_evlist__compute_metric(struct pyrf_evlist *pevlist, if (e->metric_events[0] == NULL) continue; - cpu_idx = perf_cpu_map__idx(e->metric_events[0]->core.cpus, - (struct perf_cpu){.cpu = cpu}); - if (cpu_idx < 0) - continue; - - thread_idx = perf_thread_map__idx(e->metric_events[0]->core.threads, - thread); - if (thread_idx < 0) - continue; - - mexp = e; - break; + evlist__for_each_entry(&pevlist->evlist, pos2) { + if (pos2->metric_leader != e->metric_events[0]) + continue; + cpu_idx = perf_cpu_map__idx(pos2->core.cpus, + (struct perf_cpu){.cpu = cpu}); + if (cpu_idx < 0) + continue; + + thread_idx = perf_thread_map__idx(pos2->core.threads, thread); + if (thread_idx < 0) + continue; + metric_evsel = pos2; + mexp = e; + goto done; + } } } +done: if (!mexp) { PyErr_Format(PyExc_TypeError, "Unknown metric '%s' for CPU '%d' and thread '%d'", metric, cpu, thread); @@ -1435,7 +1462,7 @@ static PyObject *pyrf_evlist__compute_metric(struct pyrf_evlist *pevlist, if (!pctx) return PyErr_NoMemory(); - ret = prepare_metric(mexp, mexp->metric_events[0], pctx, cpu_idx, thread_idx); + ret = prepare_metric(mexp, metric_evsel, pctx, cpu_idx, thread_idx); if (ret) { expr__ctx_free(pctx); errno = -ret; @@ -1996,6 +2023,17 @@ static PyObject *pyrf_evlist__from_evlist(struct 
evlist *evlist) else if (leader == NULL) evsel__set_leader(pos, pos); } + + leader = pos->metric_leader; + + if (pos != leader) { + int idx = evlist__pos(evlist, leader); + + if (idx >= 0) + pos->metric_leader = evlist__at(&pevlist->evlist, idx); + else if (leader == NULL) + pos->metric_leader = pos; + } } metricgroup__copy_metric_events(&pevlist->evlist, /*cgrp=*/NULL, &pevlist->evlist.metric_events, @@ -2051,7 +2089,7 @@ static PyObject *pyrf__parse_events(PyObject *self, PyObject *args) static PyObject *pyrf__parse_metrics(PyObject *self, PyObject *args) { - const char *input; + const char *input, *pmu = NULL; struct evlist evlist = {}; PyObject *result; PyObject *pcpus = NULL, *pthreads = NULL; @@ -2059,14 +2097,14 @@ static PyObject *pyrf__parse_metrics(PyObject *self, PyObject *args) struct perf_thread_map *threads; int ret; - if (!PyArg_ParseTuple(args, "s|OO", &input, &pcpus, &pthreads)) + if (!PyArg_ParseTuple(args, "s|sOO", &input, &pmu, &pcpus, &pthreads)) return NULL; threads = pthreads ? ((struct pyrf_thread_map *)pthreads)->threads : NULL; cpus = pcpus ? 
((struct pyrf_cpu_map *)pcpus)->cpus : NULL; evlist__init(&evlist, cpus, threads); - ret = metricgroup__parse_groups(&evlist, /*pmu=*/"all", input, + ret = metricgroup__parse_groups(&evlist, pmu ?: "all", input, /*metric_no_group=*/ false, /*metric_no_merge=*/ false, /*metric_no_threshold=*/ true, diff --git a/tools/perf/util/s390-sample-raw.c b/tools/perf/util/s390-sample-raw.c index 335217bb532b..c6ae0ae8d86a 100644 --- a/tools/perf/util/s390-sample-raw.c +++ b/tools/perf/util/s390-sample-raw.c @@ -19,12 +19,14 @@ #include <sys/stat.h> #include <linux/compiler.h> +#include <linux/err.h> #include <asm/byteorder.h> #include "debug.h" #include "session.h" #include "evlist.h" #include "color.h" +#include "hashmap.h" #include "sample-raw.h" #include "s390-cpumcf-kernel.h" #include "util/pmu.h" @@ -132,8 +134,8 @@ static int get_counterset_start(int setnr) } struct get_counter_name_data { - int wanted; - char *result; + long wanted; + const char *result; }; static int get_counter_name_callback(void *vdata, struct pmu_event_info *info) @@ -151,12 +153,22 @@ static int get_counter_name_callback(void *vdata, struct pmu_event_info *info) rc = sscanf(event_str, "event=%x", &event_nr); if (rc == 1 && event_nr == data->wanted) { - data->result = strdup(info->name); + data->result = info->name; return 1; /* Terminate the search. */ } return 0; } +static size_t get_counter_name_hash_fn(long key, void *ctx __maybe_unused) +{ + return key; +} + +static bool get_counter_name_hashmap_equal_fn(long key1, long key2, void *ctx __maybe_unused) +{ + return key1 == key2; +} + /* Scan the PMU and extract the logical name of a counter from the event. Input * is the counter set and counter number with in the set. Construct the event * number and use this as key. If they match return the name of this counter. 
@@ -164,17 +176,50 @@ static int get_counter_name_callback(void *vdata, struct pmu_event_info *info) */ static char *get_counter_name(int set, int nr, struct perf_pmu *pmu) { + static struct hashmap *cache; + static struct perf_pmu *cache_pmu; + long cache_key = get_counterset_start(set) + nr; struct get_counter_name_data data = { - .wanted = get_counterset_start(set) + nr, + .wanted = cache_key, .result = NULL, }; + char *result = NULL; if (!pmu) return NULL; + if (cache_pmu == pmu && hashmap__find(cache, cache_key, &result)) + return strdup(result); + perf_pmu__for_each_event(pmu, /*skip_duplicate_pmus=*/ true, &data, get_counter_name_callback); - return data.result; + + result = strdup(data.result ?: "<unknown>"); + + if (cache_pmu == NULL) { + struct hashmap *tmp = hashmap__new(get_counter_name_hash_fn, + get_counter_name_hashmap_equal_fn, + /*ctx=*/NULL); + + if (!IS_ERR(tmp)) { + cache = tmp; + cache_pmu = pmu; + } + } + + if (cache_pmu == pmu && result) { + char *old_value = NULL, *new_value = strdup(result); + + if (new_value) { + hashmap__set(cache, cache_key, new_value, /*old_key=*/NULL, &old_value); + /* + * Free in case of a race, but resizing would be broken + * in that case. + */ + free(old_value); + } + } + return result; } static void s390_cpumcfdg_dump(struct perf_pmu *pmu, struct perf_sample *sample) diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h index fae834144ef4..a8307b20a9ea 100644 --- a/tools/perf/util/sample.h +++ b/tools/perf/util/sample.h @@ -107,6 +107,8 @@ struct perf_sample { /** @weight3: On x86 holds retire_lat, on powerpc holds p_stage_cyc. 
*/ u16 weight3; bool no_hw_idx; /* No hw_idx collected in branch_stack */ + bool deferred_callchain; /* Has deferred user callchains */ + u64 deferred_cookie; char insn[MAX_INSN]; void *raw_data; struct ip_callchain *callchain; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 09af486c83e4..4236503c8f6c 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -720,6 +720,7 @@ static perf_event__swap_op perf_event__swap_ops[] = { [PERF_RECORD_CGROUP] = perf_event__cgroup_swap, [PERF_RECORD_TEXT_POKE] = perf_event__text_poke_swap, [PERF_RECORD_AUX_OUTPUT_HW_ID] = perf_event__all64_swap, + [PERF_RECORD_CALLCHAIN_DEFERRED] = perf_event__all64_swap, [PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap, [PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap, [PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap, @@ -854,6 +855,9 @@ static void callchain__printf(struct evsel *evsel, for (i = 0; i < callchain->nr; i++) printf("..... %2d: %016" PRIx64 "\n", i, callchain->ips[i]); + + if (sample->deferred_callchain) + printf("...... 
(deferred)\n"); } static void branch_stack__printf(struct perf_sample *sample, @@ -1123,6 +1127,19 @@ static void dump_sample(struct evsel *evsel, union perf_event *event, sample_read__printf(sample, evsel->core.attr.read_format); } +static void dump_deferred_callchain(struct evsel *evsel, union perf_event *event, + struct perf_sample *sample) +{ + if (!dump_trace) + return; + + printf("(IP, 0x%x): %d/%d: %#" PRIx64 "\n", + event->header.misc, sample->pid, sample->tid, sample->deferred_cookie); + + if (evsel__has_callchain(evsel)) + callchain__printf(evsel, sample); +} + static void dump_read(struct evsel *evsel, union perf_event *event) { struct perf_record_read *read_event = &event->read; @@ -1268,6 +1285,106 @@ static int evlist__deliver_sample(struct evlist *evlist, const struct perf_tool per_thread); } +/* + * Samples with deferred callchains should wait for the next matching + * PERF_RECORD_CALLCHAIN_DEFERRED entries. Keep the events in a list and + * deliver them once it finds the callchains. + */ +struct deferred_event { + struct list_head list; + union perf_event *event; +}; + +/* + * This is called when a deferred callchain record comes up. Find all matching + * samples, merge the callchains and process them. 
+ */ +static int evlist__deliver_deferred_callchain(struct evlist *evlist, + const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct deferred_event *de, *tmp; + struct evsel *evsel; + int ret = 0; + + if (!tool->merge_deferred_callchains) { + evsel = evlist__id2evsel(evlist, sample->id); + return tool->callchain_deferred(tool, event, sample, + evsel, machine); + } + + list_for_each_entry_safe(de, tmp, &evlist->deferred_samples, list) { + struct perf_sample orig_sample; + + ret = evlist__parse_sample(evlist, de->event, &orig_sample); + if (ret < 0) { + pr_err("failed to parse original sample\n"); + break; + } + + if (sample->tid != orig_sample.tid) + continue; + + if (event->callchain_deferred.cookie == orig_sample.deferred_cookie) + sample__merge_deferred_callchain(&orig_sample, sample); + else + orig_sample.deferred_callchain = false; + + evsel = evlist__id2evsel(evlist, orig_sample.id); + ret = evlist__deliver_sample(evlist, tool, de->event, + &orig_sample, evsel, machine); + + if (orig_sample.deferred_callchain) + free(orig_sample.callchain); + + list_del(&de->list); + free(de->event); + free(de); + + if (ret) + break; + } + return ret; +} + +/* + * This is called at the end of the data processing for the session. Flush the + * remaining samples as there's no hope for matching deferred callchains. 
+ */ +static int session__flush_deferred_samples(struct perf_session *session, + const struct perf_tool *tool) +{ + struct evlist *evlist = session->evlist; + struct machine *machine = &session->machines.host; + struct deferred_event *de, *tmp; + struct evsel *evsel; + int ret = 0; + + list_for_each_entry_safe(de, tmp, &evlist->deferred_samples, list) { + struct perf_sample sample; + + ret = evlist__parse_sample(evlist, de->event, &sample); + if (ret < 0) { + pr_err("failed to parse original sample\n"); + break; + } + + evsel = evlist__id2evsel(evlist, sample.id); + ret = evlist__deliver_sample(evlist, tool, de->event, + &sample, evsel, machine); + + list_del(&de->list); + free(de->event); + free(de); + + if (ret) + break; + } + return ret; +} + static int machines__deliver_event(struct machines *machines, struct evlist *evlist, union perf_event *event, @@ -1296,6 +1413,22 @@ static int machines__deliver_event(struct machines *machines, return 0; } dump_sample(evsel, event, sample, perf_env__arch(machine->env)); + if (sample->deferred_callchain && tool->merge_deferred_callchains) { + struct deferred_event *de = malloc(sizeof(*de)); + size_t sz = event->header.size; + + if (de == NULL) + return -ENOMEM; + + de->event = malloc(sz); + if (de->event == NULL) { + free(de); + return -ENOMEM; + } + memcpy(de->event, event, sz); + list_add_tail(&de->list, &evlist->deferred_samples); + return 0; + } return evlist__deliver_sample(evlist, tool, event, sample, evsel, machine); case PERF_RECORD_MMAP: return tool->mmap(tool, event, sample, machine); @@ -1353,6 +1486,10 @@ static int machines__deliver_event(struct machines *machines, return tool->text_poke(tool, event, sample, machine); case PERF_RECORD_AUX_OUTPUT_HW_ID: return tool->aux_output_hw_id(tool, event, sample, machine); + case PERF_RECORD_CALLCHAIN_DEFERRED: + dump_deferred_callchain(evsel, event, sample); + return evlist__deliver_deferred_callchain(evlist, tool, event, + sample, machine); default: 
++evlist->stats.nr_unknown_events; return -1; @@ -1437,19 +1574,19 @@ static s64 perf_session__process_user_event(struct perf_session *session, */ if (!perf_data__is_pipe(session->data)) lseek(fd, file_offset, SEEK_SET); - err = tool->tracing_data(session, event); + err = tool->tracing_data(tool, session, event); break; case PERF_RECORD_HEADER_BUILD_ID: - err = tool->build_id(session, event); + err = tool->build_id(tool, session, event); break; case PERF_RECORD_FINISHED_ROUND: err = tool->finished_round(tool, event, oe); break; case PERF_RECORD_ID_INDEX: - err = tool->id_index(session, event); + err = tool->id_index(tool, session, event); break; case PERF_RECORD_AUXTRACE_INFO: - err = tool->auxtrace_info(session, event); + err = tool->auxtrace_info(tool, session, event); break; case PERF_RECORD_AUXTRACE: /* @@ -1459,45 +1596,45 @@ static s64 perf_session__process_user_event(struct perf_session *session, */ if (!perf_data__is_pipe(session->data)) lseek(fd, file_offset + event->header.size, SEEK_SET); - err = tool->auxtrace(session, event); + err = tool->auxtrace(tool, session, event); break; case PERF_RECORD_AUXTRACE_ERROR: perf_session__auxtrace_error_inc(session, event); - err = tool->auxtrace_error(session, event); + err = tool->auxtrace_error(tool, session, event); break; case PERF_RECORD_THREAD_MAP: - err = tool->thread_map(session, event); + err = tool->thread_map(tool, session, event); break; case PERF_RECORD_CPU_MAP: - err = tool->cpu_map(session, event); + err = tool->cpu_map(tool, session, event); break; case PERF_RECORD_STAT_CONFIG: - err = tool->stat_config(session, event); + err = tool->stat_config(tool, session, event); break; case PERF_RECORD_STAT: - err = tool->stat(session, event); + err = tool->stat(tool, session, event); break; case PERF_RECORD_STAT_ROUND: - err = tool->stat_round(session, event); + err = tool->stat_round(tool, session, event); break; case PERF_RECORD_TIME_CONV: session->time_conv = event->time_conv; - err = 
tool->time_conv(session, event); + err = tool->time_conv(tool, session, event); break; case PERF_RECORD_HEADER_FEATURE: - err = tool->feature(session, event); + err = tool->feature(tool, session, event); break; case PERF_RECORD_COMPRESSED: case PERF_RECORD_COMPRESSED2: - err = tool->compressed(session, event, file_offset, file_path); + err = tool->compressed(tool, session, event, file_offset, file_path); if (err) dump_event(session->evlist, event, file_offset, &sample, file_path); break; case PERF_RECORD_FINISHED_INIT: - err = tool->finished_init(session, event); + err = tool->finished_init(tool, session, event); break; case PERF_RECORD_BPF_METADATA: - err = tool->bpf_metadata(session, event); + err = tool->bpf_metadata(tool, session, event); break; default: err = -EINVAL; @@ -1943,6 +2080,9 @@ done: err = ordered_events__flush(oe, OE_FLUSH__FINAL); if (err) goto out_err; + err = session__flush_deferred_samples(session, tool); + if (err) + goto out_err; err = auxtrace__flush_events(session, tool); if (err) goto out_err; @@ -2289,6 +2429,9 @@ static int __perf_session__process_events(struct perf_session *session) err = auxtrace__flush_events(session, tool); if (err) goto out_err; + err = session__flush_deferred_samples(session, tool); + if (err) + goto out_err; err = perf_session__flush_thread_stacks(session); out_err: ui_progress__finish(); @@ -2409,6 +2552,10 @@ static int __perf_session__process_dir_events(struct perf_session *session) if (ret) goto out_err; + ret = session__flush_deferred_samples(session, tool); + if (ret) + goto out_err; + ret = perf_session__flush_thread_stacks(session); out_err: ui_progress__finish(); @@ -2647,7 +2794,8 @@ static int perf_session__set_guest_cpu(struct perf_session *session, pid_t pid, return 0; } -int perf_event__process_id_index(struct perf_session *session, +int perf_event__process_id_index(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, union perf_event *event) { struct evlist *evlist = 
session->evlist; diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index cf88d65a25cb..22d3ff877e83 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -202,7 +202,8 @@ int perf_session__deliver_synth_attr_event(struct perf_session *session, int perf_session__dsos_hit_all(struct perf_session *session); -int perf_event__process_id_index(struct perf_session *session, +int perf_event__process_id_index(const struct perf_tool *tool, + struct perf_session *session, union perf_event *event); int perf_event__process_finished_round(const struct perf_tool *tool, diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 9cae2c472f4a..b65b1792ca05 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -23,10 +23,17 @@ assert srctree, "Environment variable srctree, for the Linux sources, not set" src_feature_tests = f'{srctree}/tools/build/feature' def clang_has_option(option): - cmd = shlex.split(f"{cc} {cc_options} {option}") - cmd.append(path.join(src_feature_tests, "test-hello.c")) + error_substrings = ( + b"unknown argument", + b"is not supported", + b"unknown warning option" + ) + cmd = shlex.split(f"{cc} {cc_options} {option}") + [ + "-o", "/dev/null", + path.join(src_feature_tests, "test-hello.c") + ] cc_output = Popen(cmd, stderr=PIPE).stderr.readlines() - return [o for o in cc_output if ((b"unknown argument" in o) or (b"is not supported" in o) or (b"unknown warning option" in o))] == [ ] + return not any(any(error in line for error in error_substrings) for line in cc_output) if cc_is_clang: from sysconfig import get_config_vars diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index a67b991f4e81..6d02f84c5691 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -439,9 +439,9 @@ static inline void __new_line_std_csv(struct perf_stat_config *config, aggr_printout(config, os, os->evsel, os->id, os->aggr_nr); } -static inline void 
__new_line_std(struct outstate *os) +static inline void __new_line_std(struct perf_stat_config *config, struct outstate *os) { - fprintf(os->fh, " "); + fprintf(os->fh, "%*s", COUNTS_LEN + EVNAME_LEN + config->unit_width + 2, ""); } static void do_new_line_std(struct perf_stat_config *config, @@ -450,7 +450,7 @@ static void do_new_line_std(struct perf_stat_config *config, __new_line_std_csv(config, os); if (config->aggr_mode == AGGR_NONE) fprintf(os->fh, " "); - __new_line_std(os); + __new_line_std(config, os); } static void print_metric_std(struct perf_stat_config *config, @@ -583,36 +583,13 @@ static void print_metricgroup_header_std(struct perf_stat_config *config, int n; if (!metricgroup_name) { - __new_line_std(os); + __new_line_std(config, os); return; } n = fprintf(config->output, " %*s", EVNAME_LEN, metricgroup_name); - fprintf(config->output, "%*s", MGROUP_LEN - n - 1, ""); -} - -/* Filter out some columns that don't work well in metrics only mode */ - -static bool valid_only_metric(const char *unit) -{ - if (!unit) - return false; - if (strstr(unit, "/sec") || - strstr(unit, "CPUs utilized")) - return false; - return true; -} - -static const char *fixunit(char *buf, struct evsel *evsel, - const char *unit) -{ - if (!strncmp(unit, "of all", 6)) { - snprintf(buf, 1024, "%s %s", evsel__name(evsel), - unit); - return buf; - } - return unit; + fprintf(config->output, "%*s", MGROUP_LEN + config->unit_width + 2 - n, ""); } static void print_metric_only(struct perf_stat_config *config, @@ -621,13 +598,12 @@ static void print_metric_only(struct perf_stat_config *config, { struct outstate *os = ctx; FILE *out = os->fh; - char buf[1024], str[1024]; + char str[1024]; unsigned mlen = config->metric_only_len; const char *color = metric_threshold_classify__color(thresh); - if (!valid_only_metric(unit)) - return; - unit = fixunit(buf, os->evsel, unit); + if (!unit) + unit = ""; if (mlen < strlen(unit)) mlen = strlen(unit) + 1; @@ -643,16 +619,15 @@ static void 
print_metric_only_csv(struct perf_stat_config *config __maybe_unused void *ctx, enum metric_threshold_classify thresh __maybe_unused, const char *fmt, - const char *unit, double val) + const char *unit __maybe_unused, double val) { struct outstate *os = ctx; FILE *out = os->fh; char buf[64], *vals, *ends; - char tbuf[1024]; - if (!valid_only_metric(unit)) + if (!unit) return; - unit = fixunit(tbuf, os->evsel, unit); + snprintf(buf, sizeof(buf), fmt ?: "", val); ends = vals = skip_spaces(buf); while (isdigit(*ends) || *ends == '.') @@ -670,13 +645,9 @@ static void print_metric_only_json(struct perf_stat_config *config __maybe_unuse { struct outstate *os = ctx; char buf[64], *ends; - char tbuf[1024]; const char *vals; - if (!valid_only_metric(unit)) - return; - unit = fixunit(tbuf, os->evsel, unit); - if (!unit[0]) + if (!unit || !unit[0]) return; snprintf(buf, sizeof(buf), fmt ?: "", val); vals = ends = skip_spaces(buf); @@ -695,7 +666,6 @@ static void print_metric_header(struct perf_stat_config *config, const char *unit, double val __maybe_unused) { struct outstate *os = ctx; - char tbuf[1024]; /* In case of iostat, print metric header for first root port only */ if (config->iostat_run && @@ -705,9 +675,8 @@ static void print_metric_header(struct perf_stat_config *config, if (os->evsel->cgrp != os->cgrp) return; - if (!valid_only_metric(unit)) + if (!unit) return; - unit = fixunit(tbuf, os->evsel, unit); if (config->json_output) return; @@ -872,7 +841,7 @@ static void printout(struct perf_stat_config *config, struct outstate *os, out.ctx = os; out.force_header = false; - if (!config->metric_only && !counter->default_metricgroup) { + if (!config->metric_only && (!counter->default_metricgroup || counter->default_show_events)) { abs_printout(config, os, os->id, os->aggr_nr, counter, uval, ok); print_noise(config, os, counter, noise, /*before_metric=*/true); @@ -880,7 +849,7 @@ static void printout(struct perf_stat_config *config, struct outstate *os, } if (ok) { - if 
(!config->metric_only && counter->default_metricgroup) { + if (!config->metric_only && counter->default_metricgroup && !counter->default_show_events) { void *from = NULL; aggr_printout(config, os, os->evsel, os->id, os->aggr_nr); @@ -902,7 +871,7 @@ static void printout(struct perf_stat_config *config, struct outstate *os, &num, from, &out); } while (from != NULL); } else { - perf_stat__print_shadow_stats(config, counter, uval, aggr_idx, &out); + perf_stat__print_shadow_stats(config, counter, aggr_idx, &out); } } else { pm(config, os, METRIC_THRESHOLD_UNKNOWN, /*format=*/NULL, /*unit=*/NULL, /*val=*/0); @@ -944,6 +913,9 @@ static bool should_skip_zero_counter(struct perf_stat_config *config, if (verbose == 0 && counter->skippable && !counter->supported) return true; + /* Metric only counts won't be displayed but the metric wants to be computed. */ + if (config->metric_only) + return false; /* * Skip value 0 when enabling --per-thread globally, * otherwise it will have too many 0 output. 
@@ -1274,7 +1246,7 @@ static void print_metric_headers(struct perf_stat_config *config, os.evsel = counter; - perf_stat__print_shadow_stats(config, counter, 0, 0, &out); + perf_stat__print_shadow_stats(config, counter, /*aggr_idx=*/0, &out); } if (!config->json_output) diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index abaf6b579bfc..9c83f7d96caa 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include <errno.h> #include <math.h> #include <stdio.h> #include "evsel.h" @@ -17,361 +18,32 @@ #include "util/hashmap.h" #include "tool_pmu.h" -struct stats walltime_nsecs_stats; -struct rusage_stats ru_stats; - -enum { - CTX_BIT_USER = 1 << 0, - CTX_BIT_KERNEL = 1 << 1, - CTX_BIT_HV = 1 << 2, - CTX_BIT_HOST = 1 << 3, - CTX_BIT_IDLE = 1 << 4, - CTX_BIT_MAX = 1 << 5, -}; - -enum stat_type { - STAT_NONE = 0, - STAT_NSECS, - STAT_CYCLES, - STAT_INSTRUCTIONS, - STAT_STALLED_CYCLES_FRONT, - STAT_STALLED_CYCLES_BACK, - STAT_BRANCHES, - STAT_BRANCH_MISS, - STAT_CACHE_REFS, - STAT_CACHE_MISSES, - STAT_L1_DCACHE, - STAT_L1_ICACHE, - STAT_LL_CACHE, - STAT_ITLB_CACHE, - STAT_DTLB_CACHE, - STAT_L1D_MISS, - STAT_L1I_MISS, - STAT_LL_MISS, - STAT_DTLB_MISS, - STAT_ITLB_MISS, - STAT_MAX -}; - -static int evsel_context(const struct evsel *evsel) +static bool tool_pmu__is_time_event(const struct perf_stat_config *config, + const struct evsel *evsel, int *tool_aggr_idx) { - int ctx = 0; - - if (evsel->core.attr.exclude_kernel) - ctx |= CTX_BIT_KERNEL; - if (evsel->core.attr.exclude_user) - ctx |= CTX_BIT_USER; - if (evsel->core.attr.exclude_hv) - ctx |= CTX_BIT_HV; - if (evsel->core.attr.exclude_host) - ctx |= CTX_BIT_HOST; - if (evsel->core.attr.exclude_idle) - ctx |= CTX_BIT_IDLE; - - return ctx; -} - -void perf_stat__reset_shadow_stats(void) -{ - memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); - memset(&ru_stats, 0, sizeof(ru_stats)); -} - -static enum stat_type 
evsel__stat_type(struct evsel *evsel) -{ - /* Fake perf_hw_cache_op_id values for use with evsel__match. */ - u64 PERF_COUNT_hw_cache_l1d_miss = PERF_COUNT_HW_CACHE_L1D | - ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16); - u64 PERF_COUNT_hw_cache_l1i_miss = PERF_COUNT_HW_CACHE_L1I | - ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16); - u64 PERF_COUNT_hw_cache_ll_miss = PERF_COUNT_HW_CACHE_LL | - ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16); - u64 PERF_COUNT_hw_cache_dtlb_miss = PERF_COUNT_HW_CACHE_DTLB | - ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16); - u64 PERF_COUNT_hw_cache_itlb_miss = PERF_COUNT_HW_CACHE_ITLB | - ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16); - - if (evsel__is_clock(evsel)) - return STAT_NSECS; - else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) - return STAT_CYCLES; - else if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) - return STAT_INSTRUCTIONS; - else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) - return STAT_STALLED_CYCLES_FRONT; - else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) - return STAT_STALLED_CYCLES_BACK; - else if (evsel__match(evsel, HARDWARE, HW_BRANCH_INSTRUCTIONS)) - return STAT_BRANCHES; - else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) - return STAT_BRANCH_MISS; - else if (evsel__match(evsel, HARDWARE, HW_CACHE_REFERENCES)) - return STAT_CACHE_REFS; - else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) - return STAT_CACHE_MISSES; - else if (evsel__match(evsel, HW_CACHE, HW_CACHE_L1D)) - return STAT_L1_DCACHE; - else if (evsel__match(evsel, HW_CACHE, HW_CACHE_L1I)) - return STAT_L1_ICACHE; - else if (evsel__match(evsel, HW_CACHE, HW_CACHE_LL)) - return STAT_LL_CACHE; - else if (evsel__match(evsel, HW_CACHE, HW_CACHE_DTLB)) - return STAT_DTLB_CACHE; - else if (evsel__match(evsel, 
HW_CACHE, HW_CACHE_ITLB)) - return STAT_ITLB_CACHE; - else if (evsel__match(evsel, HW_CACHE, hw_cache_l1d_miss)) - return STAT_L1D_MISS; - else if (evsel__match(evsel, HW_CACHE, hw_cache_l1i_miss)) - return STAT_L1I_MISS; - else if (evsel__match(evsel, HW_CACHE, hw_cache_ll_miss)) - return STAT_LL_MISS; - else if (evsel__match(evsel, HW_CACHE, hw_cache_dtlb_miss)) - return STAT_DTLB_MISS; - else if (evsel__match(evsel, HW_CACHE, hw_cache_itlb_miss)) - return STAT_ITLB_MISS; - return STAT_NONE; -} - -static enum metric_threshold_classify get_ratio_thresh(const double ratios[3], double val) -{ - assert(ratios[0] > ratios[1]); - assert(ratios[1] > ratios[2]); - - return val > ratios[1] - ? (val > ratios[0] ? METRIC_THRESHOLD_BAD : METRIC_THRESHOLD_NEARLY_BAD) - : (val > ratios[2] ? METRIC_THRESHOLD_LESS_GOOD : METRIC_THRESHOLD_GOOD); -} - -static double find_stat(const struct evsel *evsel, int aggr_idx, enum stat_type type) -{ - struct evsel *cur; - int evsel_ctx = evsel_context(evsel); - struct perf_pmu *evsel_pmu = evsel__find_pmu(evsel); - - evlist__for_each_entry(evsel->evlist, cur) { - struct perf_stat_aggr *aggr; - - /* Ignore the evsel that is being searched from. */ - if (evsel == cur) - continue; - - /* Ignore evsels that are part of different groups. */ - if (evsel->core.leader->nr_members > 1 && - evsel->core.leader != cur->core.leader) - continue; - /* Ignore evsels with mismatched modifiers. */ - if (evsel_ctx != evsel_context(cur)) - continue; - /* Ignore if not the cgroup we're looking for. */ - if (evsel->cgrp != cur->cgrp) - continue; - /* Ignore if not the stat we're looking for. */ - if (type != evsel__stat_type(cur)) - continue; - - /* - * Except the SW CLOCK events, - * ignore if not the PMU we're looking for. 
- */ - if ((type != STAT_NSECS) && (evsel_pmu != evsel__find_pmu(cur))) - continue; - - aggr = &cur->stats->aggr[aggr_idx]; - if (type == STAT_NSECS) - return aggr->counts.val; - return aggr->counts.val * cur->scale; - } - return 0.0; -} - -static void print_ratio(struct perf_stat_config *config, - const struct evsel *evsel, int aggr_idx, - double numerator, struct perf_stat_output_ctx *out, - enum stat_type denominator_type, - const double thresh_ratios[3], const char *_unit) -{ - double denominator = find_stat(evsel, aggr_idx, denominator_type); - double ratio = 0; - enum metric_threshold_classify thresh = METRIC_THRESHOLD_UNKNOWN; - const char *fmt = NULL; - const char *unit = NULL; - - if (numerator && denominator) { - ratio = numerator / denominator * 100.0; - thresh = get_ratio_thresh(thresh_ratios, ratio); - fmt = "%7.2f%%"; - unit = _unit; - } - out->print_metric(config, out->ctx, thresh, fmt, unit, ratio); -} - -static void print_stalled_cycles_front(struct perf_stat_config *config, - const struct evsel *evsel, - int aggr_idx, double stalled, - struct perf_stat_output_ctx *out) -{ - const double thresh_ratios[3] = {50.0, 30.0, 10.0}; - - print_ratio(config, evsel, aggr_idx, stalled, out, STAT_CYCLES, thresh_ratios, - "frontend cycles idle"); -} - -static void print_stalled_cycles_back(struct perf_stat_config *config, - const struct evsel *evsel, - int aggr_idx, double stalled, - struct perf_stat_output_ctx *out) -{ - const double thresh_ratios[3] = {75.0, 50.0, 20.0}; - - print_ratio(config, evsel, aggr_idx, stalled, out, STAT_CYCLES, thresh_ratios, - "backend cycles idle"); -} - -static void print_branch_miss(struct perf_stat_config *config, - const struct evsel *evsel, - int aggr_idx, double misses, - struct perf_stat_output_ctx *out) -{ - const double thresh_ratios[3] = {20.0, 10.0, 5.0}; - - print_ratio(config, evsel, aggr_idx, misses, out, STAT_BRANCHES, thresh_ratios, - "of all branches"); -} - -static void print_l1d_miss(struct perf_stat_config 
*config, - const struct evsel *evsel, - int aggr_idx, double misses, - struct perf_stat_output_ctx *out) -{ - const double thresh_ratios[3] = {20.0, 10.0, 5.0}; - - print_ratio(config, evsel, aggr_idx, misses, out, STAT_L1_DCACHE, thresh_ratios, - "of all L1-dcache accesses"); -} - -static void print_l1i_miss(struct perf_stat_config *config, - const struct evsel *evsel, - int aggr_idx, double misses, - struct perf_stat_output_ctx *out) -{ - const double thresh_ratios[3] = {20.0, 10.0, 5.0}; - - print_ratio(config, evsel, aggr_idx, misses, out, STAT_L1_ICACHE, thresh_ratios, - "of all L1-icache accesses"); -} - -static void print_ll_miss(struct perf_stat_config *config, - const struct evsel *evsel, - int aggr_idx, double misses, - struct perf_stat_output_ctx *out) -{ - const double thresh_ratios[3] = {20.0, 10.0, 5.0}; - - print_ratio(config, evsel, aggr_idx, misses, out, STAT_LL_CACHE, thresh_ratios, - "of all LL-cache accesses"); -} - -static void print_dtlb_miss(struct perf_stat_config *config, - const struct evsel *evsel, - int aggr_idx, double misses, - struct perf_stat_output_ctx *out) -{ - const double thresh_ratios[3] = {20.0, 10.0, 5.0}; - - print_ratio(config, evsel, aggr_idx, misses, out, STAT_DTLB_CACHE, thresh_ratios, - "of all dTLB cache accesses"); -} + enum tool_pmu_event event = evsel__tool_event(evsel); + int aggr_idx; -static void print_itlb_miss(struct perf_stat_config *config, - const struct evsel *evsel, - int aggr_idx, double misses, - struct perf_stat_output_ctx *out) -{ - const double thresh_ratios[3] = {20.0, 10.0, 5.0}; - - print_ratio(config, evsel, aggr_idx, misses, out, STAT_ITLB_CACHE, thresh_ratios, - "of all iTLB cache accesses"); -} - -static void print_cache_miss(struct perf_stat_config *config, - const struct evsel *evsel, - int aggr_idx, double misses, - struct perf_stat_output_ctx *out) -{ - const double thresh_ratios[3] = {20.0, 10.0, 5.0}; - - print_ratio(config, evsel, aggr_idx, misses, out, STAT_CACHE_REFS, thresh_ratios, - 
"of all cache refs"); -} - -static void print_instructions(struct perf_stat_config *config, - const struct evsel *evsel, - int aggr_idx, double instructions, - struct perf_stat_output_ctx *out) -{ - print_metric_t print_metric = out->print_metric; - void *ctxp = out->ctx; - double cycles = find_stat(evsel, aggr_idx, STAT_CYCLES); - double max_stalled = max(find_stat(evsel, aggr_idx, STAT_STALLED_CYCLES_FRONT), - find_stat(evsel, aggr_idx, STAT_STALLED_CYCLES_BACK)); - - if (cycles) { - print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN, "%7.2f ", - "insn per cycle", instructions / cycles); - } else { - print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN, /*fmt=*/NULL, - "insn per cycle", 0); - } - if (max_stalled && instructions) { - if (out->new_line) - out->new_line(config, ctxp); - print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN, "%7.2f ", - "stalled cycles per insn", max_stalled / instructions); - } -} - -static void print_cycles(struct perf_stat_config *config, - const struct evsel *evsel, - int aggr_idx, double cycles, - struct perf_stat_output_ctx *out) -{ - double nsecs = find_stat(evsel, aggr_idx, STAT_NSECS); - - if (cycles && nsecs) { - double ratio = cycles / nsecs; - - out->print_metric(config, out->ctx, METRIC_THRESHOLD_UNKNOWN, "%8.3f", - "GHz", ratio); - } else { - out->print_metric(config, out->ctx, METRIC_THRESHOLD_UNKNOWN, /*fmt=*/NULL, - "GHz", 0); - } -} - -static void print_nsecs(struct perf_stat_config *config, - const struct evsel *evsel, - int aggr_idx __maybe_unused, double nsecs, - struct perf_stat_output_ctx *out) -{ - print_metric_t print_metric = out->print_metric; - void *ctxp = out->ctx; - double wall_time = avg_stats(&walltime_nsecs_stats); + if (event != TOOL_PMU__EVENT_DURATION_TIME && + event != TOOL_PMU__EVENT_USER_TIME && + event != TOOL_PMU__EVENT_SYSTEM_TIME) + return false; - if (wall_time) { - print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN, "%8.3f", "CPUs utilized", - nsecs / (wall_time * evsel->scale)); - } else 
{ - print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN, /*fmt=*/NULL, - "CPUs utilized", 0); + if (config) { + cpu_aggr_map__for_each_idx(aggr_idx, config->aggr_map) { + if (config->aggr_map->map[aggr_idx].cpu.cpu == 0) { + *tool_aggr_idx = aggr_idx; + return true; + } + } + pr_debug("Unexpected CPU0 missing in aggregation for tool event.\n"); } + *tool_aggr_idx = 0; /* Assume the first aggregation index works. */ + return true; } -static int prepare_metric(const struct metric_expr *mexp, +static int prepare_metric(struct perf_stat_config *config, + const struct metric_expr *mexp, const struct evsel *evsel, struct expr_parse_ctx *pctx, int aggr_idx) @@ -381,91 +53,51 @@ static int prepare_metric(const struct metric_expr *mexp, int i; for (i = 0; metric_events[i]; i++) { + int source_count = 0, tool_aggr_idx; + bool is_tool_time = + tool_pmu__is_time_event(config, metric_events[i], &tool_aggr_idx); + struct perf_stat_evsel *ps = metric_events[i]->stats; + struct perf_stat_aggr *aggr; char *n; double val; - int source_count = 0; - - if (evsel__is_tool(metric_events[i])) { - struct stats *stats; - double scale; - switch (evsel__tool_event(metric_events[i])) { - case TOOL_PMU__EVENT_DURATION_TIME: - stats = &walltime_nsecs_stats; - scale = 1e-9; - break; - case TOOL_PMU__EVENT_USER_TIME: - stats = &ru_stats.ru_utime_usec_stat; - scale = 1e-6; - break; - case TOOL_PMU__EVENT_SYSTEM_TIME: - stats = &ru_stats.ru_stime_usec_stat; - scale = 1e-6; + /* + * If there are multiple uncore PMUs and we're not reading the + * leader's stats, determine the stats for the appropriate + * uncore PMU. 
+ */ + if (evsel && evsel->metric_leader && + evsel->pmu != evsel->metric_leader->pmu && + mexp->metric_events[i]->pmu == evsel->metric_leader->pmu) { + struct evsel *pos; + + evlist__for_each_entry(evsel->evlist, pos) { + if (pos->pmu != evsel->pmu) + continue; + if (pos->metric_leader != mexp->metric_events[i]) + continue; + ps = pos->stats; + source_count = 1; break; - case TOOL_PMU__EVENT_NONE: - pr_err("Invalid tool event 'none'"); - abort(); - case TOOL_PMU__EVENT_MAX: - pr_err("Invalid tool event 'max'"); - abort(); - case TOOL_PMU__EVENT_HAS_PMEM: - case TOOL_PMU__EVENT_NUM_CORES: - case TOOL_PMU__EVENT_NUM_CPUS: - case TOOL_PMU__EVENT_NUM_CPUS_ONLINE: - case TOOL_PMU__EVENT_NUM_DIES: - case TOOL_PMU__EVENT_NUM_PACKAGES: - case TOOL_PMU__EVENT_SLOTS: - case TOOL_PMU__EVENT_SMT_ON: - case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ: - default: - pr_err("Unexpected tool event '%s'", evsel__name(metric_events[i])); - abort(); } - val = avg_stats(stats) * scale; - source_count = 1; - } else { - struct perf_stat_evsel *ps = metric_events[i]->stats; - struct perf_stat_aggr *aggr; - + } + /* Time events are always on CPU0, the first aggregation index. */ + aggr = &ps->aggr[is_tool_time ? tool_aggr_idx : aggr_idx]; + if (!aggr || !metric_events[i]->supported) { /* - * If there are multiple uncore PMUs and we're not - * reading the leader's stats, determine the stats for - * the appropriate uncore PMU. + * Not supported events will have a count of 0, which + * can be confusing in a metric. Explicitly set the + * value to NAN. Not counted events (enable time of 0) + * are read as 0. 
*/ - if (evsel && evsel->metric_leader && - evsel->pmu != evsel->metric_leader->pmu && - mexp->metric_events[i]->pmu == evsel->metric_leader->pmu) { - struct evsel *pos; - - evlist__for_each_entry(evsel->evlist, pos) { - if (pos->pmu != evsel->pmu) - continue; - if (pos->metric_leader != mexp->metric_events[i]) - continue; - ps = pos->stats; - source_count = 1; - break; - } - } - aggr = &ps->aggr[aggr_idx]; - if (!aggr) - break; - - if (!metric_events[i]->supported) { - /* - * Not supported events will have a count of 0, - * which can be confusing in a - * metric. Explicitly set the value to NAN. Not - * counted events (enable time of 0) are read as - * 0. - */ - val = NAN; - source_count = 0; - } else { - val = aggr->counts.val; - if (!source_count) - source_count = evsel__source_count(metric_events[i]); - } + val = NAN; + source_count = 0; + } else { + val = aggr->counts.val; + if (is_tool_time) + val *= 1e-9; /* Convert time event nanoseconds to seconds. */ + if (!source_count) + source_count = evsel__source_count(metric_events[i]); } n = strdup(evsel__metric_id(metric_events[i])); if (!n) @@ -511,7 +143,7 @@ static void generic_metric(struct perf_stat_config *config, pctx->sctx.user_requested_cpu_list = strdup(config->user_requested_cpu_list); pctx->sctx.runtime = runtime; pctx->sctx.system_wide = config->system_wide; - i = prepare_metric(mexp, evsel, pctx, aggr_idx); + i = prepare_metric(config, mexp, evsel, pctx, aggr_idx); if (i < 0) { expr__ctx_free(pctx); return; @@ -572,7 +204,7 @@ double test_generic_metric(struct metric_expr *mexp, int aggr_idx) if (!pctx) return NAN; - if (prepare_metric(mexp, /*evsel=*/NULL, pctx, aggr_idx) < 0) + if (prepare_metric(/*config=*/NULL, mexp, /*evsel=*/NULL, pctx, aggr_idx) < 0) goto out; if (expr__parse(&ratio, pctx, mexp->metric_expr)) @@ -601,11 +233,9 @@ static void perf_stat__print_metricgroup_header(struct perf_stat_config *config, * event. Only align with other metics from * different metric events. 
*/ - if (last_name && !strcmp(last_name, name)) { - if (!need_full_name || last_pmu != evsel->pmu) { - out->print_metricgroup_header(config, ctxp, NULL); - return; - } + if (last_name && !strcmp(last_name, name) && last_pmu == evsel->pmu) { + out->print_metricgroup_header(config, ctxp, NULL); + return; } if (need_full_name && evsel->pmu) @@ -665,7 +295,7 @@ void *perf_stat__print_shadow_stats_metricgroup(struct perf_stat_config *config, if (strcmp(name, mexp->default_metricgroup_name)) return (void *)mexp; /* Only print the name of the metricgroup once */ - if (!header_printed) { + if (!header_printed && !evsel->default_show_events) { header_printed = true; perf_stat__print_metricgroup_header(config, evsel, ctxp, name, out); @@ -682,56 +312,15 @@ void *perf_stat__print_shadow_stats_metricgroup(struct perf_stat_config *config, void perf_stat__print_shadow_stats(struct perf_stat_config *config, struct evsel *evsel, - double avg, int aggr_idx, + int aggr_idx, struct perf_stat_output_ctx *out) { - typedef void (*stat_print_function_t)(struct perf_stat_config *config, - const struct evsel *evsel, - int aggr_idx, double misses, - struct perf_stat_output_ctx *out); - static const stat_print_function_t stat_print_function[STAT_MAX] = { - [STAT_INSTRUCTIONS] = print_instructions, - [STAT_BRANCH_MISS] = print_branch_miss, - [STAT_L1D_MISS] = print_l1d_miss, - [STAT_L1I_MISS] = print_l1i_miss, - [STAT_DTLB_MISS] = print_dtlb_miss, - [STAT_ITLB_MISS] = print_itlb_miss, - [STAT_LL_MISS] = print_ll_miss, - [STAT_CACHE_MISSES] = print_cache_miss, - [STAT_STALLED_CYCLES_FRONT] = print_stalled_cycles_front, - [STAT_STALLED_CYCLES_BACK] = print_stalled_cycles_back, - [STAT_CYCLES] = print_cycles, - [STAT_NSECS] = print_nsecs, - }; print_metric_t print_metric = out->print_metric; void *ctxp = out->ctx; - int num = 1; + int num = 0; - if (config->iostat_run) { + if (config->iostat_run) iostat_print_metric(config, evsel, out); - } else { - stat_print_function_t fn = 
stat_print_function[evsel__stat_type(evsel)]; - - if (fn) - fn(config, evsel, aggr_idx, avg, out); - else { - double nsecs = find_stat(evsel, aggr_idx, STAT_NSECS); - - if (nsecs) { - char unit = ' '; - char unit_buf[10] = "/sec"; - double ratio = convert_unit_double(1000000000.0 * avg / nsecs, - &unit); - - if (unit != ' ') - snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); - print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN, "%8.3f", - unit_buf, ratio); - } else { - num = 0; - } - } - } perf_stat__print_shadow_stats_metricgroup(config, evsel, aggr_idx, &num, NULL, out); diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 101ed6c497bc..976a06e63252 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -645,7 +645,8 @@ void perf_stat_process_percore(struct perf_stat_config *config, struct evlist *e evsel__process_percore(evsel); } -int perf_event__process_stat_event(struct perf_session *session, +int perf_event__process_stat_event(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, union perf_event *event) { struct perf_counts_values count, *ptr; diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 34f30a295f89..f986911c9296 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -56,11 +56,6 @@ enum aggr_mode { AGGR_MAX }; -struct rusage_stats { - struct stats ru_utime_usec_stat; - struct stats ru_stime_usec_stat; -}; - typedef struct aggr_cpu_id (*aggr_get_id_t)(struct perf_stat_config *config, struct perf_cpu cpu); struct perf_stat_config { @@ -102,7 +97,6 @@ struct perf_stat_config { const char *csv_sep; struct stats *walltime_nsecs_stats; struct rusage ru_data; - struct rusage_stats *ru_stats; struct cpu_aggr_map *aggr_map; aggr_get_id_t aggr_get_id; struct cpu_aggr_map *cpus_aggr_map; @@ -132,26 +126,9 @@ static inline void init_stats(struct stats *stats) stats->max = 0; } -static inline void init_rusage_stats(struct rusage_stats *ru_stats) { - 
init_stats(&ru_stats->ru_utime_usec_stat); - init_stats(&ru_stats->ru_stime_usec_stat); -} - -static inline void update_rusage_stats(struct rusage_stats *ru_stats, struct rusage* rusage) { - const u64 us_to_ns = 1000; - const u64 s_to_ns = 1000000000; - update_stats(&ru_stats->ru_utime_usec_stat, - (rusage->ru_utime.tv_usec * us_to_ns + rusage->ru_utime.tv_sec * s_to_ns)); - update_stats(&ru_stats->ru_stime_usec_stat, - (rusage->ru_stime.tv_usec * us_to_ns + rusage->ru_stime.tv_sec * s_to_ns)); -} - struct evsel; struct evlist; -extern struct stats walltime_nsecs_stats; -extern struct rusage_stats ru_stats; - enum metric_threshold_classify { METRIC_THRESHOLD_UNKNOWN, METRIC_THRESHOLD_BAD, @@ -184,7 +161,7 @@ struct perf_stat_output_ctx { void perf_stat__print_shadow_stats(struct perf_stat_config *config, struct evsel *evsel, - double avg, int aggr_idx, + int aggr_idx, struct perf_stat_output_ctx *out); bool perf_stat__skip_metric_event(struct evsel *evsel, u64 ena, u64 run); void *perf_stat__print_shadow_stats_metricgroup(struct perf_stat_config *config, @@ -216,7 +193,8 @@ union perf_event; struct perf_session; struct target; -int perf_event__process_stat_event(struct perf_session *session, +int perf_event__process_stat_event(const struct perf_tool *tool, + struct perf_session *session, union perf_event *event); size_t perf_event__fprintf_stat(union perf_event *event, FILE *fp); diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 9e820599bab3..957143fbf8a0 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -860,20 +860,20 @@ out: return err; } -static int read_build_id(const char *filename, struct build_id *bid, bool block) +static int read_build_id(const char *filename, struct build_id *bid) { size_t size = sizeof(bid->data); int fd, err; Elf *elf; - err = libbfd__read_build_id(filename, bid, block); + err = libbfd__read_build_id(filename, bid); if (err >= 0) goto out; if (size < BUILD_ID_SIZE) goto out; - 
fd = open(filename, block ? O_RDONLY : (O_RDONLY | O_NONBLOCK)); + fd = open(filename, O_RDONLY); if (fd < 0) goto out; @@ -894,7 +894,7 @@ out: return err; } -int filename__read_build_id(const char *filename, struct build_id *bid, bool block) +int filename__read_build_id(const char *filename, struct build_id *bid) { struct kmod_path m = { .name = NULL, }; char path[PATH_MAX]; @@ -902,6 +902,8 @@ int filename__read_build_id(const char *filename, struct build_id *bid, bool blo if (!filename) return -EFAULT; + if (!is_regular_file(filename)) + return -EWOULDBLOCK; err = kmod_path__parse(&m, filename); if (err) @@ -918,10 +920,9 @@ int filename__read_build_id(const char *filename, struct build_id *bid, bool blo } close(fd); filename = path; - block = true; } - err = read_build_id(filename, bid, block); + err = read_build_id(filename, bid); if (m.comp) unlink(filename); @@ -1446,8 +1447,11 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, map__set_mapping_type(curr_map, MAPPING_TYPE__IDENTITY); } dso__set_symtab_type(curr_dso, dso__symtab_type(dso)); - if (maps__insert(kmaps, curr_map)) + if (maps__insert(kmaps, curr_map)) { + dso__put(curr_dso); + map__put(curr_map); return -1; + } dsos__add(&maps__machine(kmaps)->dsos, curr_dso); dso__set_loaded(curr_dso); dso__put(*curr_dsop); diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index aeb253248895..c6b17c14a2e9 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -85,7 +85,7 @@ int filename__read_debuglink(const char *filename __maybe_unused, /* * Just try PT_NOTE header otherwise fails */ -int filename__read_build_id(const char *filename, struct build_id *bid, bool block) +int filename__read_build_id(const char *filename, struct build_id *bid) { int fd, ret = -1; bool need_swap = false, elf32; @@ -102,7 +102,12 @@ int filename__read_build_id(const char *filename, struct build_id *bid, bool blo void *phdr, *buf = NULL; ssize_t 
phdr_size, ehdr_size, buf_size = 0; - fd = open(filename, block ? O_RDONLY : (O_RDONLY | O_NONBLOCK)); + if (!filename) + return -EFAULT; + if (!is_regular_file(filename)) + return -EWOULDBLOCK; + + fd = open(filename, O_RDONLY); if (fd < 0) return -1; @@ -323,7 +328,7 @@ int dso__load_sym(struct dso *dso, struct map *map __maybe_unused, if (ret >= 0) RC_CHK_ACCESS(dso)->is_64_bit = ret; - if (filename__read_build_id(ss->name, &bid, /*block=*/true) > 0) + if (filename__read_build_id(ss->name, &bid) > 0) dso__set_build_id(dso, &bid); return 0; } diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index cc26b7bf302b..814f960fa8f8 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -112,9 +112,13 @@ static bool symbol_type__filter(char __symbol_type) // 'N' first seen in: // ffffffff9b35d130 N __pfx__RNCINvNtNtNtCsbDUBuN8AbD4_4core4iter8adapters3map12map_try_foldjNtCs6vVzKs5jPr6_12drm_panic_qr7VersionuINtNtNtBa_3ops12control_flow11ControlFlowB10_ENcB10_0NCINvNvNtNtNtB8_6traits8iterator8Iterator4find5checkB10_NCNvMB12_B10_13from_segments0E0E0B12_ // a seemingly Rust mangled name + // Ditto for '1': + // root@x1:~# grep ' 1 ' /proc/kallsyms + // ffffffffb098bc00 1 __pfx__RNCINvNtNtNtCsfwaGRd4cjqE_4core4iter8adapters3map12map_try_foldjNtCskFudTml27HW_12drm_panic_qr7VersionuINtNtNtBa_3ops12control_flow11ControlFlowB10_ENcB10_0NCINvNvNtNtNtB8_6traits8iterator8Iterator4find5checkB10_NCNvMB12_B10_13from_segments0E0E0B12_ + // ffffffffb098bc10 1 _RNCINvNtNtNtCsfwaGRd4cjqE_4core4iter8adapters3map12map_try_foldjNtCskFudTml27HW_12drm_panic_qr7VersionuINtNtNtBa_3ops12control_flow11ControlFlowB10_ENcB10_0NCINvNvNtNtNtB8_6traits8iterator8Iterator4find5checkB10_NCNvMB12_B10_13from_segments0E0E0B12_ char symbol_type = toupper(__symbol_type); return symbol_type == 'T' || symbol_type == 'W' || symbol_type == 'D' || symbol_type == 'B' || - __symbol_type == 'u' || __symbol_type == 'l' || __symbol_type == 'N'; + __symbol_type == 'u' || __symbol_type == 'l' 
|| __symbol_type == 'N' || __symbol_type == '1'; } static int prefix_underscores_count(const char *str) @@ -951,7 +955,8 @@ static int maps__split_kallsyms(struct maps *kmaps, struct dso *dso, u64 delta, pos->end -= delta; } - if (count == 0) { + if (map__start(initial_map) <= (pos->start + delta) && + (pos->start + delta) < map__end(initial_map)) { map__zput(curr_map); curr_map = map__get(initial_map); goto add_symbol; @@ -960,11 +965,11 @@ static int maps__split_kallsyms(struct maps *kmaps, struct dso *dso, u64 delta, if (dso__kernel(dso) == DSO_SPACE__KERNEL_GUEST) snprintf(dso_name, sizeof(dso_name), "[guest.kernel].%d", - kernel_range++); + kernel_range); else snprintf(dso_name, sizeof(dso_name), "[kernel].%d", - kernel_range++); + kernel_range); ndso = dso__new(dso_name); map__zput(curr_map); @@ -972,6 +977,7 @@ static int maps__split_kallsyms(struct maps *kmaps, struct dso *dso, u64 delta, return -1; dso__set_kernel(ndso, dso__kernel(dso)); + dso__set_loaded(ndso); curr_map = map__new2(pos->start, ndso); if (curr_map == NULL) { @@ -985,6 +991,7 @@ static int maps__split_kallsyms(struct maps *kmaps, struct dso *dso, u64 delta, dso__put(ndso); return -1; } + dso__put(ndso); ++kernel_range; } else if (delta) { /* Kernel was relocated at boot time */ @@ -1743,14 +1750,13 @@ int dso__load(struct dso *dso, struct map *map) /* * Read the build id if possible. This is required for - * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work. Don't block in case path - * isn't for a regular file. + * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work. 
*/ if (!dso__has_build_id(dso)) { struct build_id bid = { .size = 0, }; __symbol__join_symfs(name, PATH_MAX, dso__long_name(dso)); - if (filename__read_build_id(name, &bid, /*block=*/false) > 0) + if (filename__read_build_id(name, &bid) > 0) dso__set_build_id(dso, &bid); } @@ -2001,6 +2007,7 @@ static char *dso__find_kallsyms(struct dso *dso, struct map *map) char sbuild_id[SBUILD_ID_SIZE]; bool is_host = false; char path[PATH_MAX]; + struct maps *kmaps = map__kmaps(map); if (!dso__has_build_id(dso)) { /* @@ -2037,8 +2044,13 @@ static char *dso__find_kallsyms(struct dso *dso, struct map *map) return strdup(path); /* Use current /proc/kallsyms if possible */ - if (is_host) { proc_kallsyms: + if (kmaps) { + struct machine *machine = maps__machine(kmaps); + + scnprintf(path, sizeof(path), "%s/proc/kallsyms", machine->root_dir); + return strdup(path); + } else if (is_host) { return strdup("/proc/kallsyms"); } diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 347106218799..3fb5d146d9b1 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -140,7 +140,7 @@ struct symbol *dso__next_symbol(struct symbol *sym); enum dso_type dso__type_fd(int fd); -int filename__read_build_id(const char *filename, struct build_id *id, bool block); +int filename__read_build_id(const char *filename, struct build_id *id); int sysfs__read_build_id(const char *filename, struct build_id *bid); int modules__parse(const char *filename, void *arg, int (*process_module)(void *arg, const char *name, diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index fcd1fd13c30e..2ba9fa25e00a 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -389,7 +389,7 @@ static void perf_record_mmap2__read_build_id(struct perf_record_mmap2 *event, dso_id.ino_generation = event->ino_generation; dso_id.mmap2_valid = true; dso_id.mmap2_ino_generation_valid = true; - }; + } dso = dsos__findnew_id(&machine->dsos, 
event->filename, &dso_id); if (dso && dso__has_build_id(dso)) { @@ -401,7 +401,7 @@ static void perf_record_mmap2__read_build_id(struct perf_record_mmap2 *event, nsi = nsinfo__new(event->pid); nsinfo__mountns_enter(nsi, &nc); - rc = filename__read_build_id(event->filename, &bid, /*block=*/false) > 0 ? 0 : -1; + rc = filename__read_build_id(event->filename, &bid) > 0 ? 0 : -1; nsinfo__mountns_exit(&nc); nsinfo__put(nsi); diff --git a/tools/perf/util/synthetic-events.h b/tools/perf/util/synthetic-events.h index ee29615d68e5..f8588b6cf11a 100644 --- a/tools/perf/util/synthetic-events.h +++ b/tools/perf/util/synthetic-events.h @@ -107,24 +107,9 @@ int machine__synthesize_threads(struct machine *machine, struct target *target, struct perf_thread_map *threads, bool needs_mmap, bool data_mmap, unsigned int nr_threads_synthesize); -#ifdef HAVE_AUXTRACE_SUPPORT int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr, const struct perf_tool *tool, struct perf_session *session, perf_event__handler_t process); -#else // HAVE_AUXTRACE_SUPPORT - -#include <errno.h> - -static inline int -perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr __maybe_unused, - const struct perf_tool *tool __maybe_unused, - struct perf_session *session __maybe_unused, - perf_event__handler_t process __maybe_unused) -{ - return -EINVAL; -} -#endif // HAVE_AUXTRACE_SUPPORT - #ifdef HAVE_LIBBPF_SUPPORT int perf_event__synthesize_bpf_events(struct perf_session *session, perf_event__handler_t process, struct machine *machine, struct record_opts *opts); diff --git a/tools/perf/util/tool.c b/tools/perf/util/tool.c index e83c7ababc2a..27ba5849c74a 100644 --- a/tools/perf/util/tool.c +++ b/tools/perf/util/tool.c @@ -13,7 +13,8 @@ #include <unistd.h> #ifdef HAVE_ZSTD_SUPPORT -static int perf_session__process_compressed_event(struct perf_session *session, +static int perf_session__process_compressed_event(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, union 
perf_event *event, u64 file_offset, const char *file_path) { @@ -79,10 +80,9 @@ static int perf_session__process_compressed_event(struct perf_session *session, } #endif -static int process_event_synth_tracing_data_stub(struct perf_session *session - __maybe_unused, - union perf_event *event - __maybe_unused) +static int process_event_synth_tracing_data_stub(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) { dump_printf(": unhandled!\n"); return 0; @@ -90,8 +90,7 @@ static int process_event_synth_tracing_data_stub(struct perf_session *session static int process_event_synth_attr_stub(const struct perf_tool *tool __maybe_unused, union perf_event *event __maybe_unused, - struct evlist **pevlist - __maybe_unused) + struct evlist **pevlist __maybe_unused) { dump_printf(": unhandled!\n"); return 0; @@ -99,8 +98,7 @@ static int process_event_synth_attr_stub(const struct perf_tool *tool __maybe_un static int process_event_synth_event_update_stub(const struct perf_tool *tool __maybe_unused, union perf_event *event __maybe_unused, - struct evlist **pevlist - __maybe_unused) + struct evlist **pevlist __maybe_unused) { if (dump_trace) perf_event__fprintf_event_update(event, stdout); @@ -151,7 +149,8 @@ static int skipn(int fd, off_t n) return 0; } -static s64 process_event_auxtrace_stub(struct perf_session *session __maybe_unused, +static s64 process_event_auxtrace_stub(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, union perf_event *event) { dump_printf(": unhandled!\n"); @@ -160,7 +159,8 @@ static s64 process_event_auxtrace_stub(struct perf_session *session __maybe_unus return event->auxtrace.size; } -static int process_event_op2_stub(struct perf_session *session __maybe_unused, +static int process_event_op2_stub(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, union perf_event *event __maybe_unused) { 
dump_printf(": unhandled!\n"); @@ -169,7 +169,8 @@ static int process_event_op2_stub(struct perf_session *session __maybe_unused, static -int process_event_thread_map_stub(struct perf_session *session __maybe_unused, +int process_event_thread_map_stub(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, union perf_event *event __maybe_unused) { if (dump_trace) @@ -180,7 +181,8 @@ int process_event_thread_map_stub(struct perf_session *session __maybe_unused, } static -int process_event_cpu_map_stub(struct perf_session *session __maybe_unused, +int process_event_cpu_map_stub(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, union perf_event *event __maybe_unused) { if (dump_trace) @@ -191,7 +193,8 @@ int process_event_cpu_map_stub(struct perf_session *session __maybe_unused, } static -int process_event_stat_config_stub(struct perf_session *session __maybe_unused, +int process_event_stat_config_stub(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, union perf_event *event __maybe_unused) { if (dump_trace) @@ -201,7 +204,8 @@ int process_event_stat_config_stub(struct perf_session *session __maybe_unused, return 0; } -static int process_stat_stub(struct perf_session *perf_session __maybe_unused, +static int process_stat_stub(const struct perf_tool *tool __maybe_unused, + struct perf_session *perf_session __maybe_unused, union perf_event *event) { if (dump_trace) @@ -211,7 +215,8 @@ static int process_stat_stub(struct perf_session *perf_session __maybe_unused, return 0; } -static int process_stat_round_stub(struct perf_session *perf_session __maybe_unused, +static int process_stat_round_stub(const struct perf_tool *tool __maybe_unused, + struct perf_session *perf_session __maybe_unused, union perf_event *event) { if (dump_trace) @@ -221,7 +226,8 @@ static int process_stat_round_stub(struct perf_session *perf_session __maybe_unu return 0; } -static 
int process_event_time_conv_stub(struct perf_session *perf_session __maybe_unused, +static int process_event_time_conv_stub(const struct perf_tool *tool __maybe_unused, + struct perf_session *perf_session __maybe_unused, union perf_event *event) { if (dump_trace) @@ -231,7 +237,8 @@ static int process_event_time_conv_stub(struct perf_session *perf_session __mayb return 0; } -static int perf_session__process_compressed_event_stub(struct perf_session *session __maybe_unused, +static int perf_session__process_compressed_event_stub(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, union perf_event *event __maybe_unused, u64 file_offset __maybe_unused, const char *file_path __maybe_unused) @@ -240,7 +247,8 @@ static int perf_session__process_compressed_event_stub(struct perf_session *sess return 0; } -static int perf_event__process_bpf_metadata_stub(struct perf_session *perf_session __maybe_unused, +static int perf_event__process_bpf_metadata_stub(const struct perf_tool *tool __maybe_unused, + struct perf_session *perf_session __maybe_unused, union perf_event *event) { if (dump_trace) @@ -258,6 +266,7 @@ void perf_tool__init(struct perf_tool *tool, bool ordered_events) tool->cgroup_events = false; tool->no_warn = false; tool->show_feat_hdr = SHOW_FEAT_NO_HEADER; + tool->merge_deferred_callchains = true; tool->sample = process_event_sample_stub; tool->mmap = process_event_stub; @@ -279,6 +288,7 @@ void perf_tool__init(struct perf_tool *tool, bool ordered_events) tool->read = process_event_sample_stub; tool->throttle = process_event_stub; tool->unthrottle = process_event_stub; + tool->callchain_deferred = process_event_sample_stub; tool->attr = process_event_synth_attr_stub; tool->event_update = process_event_synth_event_update_stub; tool->tracing_data = process_event_synth_tracing_data_stub; @@ -313,3 +323,177 @@ bool perf_tool__compressed_is_stub(const struct perf_tool *tool) { return tool->compressed == 
perf_session__process_compressed_event_stub; } + +#define CREATE_DELEGATE_SAMPLE(name) \ + static int delegate_ ## name(const struct perf_tool *tool, \ + union perf_event *event, \ + struct perf_sample *sample, \ + struct evsel *evsel, \ + struct machine *machine) \ + { \ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); \ + struct perf_tool *delegate = del_tool->delegate; \ + return delegate->name(delegate, event, sample, evsel, machine); \ + } +CREATE_DELEGATE_SAMPLE(read); +CREATE_DELEGATE_SAMPLE(sample); +CREATE_DELEGATE_SAMPLE(callchain_deferred); + +#define CREATE_DELEGATE_ATTR(name) \ + static int delegate_ ## name(const struct perf_tool *tool, \ + union perf_event *event, \ + struct evlist **pevlist) \ + { \ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); \ + struct perf_tool *delegate = del_tool->delegate; \ + return delegate->name(delegate, event, pevlist); \ + } +CREATE_DELEGATE_ATTR(attr); +CREATE_DELEGATE_ATTR(event_update); + +#define CREATE_DELEGATE_OE(name) \ + static int delegate_ ## name(const struct perf_tool *tool, \ + union perf_event *event, \ + struct ordered_events *oe) \ + { \ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); \ + struct perf_tool *delegate = del_tool->delegate; \ + return delegate->name(delegate, event, oe); \ + } +CREATE_DELEGATE_OE(finished_round); + +#define CREATE_DELEGATE_OP(name) \ + static int delegate_ ## name(const struct perf_tool *tool, \ + union perf_event *event, \ + struct perf_sample *sample, \ + struct machine *machine) \ + { \ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); \ + struct perf_tool *delegate = del_tool->delegate; \ + return delegate->name(delegate, event, sample, machine); \ + } +CREATE_DELEGATE_OP(aux); +CREATE_DELEGATE_OP(aux_output_hw_id); +CREATE_DELEGATE_OP(bpf); +CREATE_DELEGATE_OP(cgroup); +CREATE_DELEGATE_OP(comm); +CREATE_DELEGATE_OP(context_switch); 
+CREATE_DELEGATE_OP(exit); +CREATE_DELEGATE_OP(fork); +CREATE_DELEGATE_OP(itrace_start); +CREATE_DELEGATE_OP(ksymbol); +CREATE_DELEGATE_OP(lost); +CREATE_DELEGATE_OP(lost_samples); +CREATE_DELEGATE_OP(mmap); +CREATE_DELEGATE_OP(mmap2); +CREATE_DELEGATE_OP(namespaces); +CREATE_DELEGATE_OP(text_poke); +CREATE_DELEGATE_OP(throttle); +CREATE_DELEGATE_OP(unthrottle); + +#define CREATE_DELEGATE_OP2(name) \ + static int delegate_ ## name(const struct perf_tool *tool, \ + struct perf_session *session, \ + union perf_event *event) \ + { \ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); \ + struct perf_tool *delegate = del_tool->delegate; \ + return delegate->name(delegate, session, event); \ + } +CREATE_DELEGATE_OP2(auxtrace_error); +CREATE_DELEGATE_OP2(auxtrace_info); +CREATE_DELEGATE_OP2(bpf_metadata); +CREATE_DELEGATE_OP2(build_id); +CREATE_DELEGATE_OP2(cpu_map); +CREATE_DELEGATE_OP2(feature); +CREATE_DELEGATE_OP2(finished_init); +CREATE_DELEGATE_OP2(id_index); +CREATE_DELEGATE_OP2(stat); +CREATE_DELEGATE_OP2(stat_config); +CREATE_DELEGATE_OP2(stat_round); +CREATE_DELEGATE_OP2(thread_map); +CREATE_DELEGATE_OP2(time_conv); +CREATE_DELEGATE_OP2(tracing_data); + +#define CREATE_DELEGATE_OP3(name) \ + static s64 delegate_ ## name(const struct perf_tool *tool, \ + struct perf_session *session, \ + union perf_event *event) \ + { \ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); \ + struct perf_tool *delegate = del_tool->delegate; \ + return delegate->name(delegate, session, event); \ + } +CREATE_DELEGATE_OP3(auxtrace); + +#define CREATE_DELEGATE_OP4(name) \ + static int delegate_ ## name(const struct perf_tool *tool, \ + struct perf_session *session, \ + union perf_event *event, \ + u64 data, \ + const char *str) \ + { \ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); \ + struct perf_tool *delegate = del_tool->delegate; \ + return delegate->name(delegate, session, event, 
data, str); \ + } +CREATE_DELEGATE_OP4(compressed); + +void delegate_tool__init(struct delegate_tool *tool, struct perf_tool *delegate) +{ + tool->delegate = delegate; + + tool->tool.ordered_events = delegate->ordered_events; + tool->tool.ordering_requires_timestamps = delegate->ordering_requires_timestamps; + tool->tool.namespace_events = delegate->namespace_events; + tool->tool.cgroup_events = delegate->cgroup_events; + tool->tool.no_warn = delegate->no_warn; + tool->tool.show_feat_hdr = delegate->show_feat_hdr; + tool->tool.merge_deferred_callchains = delegate->merge_deferred_callchains; + + tool->tool.sample = delegate_sample; + tool->tool.read = delegate_read; + + tool->tool.mmap = delegate_mmap; + tool->tool.mmap2 = delegate_mmap2; + tool->tool.comm = delegate_comm; + tool->tool.namespaces = delegate_namespaces; + tool->tool.cgroup = delegate_cgroup; + tool->tool.fork = delegate_fork; + tool->tool.exit = delegate_exit; + tool->tool.lost = delegate_lost; + tool->tool.lost_samples = delegate_lost_samples; + tool->tool.aux = delegate_aux; + tool->tool.itrace_start = delegate_itrace_start; + tool->tool.aux_output_hw_id = delegate_aux_output_hw_id; + tool->tool.context_switch = delegate_context_switch; + tool->tool.throttle = delegate_throttle; + tool->tool.unthrottle = delegate_unthrottle; + tool->tool.ksymbol = delegate_ksymbol; + tool->tool.bpf = delegate_bpf; + tool->tool.text_poke = delegate_text_poke; + tool->tool.callchain_deferred = delegate_callchain_deferred; + + tool->tool.attr = delegate_attr; + tool->tool.event_update = delegate_event_update; + + tool->tool.tracing_data = delegate_tracing_data; + + tool->tool.finished_round = delegate_finished_round; + + tool->tool.build_id = delegate_build_id; + tool->tool.id_index = delegate_id_index; + tool->tool.auxtrace_info = delegate_auxtrace_info; + tool->tool.auxtrace_error = delegate_auxtrace_error; + tool->tool.time_conv = delegate_time_conv; + tool->tool.thread_map = delegate_thread_map; + 
tool->tool.cpu_map = delegate_cpu_map; + tool->tool.stat_config = delegate_stat_config; + tool->tool.stat = delegate_stat; + tool->tool.stat_round = delegate_stat_round; + tool->tool.feature = delegate_feature; + tool->tool.finished_init = delegate_finished_init; + tool->tool.bpf_metadata = delegate_bpf_metadata; + tool->tool.compressed = delegate_compressed; + tool->tool.auxtrace = delegate_auxtrace; +} diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index 18b76ff0f26a..e96b69d25a5b 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -26,10 +26,12 @@ typedef int (*event_attr_op)(const struct perf_tool *tool, union perf_event *event, struct evlist **pevlist); -typedef int (*event_op2)(struct perf_session *session, union perf_event *event); -typedef s64 (*event_op3)(struct perf_session *session, union perf_event *event); -typedef int (*event_op4)(struct perf_session *session, union perf_event *event, u64 data, - const char *str); +typedef int (*event_op2)(const struct perf_tool *tool, struct perf_session *session, + union perf_event *event); +typedef s64 (*event_op3)(const struct perf_tool *tool, struct perf_session *session, + union perf_event *event); +typedef int (*event_op4)(const struct perf_tool *tool, struct perf_session *session, + union perf_event *event, u64 data, const char *str); typedef int (*event_oe)(const struct perf_tool *tool, union perf_event *event, struct ordered_events *oe); @@ -42,7 +44,8 @@ enum show_feature_header { struct perf_tool { event_sample sample, - read; + read, + callchain_deferred; event_op mmap, mmap2, comm, @@ -87,6 +90,7 @@ struct perf_tool { bool cgroup_events; bool no_warn; bool dont_split_sample_group; + bool merge_deferred_callchains; enum show_feature_header show_feat_hdr; }; @@ -100,4 +104,13 @@ int process_event_sample_stub(const struct perf_tool *tool, struct evsel *evsel, struct machine *machine); +struct delegate_tool { + /** @tool: The actual tool that calls the delegate. 
*/ + struct perf_tool tool; + /** @delegate: The tool that is delegated to. */ + struct perf_tool *delegate; +}; + +void delegate_tool__init(struct delegate_tool *tool, struct perf_tool *delegate); + #endif /* __PERF_TOOL_H */ diff --git a/tools/perf/util/tool_pmu.c b/tools/perf/util/tool_pmu.c index f075098488ba..37c4eae0bef1 100644 --- a/tools/perf/util/tool_pmu.c +++ b/tools/perf/util/tool_pmu.c @@ -2,16 +2,19 @@ #include "cgroup.h" #include "counts.h" #include "cputopo.h" +#include "debug.h" #include "evsel.h" #include "pmu.h" #include "print-events.h" #include "smt.h" +#include "stat.h" #include "time-utils.h" #include "tool_pmu.h" #include "tsc.h" #include <api/fs/fs.h> #include <api/io.h> #include <internal/threadmap.h> +#include <perf/cpumap.h> #include <perf/threadmap.h> #include <fcntl.h> #include <strings.h> @@ -30,6 +33,8 @@ static const char *const tool_pmu__event_names[TOOL_PMU__EVENT_MAX] = { "slots", "smt_on", "system_tsc_freq", + "core_wide", + "target_cpu", }; bool tool_pmu__skip_event(const char *name __maybe_unused) @@ -106,6 +111,23 @@ const char *evsel__tool_pmu_event_name(const struct evsel *evsel) return tool_pmu__event_to_str(evsel->core.attr.config); } +struct perf_cpu_map *tool_pmu__cpus(struct perf_event_attr *attr) +{ + static struct perf_cpu_map *cpu0_map; + enum tool_pmu_event event = (enum tool_pmu_event)attr->config; + + if (event <= TOOL_PMU__EVENT_NONE || event >= TOOL_PMU__EVENT_MAX) { + pr_err("Invalid tool PMU event config %llx\n", attr->config); + return NULL; + } + if (event == TOOL_PMU__EVENT_USER_TIME || event == TOOL_PMU__EVENT_SYSTEM_TIME) + return cpu_map__online(); + + if (!cpu0_map) + cpu0_map = perf_cpu_map__new_int(0); + return perf_cpu_map__get(cpu0_map); +} + static bool read_until_char(struct io *io, char e) { int c; @@ -329,7 +351,11 @@ static bool has_pmem(void) return has_pmem; } -bool tool_pmu__read_event(enum tool_pmu_event ev, struct evsel *evsel, u64 *result) +bool tool_pmu__read_event(enum tool_pmu_event 
ev, + struct evsel *evsel, + bool system_wide, + const char *user_requested_cpu_list, + u64 *result) { const struct cpu_topology *topology; @@ -421,6 +447,14 @@ bool tool_pmu__read_event(enum tool_pmu_event ev, struct evsel *evsel, u64 *resu *result = arch_get_tsc_freq(); return true; + case TOOL_PMU__EVENT_CORE_WIDE: + *result = core_wide(system_wide, user_requested_cpu_list) ? 1 : 0; + return true; + + case TOOL_PMU__EVENT_TARGET_CPU: + *result = system_wide || (user_requested_cpu_list != NULL) ? 1 : 0; + return true; + case TOOL_PMU__EVENT_NONE: case TOOL_PMU__EVENT_DURATION_TIME: case TOOL_PMU__EVENT_USER_TIME: @@ -431,16 +465,39 @@ bool tool_pmu__read_event(enum tool_pmu_event ev, struct evsel *evsel, u64 *resu } } +static void perf_counts__update(struct perf_counts_values *count, + const struct perf_counts_values *old_count, + bool raw, u64 val) +{ + /* + * The values of enabled and running must make a ratio of 100%. The + * exact values don't matter as long as they are non-zero to avoid + * issues with evsel__count_has_error. + */ + if (old_count) { + count->val = raw ? 
val : old_count->val + val; + count->run = old_count->run + 1; + count->ena = old_count->ena + 1; + count->lost = old_count->lost; + } else { + count->val = val; + count->run++; + count->ena++; + count->lost = 0; + } +} + int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread) { __u64 *start_time, cur_time, delta_start; - u64 val; - int fd, err = 0; + int err = 0; struct perf_counts_values *count, *old_count = NULL; bool adjust = false; enum tool_pmu_event ev = evsel__tool_event(evsel); count = perf_counts(evsel->counts, cpu_map_idx, thread); + if (evsel->prev_raw_counts) + old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread); switch (ev) { case TOOL_PMU__EVENT_HAS_PMEM: @@ -451,26 +508,23 @@ int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread) case TOOL_PMU__EVENT_NUM_PACKAGES: case TOOL_PMU__EVENT_SLOTS: case TOOL_PMU__EVENT_SMT_ON: - case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ: - if (evsel->prev_raw_counts) - old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread); - val = 0; + case TOOL_PMU__EVENT_CORE_WIDE: + case TOOL_PMU__EVENT_TARGET_CPU: + case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ: { + u64 val = 0; + if (cpu_map_idx == 0 && thread == 0) { - if (!tool_pmu__read_event(ev, evsel, &val)) { + if (!tool_pmu__read_event(ev, evsel, + stat_config.system_wide, + stat_config.user_requested_cpu_list, + &val)) { count->lost++; val = 0; } } - if (old_count) { - count->val = old_count->val + val; - count->run = old_count->run + 1; - count->ena = old_count->ena + 1; - } else { - count->val = val; - count->run++; - count->ena++; - } + perf_counts__update(count, old_count, /*raw=*/false, val); return 0; + } case TOOL_PMU__EVENT_DURATION_TIME: /* * Pretend duration_time is only on the first CPU and thread, or @@ -486,9 +540,9 @@ int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread) case TOOL_PMU__EVENT_USER_TIME: case TOOL_PMU__EVENT_SYSTEM_TIME: { bool system = evsel__tool_event(evsel) == 
TOOL_PMU__EVENT_SYSTEM_TIME; + int fd = FD(evsel, cpu_map_idx, thread); start_time = xyarray__entry(evsel->start_times, cpu_map_idx, thread); - fd = FD(evsel, cpu_map_idx, thread); lseek(fd, SEEK_SET, 0); if (evsel->pid_stat) { /* The event exists solely on 1 CPU. */ @@ -522,17 +576,9 @@ int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread) if (adjust) { __u64 ticks_per_sec = sysconf(_SC_CLK_TCK); - delta_start *= 1000000000 / ticks_per_sec; + delta_start *= 1e9 / ticks_per_sec; } - count->val = delta_start; - count->lost = 0; - /* - * The values of enabled and running must make a ratio of 100%. The - * exact values don't matter as long as they are non-zero to avoid - * issues with evsel__count_has_error. - */ - count->ena++; - count->run++; + perf_counts__update(count, old_count, /*raw=*/true, delta_start); return 0; } diff --git a/tools/perf/util/tool_pmu.h b/tools/perf/util/tool_pmu.h index d642e7d73910..ea343d1983d3 100644 --- a/tools/perf/util/tool_pmu.h +++ b/tools/perf/util/tool_pmu.h @@ -22,6 +22,8 @@ enum tool_pmu_event { TOOL_PMU__EVENT_SLOTS, TOOL_PMU__EVENT_SMT_ON, TOOL_PMU__EVENT_SYSTEM_TSC_FREQ, + TOOL_PMU__EVENT_CORE_WIDE, + TOOL_PMU__EVENT_TARGET_CPU, TOOL_PMU__EVENT_MAX, }; @@ -34,11 +36,17 @@ enum tool_pmu_event tool_pmu__str_to_event(const char *str); bool tool_pmu__skip_event(const char *name); int tool_pmu__num_skip_events(void); -bool tool_pmu__read_event(enum tool_pmu_event ev, struct evsel *evsel, u64 *result); +bool tool_pmu__read_event(enum tool_pmu_event ev, + struct evsel *evsel, + bool system_wide, + const char *user_requested_cpu_list, + u64 *result); + u64 tool_pmu__cpu_slots_per_cycle(void); bool perf_pmu__is_tool(const struct perf_pmu *pmu); +struct perf_cpu_map *tool_pmu__cpus(struct perf_event_attr *attr); bool evsel__is_tool(const struct evsel *evsel); enum tool_pmu_event evsel__tool_event(const struct evsel *evsel); |
