diff options
Diffstat (limited to 'tools/perf/util')
78 files changed, 4334 insertions, 869 deletions
| diff --git a/tools/perf/util/Build b/tools/perf/util/Build index a3de7916fe63..ea0a452550b0 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -44,7 +44,7 @@ libperf-y += machine.o  libperf-y += map.o  libperf-y += pstack.o  libperf-y += session.o -libperf-$(CONFIG_AUDIT) += syscalltbl.o +libperf-$(CONFIG_TRACE) += syscalltbl.o  libperf-y += ordered-events.o  libperf-y += namespaces.o  libperf-y += comm.o @@ -86,6 +86,14 @@ libperf-$(CONFIG_AUXTRACE) += auxtrace.o  libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/  libperf-$(CONFIG_AUXTRACE) += intel-pt.o  libperf-$(CONFIG_AUXTRACE) += intel-bts.o +libperf-$(CONFIG_AUXTRACE) += arm-spe.o +libperf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o + +ifdef CONFIG_LIBOPENCSD +libperf-$(CONFIG_AUXTRACE) += cs-etm.o +libperf-$(CONFIG_AUXTRACE) += cs-etm-decoder/ +endif +  libperf-y += parse-branch-options.o  libperf-y += dump-insn.o  libperf-y += parse-regs-options.o diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index da1c4c4a0dd8..28b233c3dcbe 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -26,7 +26,6 @@  #include <pthread.h>  #include <linux/bitops.h>  #include <linux/kernel.h> -#include <sys/utsname.h>  #include "sane_ctype.h" @@ -165,7 +164,7 @@ static void ins__delete(struct ins_operands *ops)  static int ins__raw_scnprintf(struct ins *ins, char *bf, size_t size,  			      struct ins_operands *ops)  { -	return scnprintf(bf, size, "%-6.6s %s", ins->name, ops->raw); +	return scnprintf(bf, size, "%-6s %s", ins->name, ops->raw);  }  int ins__scnprintf(struct ins *ins, char *bf, size_t size, @@ -230,12 +229,12 @@ static int call__scnprintf(struct ins *ins, char *bf, size_t size,  			   struct ins_operands *ops)  {  	if (ops->target.name) -		return scnprintf(bf, size, "%-6.6s %s", ins->name, ops->target.name); +		return scnprintf(bf, size, "%-6s %s", ins->name, ops->target.name);  	if (ops->target.addr == 0)  		return ins__raw_scnprintf(ins, bf, size, ops); -	
return scnprintf(bf, size, "%-6.6s *%" PRIx64, ins->name, ops->target.addr); +	return scnprintf(bf, size, "%-6s *%" PRIx64, ins->name, ops->target.addr);  }  static struct ins_ops call_ops = { @@ -299,7 +298,7 @@ static int jump__scnprintf(struct ins *ins, char *bf, size_t size,  			c++;  	} -	return scnprintf(bf, size, "%-6.6s %.*s%" PRIx64, +	return scnprintf(bf, size, "%-6s %.*s%" PRIx64,  			 ins->name, c ? c - ops->raw : 0, ops->raw,  			 ops->target.offset);  } @@ -322,6 +321,8 @@ static int comment__symbol(char *raw, char *comment, u64 *addrp, char **namep)  		return 0;  	*addrp = strtoull(comment, &endptr, 16); +	if (endptr == comment) +		return 0;  	name = strchr(endptr, '<');  	if (name == NULL)  		return -1; @@ -372,7 +373,7 @@ static int lock__scnprintf(struct ins *ins, char *bf, size_t size,  	if (ops->locked.ins.ops == NULL)  		return ins__raw_scnprintf(ins, bf, size, ops); -	printed = scnprintf(bf, size, "%-6.6s ", ins->name); +	printed = scnprintf(bf, size, "%-6s ", ins->name);  	return printed + ins__scnprintf(&ops->locked.ins, bf + printed,  					size - printed, ops->locked.ops);  } @@ -435,8 +436,8 @@ static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map *m  		return 0;  	comment = ltrim(comment); -	comment__symbol(ops->source.raw, comment, &ops->source.addr, &ops->source.name); -	comment__symbol(ops->target.raw, comment, &ops->target.addr, &ops->target.name); +	comment__symbol(ops->source.raw, comment + 1, &ops->source.addr, &ops->source.name); +	comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name);  	return 0; @@ -448,7 +449,7 @@ out_free_source:  static int mov__scnprintf(struct ins *ins, char *bf, size_t size,  			   struct ins_operands *ops)  { -	return scnprintf(bf, size, "%-6.6s %s,%s", ins->name, +	return scnprintf(bf, size, "%-6s %s,%s", ins->name,  			 ops->source.name ?: ops->source.raw,  			 ops->target.name ?: ops->target.raw);  } @@ -480,7 +481,7 @@ static int dec__parse(struct 
arch *arch __maybe_unused, struct ins_operands *ops  		return 0;  	comment = ltrim(comment); -	comment__symbol(ops->target.raw, comment, &ops->target.addr, &ops->target.name); +	comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name);  	return 0;  } @@ -488,7 +489,7 @@ static int dec__parse(struct arch *arch __maybe_unused, struct ins_operands *ops  static int dec__scnprintf(struct ins *ins, char *bf, size_t size,  			   struct ins_operands *ops)  { -	return scnprintf(bf, size, "%-6.6s %s", ins->name, +	return scnprintf(bf, size, "%-6s %s", ins->name,  			 ops->target.name ?: ops->target.raw);  } @@ -500,7 +501,7 @@ static struct ins_ops dec_ops = {  static int nop__scnprintf(struct ins *ins __maybe_unused, char *bf, size_t size,  			  struct ins_operands *ops __maybe_unused)  { -	return scnprintf(bf, size, "%-6.6s", "nop"); +	return scnprintf(bf, size, "%-6s", "nop");  }  static struct ins_ops nop_ops = { @@ -878,32 +879,99 @@ out_free_name:  	return -1;  } -static struct disasm_line *disasm_line__new(s64 offset, char *line, -					    size_t privsize, int line_nr, -					    struct arch *arch, -					    struct map *map) +struct annotate_args { +	size_t			 privsize; +	struct arch		*arch; +	struct map		*map; +	struct perf_evsel	*evsel; +	s64			 offset; +	char			*line; +	int			 line_nr; +}; + +static void annotation_line__delete(struct annotation_line *al)  { -	struct disasm_line *dl = zalloc(sizeof(*dl) + privsize); +	void *ptr = (void *) al - al->privsize; + +	free_srcline(al->path); +	zfree(&al->line); +	free(ptr); +} + +/* + * Allocating the annotation line data with following + * structure: + * + *    -------------------------------------- + *    private space | struct annotation_line + *    -------------------------------------- + * + * Size of the private space is stored in 'struct annotation_line'. 
+ * + */ +static struct annotation_line * +annotation_line__new(struct annotate_args *args, size_t privsize) +{ +	struct annotation_line *al; +	struct perf_evsel *evsel = args->evsel; +	size_t size = privsize + sizeof(*al); +	int nr = 1; + +	if (perf_evsel__is_group_event(evsel)) +		nr = evsel->nr_members; + +	size += sizeof(al->samples[0]) * nr; + +	al = zalloc(size); +	if (al) { +		al = (void *) al + privsize; +		al->privsize   = privsize; +		al->offset     = args->offset; +		al->line       = strdup(args->line); +		al->line_nr    = args->line_nr; +		al->samples_nr = nr; +	} + +	return al; +} + +/* + * Allocating the disasm annotation line data with + * following structure: + * + *    ------------------------------------------------------------ + *    privsize space | struct disasm_line | struct annotation_line + *    ------------------------------------------------------------ + * + * We have 'struct annotation_line' member as last member + * of 'struct disasm_line' to have an easy access. 
+ * + */ +static struct disasm_line *disasm_line__new(struct annotate_args *args) +{ +	struct disasm_line *dl = NULL; +	struct annotation_line *al; +	size_t privsize = args->privsize + offsetof(struct disasm_line, al); + +	al = annotation_line__new(args, privsize); +	if (al != NULL) { +		dl = disasm_line(al); -	if (dl != NULL) { -		dl->offset = offset; -		dl->line = strdup(line); -		dl->line_nr = line_nr; -		if (dl->line == NULL) +		if (dl->al.line == NULL)  			goto out_delete; -		if (offset != -1) { -			if (disasm_line__parse(dl->line, &dl->ins.name, &dl->ops.raw) < 0) +		if (args->offset != -1) { +			if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0)  				goto out_free_line; -			disasm_line__init_ins(dl, arch, map); +			disasm_line__init_ins(dl, args->arch, args->map);  		}  	}  	return dl;  out_free_line: -	zfree(&dl->line); +	zfree(&dl->al.line);  out_delete:  	free(dl);  	return NULL; @@ -911,30 +979,30 @@ out_delete:  void disasm_line__free(struct disasm_line *dl)  { -	zfree(&dl->line);  	if (dl->ins.ops && dl->ins.ops->free)  		dl->ins.ops->free(&dl->ops);  	else  		ins__delete(&dl->ops);  	free((void *)dl->ins.name);  	dl->ins.name = NULL; -	free(dl); +	annotation_line__delete(&dl->al);  }  int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw)  {  	if (raw || !dl->ins.ops) -		return scnprintf(bf, size, "%-6.6s %s", dl->ins.name, dl->ops.raw); +		return scnprintf(bf, size, "%-6s %s", dl->ins.name, dl->ops.raw);  	return ins__scnprintf(&dl->ins, bf, size, &dl->ops);  } -static void disasm__add(struct list_head *head, struct disasm_line *line) +static void annotation_line__add(struct annotation_line *al, struct list_head *head)  { -	list_add_tail(&line->node, head); +	list_add_tail(&al->node, head);  } -struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disasm_line *pos) +struct annotation_line * +annotation_line__next(struct annotation_line *pos, struct list_head *head)  {  	
list_for_each_entry_continue(pos, head, node)  		if (pos->offset >= 0) @@ -943,50 +1011,6 @@ struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disa  	return NULL;  } -double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset, -			    s64 end, const char **path, struct sym_hist_entry *sample) -{ -	struct source_line *src_line = notes->src->lines; -	double percent = 0.0; - -	sample->nr_samples = sample->period = 0; - -	if (src_line) { -		size_t sizeof_src_line = sizeof(*src_line) + -				sizeof(src_line->samples) * (src_line->nr_pcnt - 1); - -		while (offset < end) { -			src_line = (void *)notes->src->lines + -					(sizeof_src_line * offset); - -			if (*path == NULL) -				*path = src_line->path; - -			percent += src_line->samples[evidx].percent; -			sample->nr_samples += src_line->samples[evidx].nr; -			offset++; -		} -	} else { -		struct sym_hist *h = annotation__histogram(notes, evidx); -		unsigned int hits = 0; -		u64 period = 0; - -		while (offset < end) { -			hits   += h->addr[offset].nr_samples; -			period += h->addr[offset].period; -			++offset; -		} - -		if (h->nr_samples) { -			sample->period	   = period; -			sample->nr_samples = hits; -			percent = 100.0 * hits / h->nr_samples; -		} -	} - -	return percent; -} -  static const char *annotate__address_color(struct block_range *br)  {  	double cov = block_range__coverage(br); @@ -1069,50 +1093,39 @@ static void annotate__branch_printf(struct block_range *br, u64 addr)  	}  } +static int disasm_line__print(struct disasm_line *dl, u64 start, int addr_fmt_width) +{ +	s64 offset = dl->al.offset; +	const u64 addr = start + offset; +	struct block_range *br; + +	br = block_range__find(addr); +	color_fprintf(stdout, annotate__address_color(br), "  %*" PRIx64 ":", addr_fmt_width, addr); +	color_fprintf(stdout, annotate__asm_color(br), "%s", dl->al.line); +	annotate__branch_printf(br, addr); +	return 0; +} -static int disasm_line__print(struct disasm_line *dl, struct symbol 
*sym, u64 start, -		      struct perf_evsel *evsel, u64 len, int min_pcnt, int printed, -		      int max_lines, struct disasm_line *queue) +static int +annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start, +		       struct perf_evsel *evsel, u64 len, int min_pcnt, int printed, +		       int max_lines, struct annotation_line *queue, int addr_fmt_width)  { +	struct disasm_line *dl = container_of(al, struct disasm_line, al);  	static const char *prev_line;  	static const char *prev_color; -	if (dl->offset != -1) { -		const char *path = NULL; -		double percent, max_percent = 0.0; -		double *ppercents = &percent; -		struct sym_hist_entry sample; -		struct sym_hist_entry *psamples = &sample; +	if (al->offset != -1) { +		double max_percent = 0.0;  		int i, nr_percent = 1;  		const char *color;  		struct annotation *notes = symbol__annotation(sym); -		s64 offset = dl->offset; -		const u64 addr = start + offset; -		struct disasm_line *next; -		struct block_range *br; - -		next = disasm__get_next_ip_line(&notes->src->source, dl); - -		if (perf_evsel__is_group_event(evsel)) { -			nr_percent = evsel->nr_members; -			ppercents = calloc(nr_percent, sizeof(double)); -			psamples = calloc(nr_percent, sizeof(struct sym_hist_entry)); -			if (ppercents == NULL || psamples == NULL) { -				return -1; -			} -		} -		for (i = 0; i < nr_percent; i++) { -			percent = disasm__calc_percent(notes, -					notes->src->lines ? i : evsel->idx + i, -					offset, -					next ? 
next->offset : (s64) len, -					&path, &sample); - -			ppercents[i] = percent; -			psamples[i] = sample; -			if (percent > max_percent) -				max_percent = percent; +		for (i = 0; i < al->samples_nr; i++) { +			struct annotation_data *sample = &al->samples[i]; + +			if (sample->percent > max_percent) +				max_percent = sample->percent;  		}  		if (max_percent < min_pcnt) @@ -1123,10 +1136,10 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st  		if (queue != NULL) {  			list_for_each_entry_from(queue, &notes->src->source, node) { -				if (queue == dl) +				if (queue == al)  					break; -				disasm_line__print(queue, sym, start, evsel, len, -						    0, 0, 1, NULL); +				annotation_line__print(queue, sym, start, evsel, len, +						       0, 0, 1, NULL, addr_fmt_width);  			}  		} @@ -1137,44 +1150,34 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st  		 * the same color than the percentage. Don't print it  		 * twice for close colored addr with the same filename:line  		 */ -		if (path) { -			if (!prev_line || strcmp(prev_line, path) +		if (al->path) { +			if (!prev_line || strcmp(prev_line, al->path)  				       || color != prev_color) { -				color_fprintf(stdout, color, " %s", path); -				prev_line = path; +				color_fprintf(stdout, color, " %s", al->path); +				prev_line = al->path;  				prev_color = color;  			}  		}  		for (i = 0; i < nr_percent; i++) { -			percent = ppercents[i]; -			sample = psamples[i]; -			color = get_percent_color(percent); +			struct annotation_data *sample = &al->samples[i]; + +			color = get_percent_color(sample->percent);  			if (symbol_conf.show_total_period)  				color_fprintf(stdout, color, " %11" PRIu64, -					      sample.period); +					      sample->he.period);  			else if (symbol_conf.show_nr_samples)  				color_fprintf(stdout, color, " %7" PRIu64, -					      sample.nr_samples); +					      sample->he.nr_samples);  			else -				color_fprintf(stdout, color, " %7.2f", 
percent); +				color_fprintf(stdout, color, " %7.2f", sample->percent);  		} -		printf(" :	"); +		printf(" : "); -		br = block_range__find(addr); -		color_fprintf(stdout, annotate__address_color(br), "  %" PRIx64 ":", addr); -		color_fprintf(stdout, annotate__asm_color(br), "%s", dl->line); -		annotate__branch_printf(br, addr); +		disasm_line__print(dl, start, addr_fmt_width);  		printf("\n"); - -		if (ppercents != &percent) -			free(ppercents); - -		if (psamples != &sample) -			free(psamples); -  	} else if (max_lines && printed >= max_lines)  		return 1;  	else { @@ -1186,10 +1189,10 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st  		if (perf_evsel__is_group_event(evsel))  			width *= evsel->nr_members; -		if (!*dl->line) +		if (!*al->line)  			printf(" %*s:\n", width, " ");  		else -			printf(" %*s:	%s\n", width, " ", dl->line); +			printf(" %*s:     %*s %s\n", width, " ", addr_fmt_width, " ", al->line);  	}  	return 0; @@ -1215,11 +1218,11 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st   * means that it's not a disassembly line so should be treated differently.   * The ops.raw part will be parsed further according to type of the instruction.   
*/ -static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, -				      struct arch *arch, -				      FILE *file, size_t privsize, +static int symbol__parse_objdump_line(struct symbol *sym, FILE *file, +				      struct annotate_args *args,  				      int *line_nr)  { +	struct map *map = args->map;  	struct annotation *notes = symbol__annotation(sym);  	struct disasm_line *dl;  	char *line = NULL, *parsed_line, *tmp, *tmp2; @@ -1263,7 +1266,11 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map,  			parsed_line = tmp2 + 1;  	} -	dl = disasm_line__new(offset, parsed_line, privsize, *line_nr, arch, map); +	args->offset  = offset; +	args->line    = parsed_line; +	args->line_nr = *line_nr; + +	dl = disasm_line__new(args);  	free(line);  	(*line_nr)++; @@ -1288,7 +1295,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map,  			dl->ops.target.name = strdup(target.sym->name);  	} -	disasm__add(&notes->src->source, dl); +	annotation_line__add(&dl->al, &notes->src->source);  	return 0;  } @@ -1305,19 +1312,19 @@ static void delete_last_nop(struct symbol *sym)  	struct disasm_line *dl;  	while (!list_empty(list)) { -		dl = list_entry(list->prev, struct disasm_line, node); +		dl = list_entry(list->prev, struct disasm_line, al.node);  		if (dl->ins.ops) {  			if (dl->ins.ops != &nop_ops)  				return;  		} else { -			if (!strstr(dl->line, " nop ") && -			    !strstr(dl->line, " nopl ") && -			    !strstr(dl->line, " nopw ")) +			if (!strstr(dl->al.line, " nop ") && +			    !strstr(dl->al.line, " nopl ") && +			    !strstr(dl->al.line, " nopw "))  				return;  		} -		list_del(&dl->node); +		list_del(&dl->al.node);  		disasm_line__free(dl);  	}  } @@ -1412,25 +1419,11 @@ fallback:  	return 0;  } -static const char *annotate__norm_arch(const char *arch_name) -{ -	struct utsname uts; - -	if (!arch_name) { /* Assume we are annotating locally. 
*/ -		if (uname(&uts) < 0) -			return NULL; -		arch_name = uts.machine; -	} -	return normalize_arch((char *)arch_name); -} - -int symbol__disassemble(struct symbol *sym, struct map *map, -			const char *arch_name, size_t privsize, -			struct arch **parch, char *cpuid) +static int symbol__disassemble(struct symbol *sym, struct annotate_args *args)  { +	struct map *map = args->map;  	struct dso *dso = map->dso;  	char command[PATH_MAX * 2]; -	struct arch *arch = NULL;  	FILE *file;  	char symfs_filename[PATH_MAX];  	struct kcore_extract kce; @@ -1444,25 +1437,6 @@ int symbol__disassemble(struct symbol *sym, struct map *map,  	if (err)  		return err; -	arch_name = annotate__norm_arch(arch_name); -	if (!arch_name) -		return -1; - -	arch = arch__find(arch_name); -	if (arch == NULL) -		return -ENOTSUP; - -	if (parch) -		*parch = arch; - -	if (arch->init) { -		err = arch->init(arch, cpuid); -		if (err) { -			pr_err("%s: failed to initialize %s arch priv area\n", __func__, arch->name); -			return err; -		} -	} -  	pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__,  		 symfs_filename, sym->name, map->unmap_ip(map, sym->start),  		 map->unmap_ip(map, sym->end)); @@ -1546,8 +1520,7 @@ int symbol__disassemble(struct symbol *sym, struct map *map,  		 * can associate it with the instructions till the next one.  		 * See disasm_line__new() and struct disasm_line::line_nr.  		 
*/ -		if (symbol__parse_objdump_line(sym, map, arch, file, privsize, -			    &lineno) < 0) +		if (symbol__parse_objdump_line(sym, file, args, &lineno) < 0)  			break;  		nline++;  	} @@ -1580,21 +1553,110 @@ out_close_stdout:  	goto out_remove_tmp;  } -static void insert_source_line(struct rb_root *root, struct source_line *src_line) +static void calc_percent(struct sym_hist *hist, +			 struct annotation_data *sample, +			 s64 offset, s64 end) +{ +	unsigned int hits = 0; +	u64 period = 0; + +	while (offset < end) { +		hits   += hist->addr[offset].nr_samples; +		period += hist->addr[offset].period; +		++offset; +	} + +	if (hist->nr_samples) { +		sample->he.period     = period; +		sample->he.nr_samples = hits; +		sample->percent = 100.0 * hits / hist->nr_samples; +	} +} + +static void annotation__calc_percent(struct annotation *notes, +				     struct perf_evsel *evsel, s64 len) +{ +	struct annotation_line *al, *next; + +	list_for_each_entry(al, &notes->src->source, node) { +		s64 end; +		int i; + +		if (al->offset == -1) +			continue; + +		next = annotation_line__next(al, &notes->src->source); +		end  = next ? 
next->offset : len; + +		for (i = 0; i < al->samples_nr; i++) { +			struct annotation_data *sample; +			struct sym_hist *hist; + +			hist   = annotation__histogram(notes, evsel->idx + i); +			sample = &al->samples[i]; + +			calc_percent(hist, sample, al->offset, end); +		} +	} +} + +void symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel) +{ +	struct annotation *notes = symbol__annotation(sym); + +	annotation__calc_percent(notes, evsel, symbol__size(sym)); +} + +int symbol__annotate(struct symbol *sym, struct map *map, +		     struct perf_evsel *evsel, size_t privsize, +		     struct arch **parch)  { -	struct source_line *iter; +	struct annotate_args args = { +		.privsize	= privsize, +		.map		= map, +		.evsel		= evsel, +	}; +	struct perf_env *env = perf_evsel__env(evsel); +	const char *arch_name = perf_env__arch(env); +	struct arch *arch; +	int err; + +	if (!arch_name) +		return -1; + +	args.arch = arch = arch__find(arch_name); +	if (arch == NULL) +		return -ENOTSUP; + +	if (parch) +		*parch = arch; + +	if (arch->init) { +		err = arch->init(arch, env ? 
env->cpuid : NULL); +		if (err) { +			pr_err("%s: failed to initialize %s arch priv area\n", __func__, arch->name); +			return err; +		} +	} + +	return symbol__disassemble(sym, &args); +} + +static void insert_source_line(struct rb_root *root, struct annotation_line *al) +{ +	struct annotation_line *iter;  	struct rb_node **p = &root->rb_node;  	struct rb_node *parent = NULL;  	int i, ret;  	while (*p != NULL) {  		parent = *p; -		iter = rb_entry(parent, struct source_line, node); +		iter = rb_entry(parent, struct annotation_line, rb_node); -		ret = strcmp(iter->path, src_line->path); +		ret = strcmp(iter->path, al->path);  		if (ret == 0) { -			for (i = 0; i < src_line->nr_pcnt; i++) -				iter->samples[i].percent_sum += src_line->samples[i].percent; +			for (i = 0; i < al->samples_nr; i++) +				iter->samples[i].percent_sum += al->samples[i].percent;  			return;  		} @@ -1604,18 +1666,18 @@ static void insert_source_line(struct rb_root *root, struct source_line *src_lin  			p = &(*p)->rb_right;  	} -	for (i = 0; i < src_line->nr_pcnt; i++) -		src_line->samples[i].percent_sum = src_line->samples[i].percent; +	for (i = 0; i < al->samples_nr; i++) +		al->samples[i].percent_sum = al->samples[i].percent; -	rb_link_node(&src_line->node, parent, p); -	rb_insert_color(&src_line->node, root); +	rb_link_node(&al->rb_node, parent, p); +	rb_insert_color(&al->rb_node, root);  } -static int cmp_source_line(struct source_line *a, struct source_line *b) +static int cmp_source_line(struct annotation_line *a, struct annotation_line *b)  {  	int i; -	for (i = 0; i < a->nr_pcnt; i++) { +	for (i = 0; i < a->samples_nr; i++) {  		if (a->samples[i].percent_sum == b->samples[i].percent_sum)  			continue;  		return a->samples[i].percent_sum > b->samples[i].percent_sum; @@ -1624,135 +1686,47 @@ static int cmp_source_line(struct source_line *a, struct source_line *b)  	return 0;  } -static void __resort_source_line(struct rb_root *root, struct source_line *src_line) +static void 
__resort_source_line(struct rb_root *root, struct annotation_line *al)  { -	struct source_line *iter; +	struct annotation_line *iter;  	struct rb_node **p = &root->rb_node;  	struct rb_node *parent = NULL;  	while (*p != NULL) {  		parent = *p; -		iter = rb_entry(parent, struct source_line, node); +		iter = rb_entry(parent, struct annotation_line, rb_node); -		if (cmp_source_line(src_line, iter)) +		if (cmp_source_line(al, iter))  			p = &(*p)->rb_left;  		else  			p = &(*p)->rb_right;  	} -	rb_link_node(&src_line->node, parent, p); -	rb_insert_color(&src_line->node, root); +	rb_link_node(&al->rb_node, parent, p); +	rb_insert_color(&al->rb_node, root);  }  static void resort_source_line(struct rb_root *dest_root, struct rb_root *src_root)  { -	struct source_line *src_line; +	struct annotation_line *al;  	struct rb_node *node;  	node = rb_first(src_root);  	while (node) {  		struct rb_node *next; -		src_line = rb_entry(node, struct source_line, node); +		al = rb_entry(node, struct annotation_line, rb_node);  		next = rb_next(node);  		rb_erase(node, src_root); -		__resort_source_line(dest_root, src_line); +		__resort_source_line(dest_root, al);  		node = next;  	}  } -static void symbol__free_source_line(struct symbol *sym, int len) -{ -	struct annotation *notes = symbol__annotation(sym); -	struct source_line *src_line = notes->src->lines; -	size_t sizeof_src_line; -	int i; - -	sizeof_src_line = sizeof(*src_line) + -			  (sizeof(src_line->samples) * (src_line->nr_pcnt - 1)); - -	for (i = 0; i < len; i++) { -		free_srcline(src_line->path); -		src_line = (void *)src_line + sizeof_src_line; -	} - -	zfree(&notes->src->lines); -} - -/* Get the filename:line for the colored entries */ -static int symbol__get_source_line(struct symbol *sym, struct map *map, -				   struct perf_evsel *evsel, -				   struct rb_root *root, int len) -{ -	u64 start; -	int i, k; -	int evidx = evsel->idx; -	struct source_line *src_line; -	struct annotation *notes = symbol__annotation(sym); -	struct 
sym_hist *h = annotation__histogram(notes, evidx); -	struct rb_root tmp_root = RB_ROOT; -	int nr_pcnt = 1; -	u64 nr_samples = h->nr_samples; -	size_t sizeof_src_line = sizeof(struct source_line); - -	if (perf_evsel__is_group_event(evsel)) { -		for (i = 1; i < evsel->nr_members; i++) { -			h = annotation__histogram(notes, evidx + i); -			nr_samples += h->nr_samples; -		} -		nr_pcnt = evsel->nr_members; -		sizeof_src_line += (nr_pcnt - 1) * sizeof(src_line->samples); -	} - -	if (!nr_samples) -		return 0; - -	src_line = notes->src->lines = calloc(len, sizeof_src_line); -	if (!notes->src->lines) -		return -1; - -	start = map__rip_2objdump(map, sym->start); - -	for (i = 0; i < len; i++) { -		u64 offset; -		double percent_max = 0.0; - -		src_line->nr_pcnt = nr_pcnt; - -		for (k = 0; k < nr_pcnt; k++) { -			double percent = 0.0; - -			h = annotation__histogram(notes, evidx + k); -			nr_samples = h->addr[i].nr_samples; -			if (h->nr_samples) -				percent = 100.0 * nr_samples / h->nr_samples; - -			if (percent > percent_max) -				percent_max = percent; -			src_line->samples[k].percent = percent; -			src_line->samples[k].nr = nr_samples; -		} - -		if (percent_max <= 0.5) -			goto next; - -		offset = start + i; -		src_line->path = get_srcline(map->dso, offset, NULL, -					     false, true); -		insert_source_line(&tmp_root, src_line); - -	next: -		src_line = (void *)src_line + sizeof_src_line; -	} - -	resort_source_line(root, &tmp_root); -	return 0; -} -  static void print_summary(struct rb_root *root, const char *filename)  { -	struct source_line *src_line; +	struct annotation_line *al;  	struct rb_node *node;  	printf("\nSorted summary for file %s\n", filename); @@ -1770,9 +1744,9 @@ static void print_summary(struct rb_root *root, const char *filename)  		char *path;  		int i; -		src_line = rb_entry(node, struct source_line, node); -		for (i = 0; i < src_line->nr_pcnt; i++) { -			percent = src_line->samples[i].percent_sum; +		al = rb_entry(node, struct annotation_line, 
rb_node); +		for (i = 0; i < al->samples_nr; i++) { +			percent = al->samples[i].percent_sum;  			color = get_percent_color(percent);  			color_fprintf(stdout, color, " %7.2f", percent); @@ -1780,7 +1754,7 @@ static void print_summary(struct rb_root *root, const char *filename)  				percent_max = percent;  		} -		path = src_line->path; +		path = al->path;  		color = get_percent_color(percent_max);  		color_fprintf(stdout, color, " %s\n", path); @@ -1801,6 +1775,19 @@ static void symbol__annotate_hits(struct symbol *sym, struct perf_evsel *evsel)  	printf("%*s: %" PRIu64 "\n", BITS_PER_LONG / 2, "h->nr_samples", h->nr_samples);  } +static int annotated_source__addr_fmt_width(struct list_head *lines, u64 start) +{ +	char bf[32]; +	struct annotation_line *line; + +	list_for_each_entry_reverse(line, lines, node) { +		if (line->offset != -1) +			return scnprintf(bf, sizeof(bf), "%" PRIx64, start + line->offset); +	} + +	return 0; +} +  int symbol__annotate_printf(struct symbol *sym, struct map *map,  			    struct perf_evsel *evsel, bool full_paths,  			    int min_pcnt, int max_lines, int context) @@ -1811,9 +1798,9 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map,  	const char *evsel_name = perf_evsel__name(evsel);  	struct annotation *notes = symbol__annotation(sym);  	struct sym_hist *h = annotation__histogram(notes, evsel->idx); -	struct disasm_line *pos, *queue = NULL; +	struct annotation_line *pos, *queue = NULL;  	u64 start = map__rip_2objdump(map, sym->start); -	int printed = 2, queue_len = 0; +	int printed = 2, queue_len = 0, addr_fmt_width;  	int more = 0;  	u64 len;  	int width = symbol_conf.show_total_period ? 
12 : 8; @@ -1844,15 +1831,21 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map,  	if (verbose > 0)  		symbol__annotate_hits(sym, evsel); +	addr_fmt_width = annotated_source__addr_fmt_width(&notes->src->source, start); +  	list_for_each_entry(pos, &notes->src->source, node) { +		int err; +  		if (context && queue == NULL) {  			queue = pos;  			queue_len = 0;  		} -		switch (disasm_line__print(pos, sym, start, evsel, len, -					    min_pcnt, printed, max_lines, -					    queue)) { +		err = annotation_line__print(pos, sym, start, evsel, len, +					     min_pcnt, printed, max_lines, +					     queue, addr_fmt_width); + +		switch (err) {  		case 0:  			++printed;  			if (context) { @@ -1907,13 +1900,13 @@ void symbol__annotate_decay_histogram(struct symbol *sym, int evidx)  	}  } -void disasm__purge(struct list_head *head) +void annotated_source__purge(struct annotated_source *as)  { -	struct disasm_line *pos, *n; +	struct annotation_line *al, *n; -	list_for_each_entry_safe(pos, n, head, node) { -		list_del(&pos->node); -		disasm_line__free(pos); +	list_for_each_entry_safe(al, n, &as->source, node) { +		list_del(&al->node); +		disasm_line__free(disasm_line(al));  	}  } @@ -1921,10 +1914,10 @@ static size_t disasm_line__fprintf(struct disasm_line *dl, FILE *fp)  {  	size_t printed; -	if (dl->offset == -1) -		return fprintf(fp, "%s\n", dl->line); +	if (dl->al.offset == -1) +		return fprintf(fp, "%s\n", dl->al.line); -	printed = fprintf(fp, "%#" PRIx64 " %s", dl->offset, dl->ins.name); +	printed = fprintf(fp, "%#" PRIx64 " %s", dl->al.offset, dl->ins.name);  	if (dl->ops.raw[0] != '\0') {  		printed += fprintf(fp, "%.*s %s\n", 6 - (int)printed, " ", @@ -1939,38 +1932,73 @@ size_t disasm__fprintf(struct list_head *head, FILE *fp)  	struct disasm_line *pos;  	size_t printed = 0; -	list_for_each_entry(pos, head, node) +	list_for_each_entry(pos, head, al.node)  		printed += disasm_line__fprintf(pos, fp);  	return printed;  } +static void 
annotation__calc_lines(struct annotation *notes, struct map *map, +				  struct rb_root *root, u64 start) +{ +	struct annotation_line *al; +	struct rb_root tmp_root = RB_ROOT; + +	list_for_each_entry(al, &notes->src->source, node) { +		double percent_max = 0.0; +		int i; + +		for (i = 0; i < al->samples_nr; i++) { +			struct annotation_data *sample; + +			sample = &al->samples[i]; + +			if (sample->percent > percent_max) +				percent_max = sample->percent; +		} + +		if (percent_max <= 0.5) +			continue; + +		al->path = get_srcline(map->dso, start + al->offset, NULL, +				       false, true, start + al->offset); +		insert_source_line(&tmp_root, al); +	} + +	resort_source_line(root, &tmp_root); +} + +static void symbol__calc_lines(struct symbol *sym, struct map *map, +			      struct rb_root *root) +{ +	struct annotation *notes = symbol__annotation(sym); +	u64 start = map__rip_2objdump(map, sym->start); + +	annotation__calc_lines(notes, map, root, start); +} +  int symbol__tty_annotate(struct symbol *sym, struct map *map,  			 struct perf_evsel *evsel, bool print_lines,  			 bool full_paths, int min_pcnt, int max_lines)  {  	struct dso *dso = map->dso;  	struct rb_root source_line = RB_ROOT; -	u64 len; -	if (symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), -				0, NULL, NULL) < 0) +	if (symbol__annotate(sym, map, evsel, 0, NULL) < 0)  		return -1; -	len = symbol__size(sym); +	symbol__calc_percent(sym, evsel);  	if (print_lines) {  		srcline_full_filename = full_paths; -		symbol__get_source_line(sym, map, evsel, &source_line, len); +		symbol__calc_lines(sym, map, &source_line);  		print_summary(&source_line, dso->long_name);  	}  	symbol__annotate_printf(sym, map, evsel, full_paths,  				min_pcnt, max_lines, 0); -	if (print_lines) -		symbol__free_source_line(sym, len); -	disasm__purge(&symbol__annotation(sym)->src->source); +	annotated_source__purge(symbol__annotation(sym)->src);  	return 0;  } diff --git a/tools/perf/util/annotate.h 
b/tools/perf/util/annotate.h index f6ba3560de5e..ce427445671f 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -59,33 +59,55 @@ bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2);  struct annotation; +struct sym_hist_entry { +	u64		nr_samples; +	u64		period; +}; + +struct annotation_data { +	double			 percent; +	double			 percent_sum; +	struct sym_hist_entry	 he; +}; + +struct annotation_line { +	struct list_head	 node; +	struct rb_node		 rb_node; +	s64			 offset; +	char			*line; +	int			 line_nr; +	float			 ipc; +	u64			 cycles; +	size_t			 privsize; +	char			*path; +	int			 samples_nr; +	struct annotation_data	 samples[0]; +}; +  struct disasm_line { -	struct list_head    node; -	s64		    offset; -	char		    *line; -	struct ins	    ins; -	int		    line_nr; -	float		    ipc; -	u64		    cycles; -	struct ins_operands ops; +	struct ins		 ins; +	struct ins_operands	 ops; + +	/* This needs to be at the end. */ +	struct annotation_line	 al;  }; +static inline struct disasm_line *disasm_line(struct annotation_line *al) +{ +	return al ? 
container_of(al, struct disasm_line, al) : NULL; +} +  static inline bool disasm_line__has_offset(const struct disasm_line *dl)  {  	return dl->ops.target.offset_avail;  } -struct sym_hist_entry { -	u64		nr_samples; -	u64		period; -}; -  void disasm_line__free(struct disasm_line *dl); -struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disasm_line *pos); +struct annotation_line * +annotation_line__next(struct annotation_line *pos, struct list_head *head);  int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw);  size_t disasm__fprintf(struct list_head *head, FILE *fp); -double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset, -			    s64 end, const char **path, struct sym_hist_entry *sample); +void symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel);  struct sym_hist {  	u64		      nr_samples; @@ -104,19 +126,6 @@ struct cyc_hist {  	u16	reset;  }; -struct source_line_samples { -	double		percent; -	double		percent_sum; -	u64		nr; -}; - -struct source_line { -	struct rb_node	node; -	char		*path; -	int		nr_pcnt; -	struct source_line_samples samples[1]; -}; -  /** struct annotated_source - symbols with hits have this attached as in sannotation   *   * @histogram: Array of addr hit histograms per event being monitored @@ -132,7 +141,6 @@ struct source_line {   */  struct annotated_source {  	struct list_head   source; -	struct source_line *lines;  	int    		   nr_histograms;  	size_t		   sizeof_sym_hist;  	struct cyc_hist	   *cycles_hist; @@ -169,9 +177,9 @@ int hist_entry__inc_addr_samples(struct hist_entry *he, struct perf_sample *samp  int symbol__alloc_hist(struct symbol *sym);  void symbol__annotate_zero_histograms(struct symbol *sym); -int symbol__disassemble(struct symbol *sym, struct map *map, -			const char *arch_name, size_t privsize, -			struct arch **parch, char *cpuid); +int symbol__annotate(struct symbol *sym, struct map *map, +		     struct perf_evsel *evsel, size_t 
privsize, +		     struct arch **parch);  enum symbol_disassemble_errno {  	SYMBOL_ANNOTATE_ERRNO__SUCCESS		= 0, @@ -198,7 +206,7 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map,  			    int min_pcnt, int max_lines, int context);  void symbol__annotate_zero_histogram(struct symbol *sym, int evidx);  void symbol__annotate_decay_histogram(struct symbol *sym, int evidx); -void disasm__purge(struct list_head *head); +void annotated_source__purge(struct annotated_source *as);  bool ui__has_annotation(void); diff --git a/tools/perf/util/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-pkt-decoder.c new file mode 100644 index 000000000000..b94001b756c7 --- /dev/null +++ b/tools/perf/util/arm-spe-pkt-decoder.c @@ -0,0 +1,462 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Arm Statistical Profiling Extensions (SPE) support + * Copyright (c) 2017-2018, Arm Ltd. + */ + +#include <stdio.h> +#include <string.h> +#include <endian.h> +#include <byteswap.h> + +#include "arm-spe-pkt-decoder.h" + +#define BIT(n)		(1ULL << (n)) + +#define NS_FLAG		BIT(63) +#define EL_FLAG		(BIT(62) | BIT(61)) + +#define SPE_HEADER0_PAD			0x0 +#define SPE_HEADER0_END			0x1 +#define SPE_HEADER0_ADDRESS		0x30 /* address packet (short) */ +#define SPE_HEADER0_ADDRESS_MASK	0x38 +#define SPE_HEADER0_COUNTER		0x18 /* counter packet (short) */ +#define SPE_HEADER0_COUNTER_MASK	0x38 +#define SPE_HEADER0_TIMESTAMP		0x71 +#define SPE_HEADER0_TIMESTAMP		0x71 +#define SPE_HEADER0_EVENTS		0x2 +#define SPE_HEADER0_EVENTS_MASK		0xf +#define SPE_HEADER0_SOURCE		0x3 +#define SPE_HEADER0_SOURCE_MASK		0xf +#define SPE_HEADER0_CONTEXT		0x24 +#define SPE_HEADER0_CONTEXT_MASK	0x3c +#define SPE_HEADER0_OP_TYPE		0x8 +#define SPE_HEADER0_OP_TYPE_MASK	0x3c +#define SPE_HEADER1_ALIGNMENT		0x0 +#define SPE_HEADER1_ADDRESS		0xb0 /* address packet (extended) */ +#define SPE_HEADER1_ADDRESS_MASK	0xf8 +#define SPE_HEADER1_COUNTER		0x98 /* counter packet (extended) */ +#define SPE_HEADER1_COUNTER_MASK	0xf8 + +#if 
__BYTE_ORDER == __BIG_ENDIAN +#define le16_to_cpu bswap_16 +#define le32_to_cpu bswap_32 +#define le64_to_cpu bswap_64 +#define memcpy_le64(d, s, n) do { \ +	memcpy((d), (s), (n));    \ +	*(d) = le64_to_cpu(*(d)); \ +} while (0) +#else +#define le16_to_cpu +#define le32_to_cpu +#define le64_to_cpu +#define memcpy_le64 memcpy +#endif + +static const char * const arm_spe_packet_name[] = { +	[ARM_SPE_PAD]		= "PAD", +	[ARM_SPE_END]		= "END", +	[ARM_SPE_TIMESTAMP]	= "TS", +	[ARM_SPE_ADDRESS]	= "ADDR", +	[ARM_SPE_COUNTER]	= "LAT", +	[ARM_SPE_CONTEXT]	= "CONTEXT", +	[ARM_SPE_OP_TYPE]	= "OP-TYPE", +	[ARM_SPE_EVENTS]	= "EVENTS", +	[ARM_SPE_DATA_SOURCE]	= "DATA-SOURCE", +}; + +const char *arm_spe_pkt_name(enum arm_spe_pkt_type type) +{ +	return arm_spe_packet_name[type]; +} + +/* return ARM SPE payload size from its encoding, + * which is in bits 5:4 of the byte. + * 00 : byte + * 01 : halfword (2) + * 10 : word (4) + * 11 : doubleword (8) + */ +static int payloadlen(unsigned char byte) +{ +	return 1 << ((byte & 0x30) >> 4); +} + +static int arm_spe_get_payload(const unsigned char *buf, size_t len, +			       struct arm_spe_pkt *packet) +{ +	size_t payload_len = payloadlen(buf[0]); + +	if (len < 1 + payload_len) +		return ARM_SPE_NEED_MORE_BYTES; + +	buf++; + +	switch (payload_len) { +	case 1: packet->payload = *(uint8_t *)buf; break; +	case 2: packet->payload = le16_to_cpu(*(uint16_t *)buf); break; +	case 4: packet->payload = le32_to_cpu(*(uint32_t *)buf); break; +	case 8: packet->payload = le64_to_cpu(*(uint64_t *)buf); break; +	default: return ARM_SPE_BAD_PACKET; +	} + +	return 1 + payload_len; +} + +static int arm_spe_get_pad(struct arm_spe_pkt *packet) +{ +	packet->type = ARM_SPE_PAD; +	return 1; +} + +static int arm_spe_get_alignment(const unsigned char *buf, size_t len, +				 struct arm_spe_pkt *packet) +{ +	unsigned int alignment = 1 << ((buf[0] & 0xf) + 1); + +	if (len < alignment) +		return ARM_SPE_NEED_MORE_BYTES; + +	packet->type = ARM_SPE_PAD; +	return alignment 
- (((uintptr_t)buf) & (alignment - 1)); +} + +static int arm_spe_get_end(struct arm_spe_pkt *packet) +{ +	packet->type = ARM_SPE_END; +	return 1; +} + +static int arm_spe_get_timestamp(const unsigned char *buf, size_t len, +				 struct arm_spe_pkt *packet) +{ +	packet->type = ARM_SPE_TIMESTAMP; +	return arm_spe_get_payload(buf, len, packet); +} + +static int arm_spe_get_events(const unsigned char *buf, size_t len, +			      struct arm_spe_pkt *packet) +{ +	int ret = arm_spe_get_payload(buf, len, packet); + +	packet->type = ARM_SPE_EVENTS; + +	/* we use index to identify Events with a less number of +	 * comparisons in arm_spe_pkt_desc(): E.g., the LLC-ACCESS, +	 * LLC-REFILL, and REMOTE-ACCESS events are identified iff +	 * index > 1. +	 */ +	packet->index = ret - 1; + +	return ret; +} + +static int arm_spe_get_data_source(const unsigned char *buf, size_t len, +				   struct arm_spe_pkt *packet) +{ +	packet->type = ARM_SPE_DATA_SOURCE; +	return arm_spe_get_payload(buf, len, packet); +} + +static int arm_spe_get_context(const unsigned char *buf, size_t len, +			       struct arm_spe_pkt *packet) +{ +	packet->type = ARM_SPE_CONTEXT; +	packet->index = buf[0] & 0x3; + +	return arm_spe_get_payload(buf, len, packet); +} + +static int arm_spe_get_op_type(const unsigned char *buf, size_t len, +			       struct arm_spe_pkt *packet) +{ +	packet->type = ARM_SPE_OP_TYPE; +	packet->index = buf[0] & 0x3; +	return arm_spe_get_payload(buf, len, packet); +} + +static int arm_spe_get_counter(const unsigned char *buf, size_t len, +			       const unsigned char ext_hdr, struct arm_spe_pkt *packet) +{ +	if (len < 2) +		return ARM_SPE_NEED_MORE_BYTES; + +	packet->type = ARM_SPE_COUNTER; +	if (ext_hdr) +		packet->index = ((buf[0] & 0x3) << 3) | (buf[1] & 0x7); +	else +		packet->index = buf[0] & 0x7; + +	packet->payload = le16_to_cpu(*(uint16_t *)(buf + 1)); + +	return 1 + ext_hdr + 2; +} + +static int arm_spe_get_addr(const unsigned char *buf, size_t len, +			    const unsigned char 
ext_hdr, struct arm_spe_pkt *packet) +{ +	if (len < 8) +		return ARM_SPE_NEED_MORE_BYTES; + +	packet->type = ARM_SPE_ADDRESS; +	if (ext_hdr) +		packet->index = ((buf[0] & 0x3) << 3) | (buf[1] & 0x7); +	else +		packet->index = buf[0] & 0x7; + +	memcpy_le64(&packet->payload, buf + 1, 8); + +	return 1 + ext_hdr + 8; +} + +static int arm_spe_do_get_packet(const unsigned char *buf, size_t len, +				 struct arm_spe_pkt *packet) +{ +	unsigned int byte; + +	memset(packet, 0, sizeof(struct arm_spe_pkt)); + +	if (!len) +		return ARM_SPE_NEED_MORE_BYTES; + +	byte = buf[0]; +	if (byte == SPE_HEADER0_PAD) +		return arm_spe_get_pad(packet); +	else if (byte == SPE_HEADER0_END) /* no timestamp at end of record */ +		return arm_spe_get_end(packet); +	else if (byte & 0xc0 /* 0y11xxxxxx */) { +		if (byte & 0x80) { +			if ((byte & SPE_HEADER0_ADDRESS_MASK) == SPE_HEADER0_ADDRESS) +				return arm_spe_get_addr(buf, len, 0, packet); +			if ((byte & SPE_HEADER0_COUNTER_MASK) == SPE_HEADER0_COUNTER) +				return arm_spe_get_counter(buf, len, 0, packet); +		} else +			if (byte == SPE_HEADER0_TIMESTAMP) +				return arm_spe_get_timestamp(buf, len, packet); +			else if ((byte & SPE_HEADER0_EVENTS_MASK) == SPE_HEADER0_EVENTS) +				return arm_spe_get_events(buf, len, packet); +			else if ((byte & SPE_HEADER0_SOURCE_MASK) == SPE_HEADER0_SOURCE) +				return arm_spe_get_data_source(buf, len, packet); +			else if ((byte & SPE_HEADER0_CONTEXT_MASK) == SPE_HEADER0_CONTEXT) +				return arm_spe_get_context(buf, len, packet); +			else if ((byte & SPE_HEADER0_OP_TYPE_MASK) == SPE_HEADER0_OP_TYPE) +				return arm_spe_get_op_type(buf, len, packet); +	} else if ((byte & 0xe0) == 0x20 /* 0y001xxxxx */) { +		/* 16-bit header */ +		byte = buf[1]; +		if (byte == SPE_HEADER1_ALIGNMENT) +			return arm_spe_get_alignment(buf, len, packet); +		else if ((byte & SPE_HEADER1_ADDRESS_MASK) == SPE_HEADER1_ADDRESS) +			return arm_spe_get_addr(buf, len, 1, packet); +		else if ((byte & SPE_HEADER1_COUNTER_MASK) == 
SPE_HEADER1_COUNTER) +			return arm_spe_get_counter(buf, len, 1, packet); +	} + +	return ARM_SPE_BAD_PACKET; +} + +int arm_spe_get_packet(const unsigned char *buf, size_t len, +		       struct arm_spe_pkt *packet) +{ +	int ret; + +	ret = arm_spe_do_get_packet(buf, len, packet); +	/* put multiple consecutive PADs on the same line, up to +	 * the fixed-width output format of 16 bytes per line. +	 */ +	if (ret > 0 && packet->type == ARM_SPE_PAD) { +		while (ret < 16 && len > (size_t)ret && !buf[ret]) +			ret += 1; +	} +	return ret; +} + +int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf, +		     size_t buf_len) +{ +	int ret, ns, el, idx = packet->index; +	unsigned long long payload = packet->payload; +	const char *name = arm_spe_pkt_name(packet->type); + +	switch (packet->type) { +	case ARM_SPE_BAD: +	case ARM_SPE_PAD: +	case ARM_SPE_END: +		return snprintf(buf, buf_len, "%s", name); +	case ARM_SPE_EVENTS: { +		size_t blen = buf_len; + +		ret = 0; +		ret = snprintf(buf, buf_len, "EV"); +		buf += ret; +		blen -= ret; +		if (payload & 0x1) { +			ret = snprintf(buf, buf_len, " EXCEPTION-GEN"); +			buf += ret; +			blen -= ret; +		} +		if (payload & 0x2) { +			ret = snprintf(buf, buf_len, " RETIRED"); +			buf += ret; +			blen -= ret; +		} +		if (payload & 0x4) { +			ret = snprintf(buf, buf_len, " L1D-ACCESS"); +			buf += ret; +			blen -= ret; +		} +		if (payload & 0x8) { +			ret = snprintf(buf, buf_len, " L1D-REFILL"); +			buf += ret; +			blen -= ret; +		} +		if (payload & 0x10) { +			ret = snprintf(buf, buf_len, " TLB-ACCESS"); +			buf += ret; +			blen -= ret; +		} +		if (payload & 0x20) { +			ret = snprintf(buf, buf_len, " TLB-REFILL"); +			buf += ret; +			blen -= ret; +		} +		if (payload & 0x40) { +			ret = snprintf(buf, buf_len, " NOT-TAKEN"); +			buf += ret; +			blen -= ret; +		} +		if (payload & 0x80) { +			ret = snprintf(buf, buf_len, " MISPRED"); +			buf += ret; +			blen -= ret; +		} +		if (idx > 1) { +			if (payload & 0x100) { +				ret = 
snprintf(buf, buf_len, " LLC-ACCESS"); +				buf += ret; +				blen -= ret; +			} +			if (payload & 0x200) { +				ret = snprintf(buf, buf_len, " LLC-REFILL"); +				buf += ret; +				blen -= ret; +			} +			if (payload & 0x400) { +				ret = snprintf(buf, buf_len, " REMOTE-ACCESS"); +				buf += ret; +				blen -= ret; +			} +		} +		if (ret < 0) +			return ret; +		blen -= ret; +		return buf_len - blen; +	} +	case ARM_SPE_OP_TYPE: +		switch (idx) { +		case 0:	return snprintf(buf, buf_len, "%s", payload & 0x1 ? +					"COND-SELECT" : "INSN-OTHER"); +		case 1:	{ +			size_t blen = buf_len; + +			if (payload & 0x1) +				ret = snprintf(buf, buf_len, "ST"); +			else +				ret = snprintf(buf, buf_len, "LD"); +			buf += ret; +			blen -= ret; +			if (payload & 0x2) { +				if (payload & 0x4) { +					ret = snprintf(buf, buf_len, " AT"); +					buf += ret; +					blen -= ret; +				} +				if (payload & 0x8) { +					ret = snprintf(buf, buf_len, " EXCL"); +					buf += ret; +					blen -= ret; +				} +				if (payload & 0x10) { +					ret = snprintf(buf, buf_len, " AR"); +					buf += ret; +					blen -= ret; +				} +			} else if (payload & 0x4) { +				ret = snprintf(buf, buf_len, " SIMD-FP"); +				buf += ret; +				blen -= ret; +			} +			if (ret < 0) +				return ret; +			blen -= ret; +			return buf_len - blen; +		} +		case 2:	{ +			size_t blen = buf_len; + +			ret = snprintf(buf, buf_len, "B"); +			buf += ret; +			blen -= ret; +			if (payload & 0x1) { +				ret = snprintf(buf, buf_len, " COND"); +				buf += ret; +				blen -= ret; +			} +			if (payload & 0x2) { +				ret = snprintf(buf, buf_len, " IND"); +				buf += ret; +				blen -= ret; +			} +			if (ret < 0) +				return ret; +			blen -= ret; +			return buf_len - blen; +			} +		default: return 0; +		} +	case ARM_SPE_DATA_SOURCE: +	case ARM_SPE_TIMESTAMP: +		return snprintf(buf, buf_len, "%s %lld", name, payload); +	case ARM_SPE_ADDRESS: +		switch (idx) { +		case 0: +		case 1: ns = !!(packet->payload & NS_FLAG); +			el = (packet->payload & EL_FLAG) 
>> 61; +			payload &= ~(0xffULL << 56); +			return snprintf(buf, buf_len, "%s 0x%llx el%d ns=%d", +				        (idx == 1) ? "TGT" : "PC", payload, el, ns); +		case 2:	return snprintf(buf, buf_len, "VA 0x%llx", payload); +		case 3:	ns = !!(packet->payload & NS_FLAG); +			payload &= ~(0xffULL << 56); +			return snprintf(buf, buf_len, "PA 0x%llx ns=%d", +					payload, ns); +		default: return 0; +		} +	case ARM_SPE_CONTEXT: +		return snprintf(buf, buf_len, "%s 0x%lx el%d", name, +				(unsigned long)payload, idx + 1); +	case ARM_SPE_COUNTER: { +		size_t blen = buf_len; + +		ret = snprintf(buf, buf_len, "%s %d ", name, +			       (unsigned short)payload); +		buf += ret; +		blen -= ret; +		switch (idx) { +		case 0:	ret = snprintf(buf, buf_len, "TOT"); break; +		case 1:	ret = snprintf(buf, buf_len, "ISSUE"); break; +		case 2:	ret = snprintf(buf, buf_len, "XLAT"); break; +		default: ret = 0; +		} +		if (ret < 0) +			return ret; +		blen -= ret; +		return buf_len - blen; +	} +	default: +		break; +	} + +	return snprintf(buf, buf_len, "%s 0x%llx (%d)", +			name, payload, packet->index); +} diff --git a/tools/perf/util/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-pkt-decoder.h new file mode 100644 index 000000000000..d786ef65113f --- /dev/null +++ b/tools/perf/util/arm-spe-pkt-decoder.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Arm Statistical Profiling Extensions (SPE) support + * Copyright (c) 2017-2018, Arm Ltd. 
+ */ + +#ifndef INCLUDE__ARM_SPE_PKT_DECODER_H__ +#define INCLUDE__ARM_SPE_PKT_DECODER_H__ + +#include <stddef.h> +#include <stdint.h> + +#define ARM_SPE_PKT_DESC_MAX		256 + +#define ARM_SPE_NEED_MORE_BYTES		-1 +#define ARM_SPE_BAD_PACKET		-2 + +enum arm_spe_pkt_type { +	ARM_SPE_BAD, +	ARM_SPE_PAD, +	ARM_SPE_END, +	ARM_SPE_TIMESTAMP, +	ARM_SPE_ADDRESS, +	ARM_SPE_COUNTER, +	ARM_SPE_CONTEXT, +	ARM_SPE_OP_TYPE, +	ARM_SPE_EVENTS, +	ARM_SPE_DATA_SOURCE, +}; + +struct arm_spe_pkt { +	enum arm_spe_pkt_type	type; +	unsigned char		index; +	uint64_t		payload; +}; + +const char *arm_spe_pkt_name(enum arm_spe_pkt_type); + +int arm_spe_get_packet(const unsigned char *buf, size_t len, +		       struct arm_spe_pkt *packet); + +int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf, size_t len); +#endif diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c new file mode 100644 index 000000000000..6067267cc76c --- /dev/null +++ b/tools/perf/util/arm-spe.c @@ -0,0 +1,231 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Arm Statistical Profiling Extensions (SPE) support + * Copyright (c) 2017-2018, Arm Ltd. 
+ */ + +#include <endian.h> +#include <errno.h> +#include <byteswap.h> +#include <inttypes.h> +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/bitops.h> +#include <linux/log2.h> + +#include "cpumap.h" +#include "color.h" +#include "evsel.h" +#include "evlist.h" +#include "machine.h" +#include "session.h" +#include "util.h" +#include "thread.h" +#include "debug.h" +#include "auxtrace.h" +#include "arm-spe.h" +#include "arm-spe-pkt-decoder.h" + +struct arm_spe { +	struct auxtrace			auxtrace; +	struct auxtrace_queues		queues; +	struct auxtrace_heap		heap; +	u32				auxtrace_type; +	struct perf_session		*session; +	struct machine			*machine; +	u32				pmu_type; +}; + +struct arm_spe_queue { +	struct arm_spe		*spe; +	unsigned int		queue_nr; +	struct auxtrace_buffer	*buffer; +	bool			on_heap; +	bool			done; +	pid_t			pid; +	pid_t			tid; +	int			cpu; +}; + +static void arm_spe_dump(struct arm_spe *spe __maybe_unused, +			 unsigned char *buf, size_t len) +{ +	struct arm_spe_pkt packet; +	size_t pos = 0; +	int ret, pkt_len, i; +	char desc[ARM_SPE_PKT_DESC_MAX]; +	const char *color = PERF_COLOR_BLUE; + +	color_fprintf(stdout, color, +		      ". ... 
ARM SPE data: size %zu bytes\n", +		      len); + +	while (len) { +		ret = arm_spe_get_packet(buf, len, &packet); +		if (ret > 0) +			pkt_len = ret; +		else +			pkt_len = 1; +		printf("."); +		color_fprintf(stdout, color, "  %08x: ", pos); +		for (i = 0; i < pkt_len; i++) +			color_fprintf(stdout, color, " %02x", buf[i]); +		for (; i < 16; i++) +			color_fprintf(stdout, color, "   "); +		if (ret > 0) { +			ret = arm_spe_pkt_desc(&packet, desc, +					       ARM_SPE_PKT_DESC_MAX); +			if (ret > 0) +				color_fprintf(stdout, color, " %s\n", desc); +		} else { +			color_fprintf(stdout, color, " Bad packet!\n"); +		} +		pos += pkt_len; +		buf += pkt_len; +		len -= pkt_len; +	} +} + +static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf, +			       size_t len) +{ +	printf(".\n"); +	arm_spe_dump(spe, buf, len); +} + +static int arm_spe_process_event(struct perf_session *session __maybe_unused, +				 union perf_event *event __maybe_unused, +				 struct perf_sample *sample __maybe_unused, +				 struct perf_tool *tool __maybe_unused) +{ +	return 0; +} + +static int arm_spe_process_auxtrace_event(struct perf_session *session, +					  union perf_event *event, +					  struct perf_tool *tool __maybe_unused) +{ +	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, +					     auxtrace); +	struct auxtrace_buffer *buffer; +	off_t data_offset; +	int fd = perf_data__fd(session->data); +	int err; + +	if (perf_data__is_pipe(session->data)) { +		data_offset = 0; +	} else { +		data_offset = lseek(fd, 0, SEEK_CUR); +		if (data_offset == -1) +			return -errno; +	} + +	err = auxtrace_queues__add_event(&spe->queues, session, event, +					 data_offset, &buffer); +	if (err) +		return err; + +	/* Dump here now we have copied a piped trace out of the pipe */ +	if (dump_trace) { +		if (auxtrace_buffer__get_data(buffer, fd)) { +			arm_spe_dump_event(spe, buffer->data, +					     buffer->size); +			auxtrace_buffer__put_data(buffer); +		} +	} + +	return 0; +} + 
+static int arm_spe_flush(struct perf_session *session __maybe_unused, +			 struct perf_tool *tool __maybe_unused) +{ +	return 0; +} + +static void arm_spe_free_queue(void *priv) +{ +	struct arm_spe_queue *speq = priv; + +	if (!speq) +		return; +	free(speq); +} + +static void arm_spe_free_events(struct perf_session *session) +{ +	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, +					     auxtrace); +	struct auxtrace_queues *queues = &spe->queues; +	unsigned int i; + +	for (i = 0; i < queues->nr_queues; i++) { +		arm_spe_free_queue(queues->queue_array[i].priv); +		queues->queue_array[i].priv = NULL; +	} +	auxtrace_queues__free(queues); +} + +static void arm_spe_free(struct perf_session *session) +{ +	struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, +					     auxtrace); + +	auxtrace_heap__free(&spe->heap); +	arm_spe_free_events(session); +	session->auxtrace = NULL; +	free(spe); +} + +static const char * const arm_spe_info_fmts[] = { +	[ARM_SPE_PMU_TYPE]		= "  PMU Type           %"PRId64"\n", +}; + +static void arm_spe_print_info(u64 *arr) +{ +	if (!dump_trace) +		return; + +	fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]); +} + +int arm_spe_process_auxtrace_info(union perf_event *event, +				  struct perf_session *session) +{ +	struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info; +	size_t min_sz = sizeof(u64) * ARM_SPE_PMU_TYPE; +	struct arm_spe *spe; +	int err; + +	if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) + +					min_sz) +		return -EINVAL; + +	spe = zalloc(sizeof(struct arm_spe)); +	if (!spe) +		return -ENOMEM; + +	err = auxtrace_queues__init(&spe->queues); +	if (err) +		goto err_free; + +	spe->session = session; +	spe->machine = &session->machines.host; /* No kvm support */ +	spe->auxtrace_type = auxtrace_info->type; +	spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE]; + +	spe->auxtrace.process_event = arm_spe_process_event; +	
spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event; +	spe->auxtrace.flush_events = arm_spe_flush; +	spe->auxtrace.free_events = arm_spe_free_events; +	spe->auxtrace.free = arm_spe_free; +	session->auxtrace = &spe->auxtrace; + +	arm_spe_print_info(&auxtrace_info->priv[0]); + +	return 0; + +err_free: +	free(spe); +	return err; +} diff --git a/tools/perf/util/arm-spe.h b/tools/perf/util/arm-spe.h new file mode 100644 index 000000000000..98d3235781c3 --- /dev/null +++ b/tools/perf/util/arm-spe.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Arm Statistical Profiling Extensions (SPE) support + * Copyright (c) 2017-2018, Arm Ltd. + */ + +#ifndef INCLUDE__PERF_ARM_SPE_H__ +#define INCLUDE__PERF_ARM_SPE_H__ + +#define ARM_SPE_PMU_NAME "arm_spe_" + +enum { +	ARM_SPE_PMU_TYPE, +	ARM_SPE_PER_CPU_MMAPS, +	ARM_SPE_AUXTRACE_PRIV_MAX, +}; + +#define ARM_SPE_AUXTRACE_PRIV_SIZE (ARM_SPE_AUXTRACE_PRIV_MAX * sizeof(u64)) + +union perf_event; +struct perf_session; +struct perf_pmu; + +struct auxtrace_record *arm_spe_recording_init(int *err, +					       struct perf_pmu *arm_spe_pmu); + +int arm_spe_process_auxtrace_info(union perf_event *event, +				  struct perf_session *session); + +struct perf_event_attr *arm_spe_pmu_default_config(struct perf_pmu *arm_spe_pmu); +#endif diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index a33491416400..9faf3b5367db 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -31,9 +31,6 @@  #include <sys/param.h>  #include <stdlib.h>  #include <stdio.h> -#include <string.h> -#include <limits.h> -#include <errno.h>  #include <linux/list.h>  #include "../perf.h" @@ -55,8 +52,10 @@  #include "debug.h"  #include <subcmd/parse-options.h> +#include "cs-etm.h"  #include "intel-pt.h"  #include "intel-bts.h" +#include "arm-spe.h"  #include "sane_ctype.h"  #include "symbol/kallsyms.h" @@ -913,7 +912,10 @@ int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused,  		
return intel_pt_process_auxtrace_info(event, session);  	case PERF_AUXTRACE_INTEL_BTS:  		return intel_bts_process_auxtrace_info(event, session); +	case PERF_AUXTRACE_ARM_SPE: +		return arm_spe_process_auxtrace_info(event, session);  	case PERF_AUXTRACE_CS_ETM: +		return cs_etm__process_auxtrace_info(event, session);  	case PERF_AUXTRACE_UNKNOWN:  	default:  		return -EINVAL; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index d19e11b68de7..453c148d2158 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -43,6 +43,7 @@ enum auxtrace_type {  	PERF_AUXTRACE_INTEL_PT,  	PERF_AUXTRACE_INTEL_BTS,  	PERF_AUXTRACE_CS_ETM, +	PERF_AUXTRACE_ARM_SPE,  };  enum itrace_period_type { diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index 72c107fcbc5a..af7ad814b2c3 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -94,7 +94,7 @@ struct bpf_object *bpf__prepare_load(const char *filename, bool source)  		err = perf_clang__compile_bpf(filename, &obj_buf, &obj_buf_sz);  		perf_clang__cleanup();  		if (err) { -			pr_warning("bpf: builtin compilation failed: %d, try external compiler\n", err); +			pr_debug("bpf: builtin compilation failed: %d, try external compiler\n", err);  			err = llvm__compile_bpf(filename, &obj_buf, &obj_buf_sz);  			if (err)  				return ERR_PTR(-BPF_LOADER_ERRNO__COMPILE); @@ -1533,7 +1533,7 @@ int bpf__apply_obj_config(void)  			(strcmp("__bpf_stdout__", 	\  				bpf_map__name(pos)) == 0)) -int bpf__setup_stdout(struct perf_evlist *evlist __maybe_unused) +int bpf__setup_stdout(struct perf_evlist *evlist)  {  	struct bpf_map_priv *tmpl_priv = NULL;  	struct bpf_object *obj, *tmp; diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 082505d08d72..32ef7bdca1cf 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -37,6 +37,15 @@ struct callchain_param callchain_param = {  	CALLCHAIN_PARAM_DEFAULT  }; +/* + * Are there 
any events using DWARF callchains? + * + * I.e. + * + * -e cycles/call-graph=dwarf/ + */ +bool dwarf_callchain_users; +  struct callchain_param callchain_param_default = {  	CALLCHAIN_PARAM_DEFAULT  }; @@ -265,6 +274,7 @@ int parse_callchain_record(const char *arg, struct callchain_param *param)  			ret = 0;  			param->record_mode = CALLCHAIN_DWARF;  			param->dump_size = default_stack_dump_size; +			dwarf_callchain_users = true;  			tok = strtok_r(NULL, ",", &saveptr);  			if (tok) { diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index b79ef2478a57..154560b1eb65 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -89,6 +89,8 @@ enum chain_value {  	CCVAL_COUNT,  }; +extern bool dwarf_callchain_users; +  struct callchain_param {  	bool			enabled;  	enum perf_call_graph_mode record_mode; diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index d9ffc1e6eb39..984f69144f87 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -6,6 +6,9 @@  #include "cgroup.h"  #include "evlist.h"  #include <linux/stringify.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h>  int nr_cgroups; diff --git a/tools/perf/util/cs-etm-decoder/Build b/tools/perf/util/cs-etm-decoder/Build new file mode 100644 index 000000000000..bc22c39c727f --- /dev/null +++ b/tools/perf/util/cs-etm-decoder/Build @@ -0,0 +1 @@ +libperf-$(CONFIG_AUXTRACE) += cs-etm-decoder.o diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c new file mode 100644 index 000000000000..1fb01849f1c7 --- /dev/null +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -0,0 +1,513 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Copyright(C) 2015-2018 Linaro Limited. 
+ * + * Author: Tor Jeremiassen <tor@ti.com> + * Author: Mathieu Poirier <mathieu.poirier@linaro.org> + */ + +#include <linux/err.h> +#include <linux/list.h> +#include <stdlib.h> +#include <opencsd/c_api/opencsd_c_api.h> +#include <opencsd/etmv4/trc_pkt_types_etmv4.h> +#include <opencsd/ocsd_if_types.h> + +#include "cs-etm.h" +#include "cs-etm-decoder.h" +#include "intlist.h" +#include "util.h" + +#define MAX_BUFFER 1024 + +/* use raw logging */ +#ifdef CS_DEBUG_RAW +#define CS_LOG_RAW_FRAMES +#ifdef CS_RAW_PACKED +#define CS_RAW_DEBUG_FLAGS (OCSD_DFRMTR_UNPACKED_RAW_OUT | \ +			    OCSD_DFRMTR_PACKED_RAW_OUT) +#else +#define CS_RAW_DEBUG_FLAGS (OCSD_DFRMTR_UNPACKED_RAW_OUT) +#endif +#endif + +struct cs_etm_decoder { +	void *data; +	void (*packet_printer)(const char *msg); +	bool trace_on; +	dcd_tree_handle_t dcd_tree; +	cs_etm_mem_cb_type mem_access; +	ocsd_datapath_resp_t prev_return; +	u32 packet_count; +	u32 head; +	u32 tail; +	struct cs_etm_packet packet_buffer[MAX_BUFFER]; +}; + +static u32 +cs_etm_decoder__mem_access(const void *context, +			   const ocsd_vaddr_t address, +			   const ocsd_mem_space_acc_t mem_space __maybe_unused, +			   const u32 req_size, +			   u8 *buffer) +{ +	struct cs_etm_decoder *decoder = (struct cs_etm_decoder *) context; + +	return decoder->mem_access(decoder->data, +				   address, +				   req_size, +				   buffer); +} + +int cs_etm_decoder__add_mem_access_cb(struct cs_etm_decoder *decoder, +				      u64 start, u64 end, +				      cs_etm_mem_cb_type cb_func) +{ +	decoder->mem_access = cb_func; + +	if (ocsd_dt_add_callback_mem_acc(decoder->dcd_tree, start, end, +					 OCSD_MEM_SPACE_ANY, +					 cs_etm_decoder__mem_access, decoder)) +		return -1; + +	return 0; +} + +int cs_etm_decoder__reset(struct cs_etm_decoder *decoder) +{ +	ocsd_datapath_resp_t dp_ret; + +	dp_ret = ocsd_dt_process_data(decoder->dcd_tree, OCSD_OP_RESET, +				      0, 0, NULL, NULL); +	if (OCSD_DATA_RESP_IS_FATAL(dp_ret)) +		return -1; + +	return 0; +} + +int 
cs_etm_decoder__get_packet(struct cs_etm_decoder *decoder, +			       struct cs_etm_packet *packet) +{ +	if (!decoder || !packet) +		return -EINVAL; + +	/* Nothing to do, might as well just return */ +	if (decoder->packet_count == 0) +		return 0; + +	*packet = decoder->packet_buffer[decoder->head]; + +	decoder->head = (decoder->head + 1) & (MAX_BUFFER - 1); + +	decoder->packet_count--; + +	return 1; +} + +static void cs_etm_decoder__gen_etmv4_config(struct cs_etm_trace_params *params, +					     ocsd_etmv4_cfg *config) +{ +	config->reg_configr = params->etmv4.reg_configr; +	config->reg_traceidr = params->etmv4.reg_traceidr; +	config->reg_idr0 = params->etmv4.reg_idr0; +	config->reg_idr1 = params->etmv4.reg_idr1; +	config->reg_idr2 = params->etmv4.reg_idr2; +	config->reg_idr8 = params->etmv4.reg_idr8; +	config->reg_idr9 = 0; +	config->reg_idr10 = 0; +	config->reg_idr11 = 0; +	config->reg_idr12 = 0; +	config->reg_idr13 = 0; +	config->arch_ver = ARCH_V8; +	config->core_prof = profile_CortexA; +} + +static void cs_etm_decoder__print_str_cb(const void *p_context, +					 const char *msg, +					 const int str_len) +{ +	if (p_context && str_len) +		((struct cs_etm_decoder *)p_context)->packet_printer(msg); +} + +static int +cs_etm_decoder__init_def_logger_printing(struct cs_etm_decoder_params *d_params, +					 struct cs_etm_decoder *decoder) +{ +	int ret = 0; + +	if (d_params->packet_printer == NULL) +		return -1; + +	decoder->packet_printer = d_params->packet_printer; + +	/* +	 * Set up a library default logger to process any printers +	 * (packet/raw frame) we add later. +	 */ +	ret = ocsd_def_errlog_init(OCSD_ERR_SEV_ERROR, 1); +	if (ret != 0) +		return -1; + +	/* no stdout / err / file output */ +	ret = ocsd_def_errlog_config_output(C_API_MSGLOGOUT_FLG_NONE, NULL); +	if (ret != 0) +		return -1; + +	/* +	 * Set the string CB for the default logger, passes strings to +	 * perf print logger. 
+	 */ +	ret = ocsd_def_errlog_set_strprint_cb(decoder->dcd_tree, +					      (void *)decoder, +					      cs_etm_decoder__print_str_cb); +	if (ret != 0) +		ret = -1; + +	return 0; +} + +#ifdef CS_LOG_RAW_FRAMES +static void +cs_etm_decoder__init_raw_frame_logging(struct cs_etm_decoder_params *d_params, +				       struct cs_etm_decoder *decoder) +{ +	/* Only log these during a --dump operation */ +	if (d_params->operation == CS_ETM_OPERATION_PRINT) { +		/* set up a library default logger to process the +		 *  raw frame printer we add later +		 */ +		ocsd_def_errlog_init(OCSD_ERR_SEV_ERROR, 1); + +		/* no stdout / err / file output */ +		ocsd_def_errlog_config_output(C_API_MSGLOGOUT_FLG_NONE, NULL); + +		/* set the string CB for the default logger, +		 * passes strings to perf print logger. +		 */ +		ocsd_def_errlog_set_strprint_cb(decoder->dcd_tree, +						(void *)decoder, +						cs_etm_decoder__print_str_cb); + +		/* use the built in library printer for the raw frames */ +		ocsd_dt_set_raw_frame_printer(decoder->dcd_tree, +					      CS_RAW_DEBUG_FLAGS); +	} +} +#else +static void +cs_etm_decoder__init_raw_frame_logging( +		struct cs_etm_decoder_params *d_params __maybe_unused, +		struct cs_etm_decoder *decoder __maybe_unused) +{ +} +#endif + +static int cs_etm_decoder__create_packet_printer(struct cs_etm_decoder *decoder, +						 const char *decoder_name, +						 void *trace_config) +{ +	u8 csid; + +	if (ocsd_dt_create_decoder(decoder->dcd_tree, decoder_name, +				   OCSD_CREATE_FLG_PACKET_PROC, +				   trace_config, &csid)) +		return -1; + +	if (ocsd_dt_set_pkt_protocol_printer(decoder->dcd_tree, csid, 0)) +		return -1; + +	return 0; +} + +static int +cs_etm_decoder__create_etm_packet_printer(struct cs_etm_trace_params *t_params, +					  struct cs_etm_decoder *decoder) +{ +	const char *decoder_name; +	ocsd_etmv4_cfg trace_config_etmv4; +	void *trace_config; + +	switch (t_params->protocol) { +	case CS_ETM_PROTO_ETMV4i: +		
cs_etm_decoder__gen_etmv4_config(t_params, &trace_config_etmv4); +		decoder_name = OCSD_BUILTIN_DCD_ETMV4I; +		trace_config = &trace_config_etmv4; +		break; +	default: +		return -1; +	} + +	return cs_etm_decoder__create_packet_printer(decoder, +						     decoder_name, +						     trace_config); +} + +static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder) +{ +	int i; + +	decoder->head = 0; +	decoder->tail = 0; +	decoder->packet_count = 0; +	for (i = 0; i < MAX_BUFFER; i++) { +		decoder->packet_buffer[i].start_addr = 0xdeadbeefdeadbeefUL; +		decoder->packet_buffer[i].end_addr   = 0xdeadbeefdeadbeefUL; +		decoder->packet_buffer[i].exc	     = false; +		decoder->packet_buffer[i].exc_ret    = false; +		decoder->packet_buffer[i].cpu	     = INT_MIN; +	} +} + +static ocsd_datapath_resp_t +cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, +			      const ocsd_generic_trace_elem *elem, +			      const u8 trace_chan_id, +			      enum cs_etm_sample_type sample_type) +{ +	u32 et = 0; +	struct int_node *inode = NULL; + +	if (decoder->packet_count >= MAX_BUFFER - 1) +		return OCSD_RESP_FATAL_SYS_ERR; + +	/* Search the RB tree for the cpu associated with this traceID */ +	inode = intlist__find(traceid_list, trace_chan_id); +	if (!inode) +		return OCSD_RESP_FATAL_SYS_ERR; + +	et = decoder->tail; +	decoder->packet_buffer[et].sample_type = sample_type; +	decoder->packet_buffer[et].start_addr = elem->st_addr; +	decoder->packet_buffer[et].end_addr = elem->en_addr; +	decoder->packet_buffer[et].exc = false; +	decoder->packet_buffer[et].exc_ret = false; +	decoder->packet_buffer[et].cpu = *((int *)inode->priv); + +	/* Wrap around if need be */ +	et = (et + 1) & (MAX_BUFFER - 1); + +	decoder->tail = et; +	decoder->packet_count++; + +	if (decoder->packet_count == MAX_BUFFER - 1) +		return OCSD_RESP_WAIT; + +	return OCSD_RESP_CONT; +} + +static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( +				const void *context, +				const ocsd_trc_index_t indx 
__maybe_unused, +				const u8 trace_chan_id __maybe_unused, +				const ocsd_generic_trace_elem *elem) +{ +	ocsd_datapath_resp_t resp = OCSD_RESP_CONT; +	struct cs_etm_decoder *decoder = (struct cs_etm_decoder *) context; + +	switch (elem->elem_type) { +	case OCSD_GEN_TRC_ELEM_UNKNOWN: +		break; +	case OCSD_GEN_TRC_ELEM_NO_SYNC: +		decoder->trace_on = false; +		break; +	case OCSD_GEN_TRC_ELEM_TRACE_ON: +		decoder->trace_on = true; +		break; +	case OCSD_GEN_TRC_ELEM_INSTR_RANGE: +		resp = cs_etm_decoder__buffer_packet(decoder, elem, +						     trace_chan_id, +						     CS_ETM_RANGE); +		break; +	case OCSD_GEN_TRC_ELEM_EXCEPTION: +		decoder->packet_buffer[decoder->tail].exc = true; +		break; +	case OCSD_GEN_TRC_ELEM_EXCEPTION_RET: +		decoder->packet_buffer[decoder->tail].exc_ret = true; +		break; +	case OCSD_GEN_TRC_ELEM_PE_CONTEXT: +	case OCSD_GEN_TRC_ELEM_EO_TRACE: +	case OCSD_GEN_TRC_ELEM_ADDR_NACC: +	case OCSD_GEN_TRC_ELEM_TIMESTAMP: +	case OCSD_GEN_TRC_ELEM_CYCLE_COUNT: +	case OCSD_GEN_TRC_ELEM_ADDR_UNKNOWN: +	case OCSD_GEN_TRC_ELEM_EVENT: +	case OCSD_GEN_TRC_ELEM_SWTRACE: +	case OCSD_GEN_TRC_ELEM_CUSTOM: +	default: +		break; +	} + +	return resp; +} + +static int cs_etm_decoder__create_etm_packet_decoder( +					struct cs_etm_trace_params *t_params, +					struct cs_etm_decoder *decoder) +{ +	const char *decoder_name; +	ocsd_etmv4_cfg trace_config_etmv4; +	void *trace_config; +	u8 csid; + +	switch (t_params->protocol) { +	case CS_ETM_PROTO_ETMV4i: +		cs_etm_decoder__gen_etmv4_config(t_params, &trace_config_etmv4); +		decoder_name = OCSD_BUILTIN_DCD_ETMV4I; +		trace_config = &trace_config_etmv4; +		break; +	default: +		return -1; +	} + +	if (ocsd_dt_create_decoder(decoder->dcd_tree, +				     decoder_name, +				     OCSD_CREATE_FLG_FULL_DECODER, +				     trace_config, &csid)) +		return -1; + +	if (ocsd_dt_set_gen_elem_outfn(decoder->dcd_tree, +				       cs_etm_decoder__gen_trace_elem_printer, +				       decoder)) +		return -1; + +	return 0; +} + +static int 
+cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params, +				   struct cs_etm_trace_params *t_params, +				   struct cs_etm_decoder *decoder) +{ +	if (d_params->operation == CS_ETM_OPERATION_PRINT) +		return cs_etm_decoder__create_etm_packet_printer(t_params, +								 decoder); +	else if (d_params->operation == CS_ETM_OPERATION_DECODE) +		return cs_etm_decoder__create_etm_packet_decoder(t_params, +								 decoder); + +	return -1; +} + +struct cs_etm_decoder * +cs_etm_decoder__new(int num_cpu, struct cs_etm_decoder_params *d_params, +		    struct cs_etm_trace_params t_params[]) +{ +	struct cs_etm_decoder *decoder; +	ocsd_dcd_tree_src_t format; +	u32 flags; +	int i, ret; + +	if ((!t_params) || (!d_params)) +		return NULL; + +	decoder = zalloc(sizeof(*decoder)); + +	if (!decoder) +		return NULL; + +	decoder->data = d_params->data; +	decoder->prev_return = OCSD_RESP_CONT; +	cs_etm_decoder__clear_buffer(decoder); +	format = (d_params->formatted ? OCSD_TRC_SRC_FRAME_FORMATTED : +					 OCSD_TRC_SRC_SINGLE); +	flags = 0; +	flags |= (d_params->fsyncs ? OCSD_DFRMTR_HAS_FSYNCS : 0); +	flags |= (d_params->hsyncs ? OCSD_DFRMTR_HAS_HSYNCS : 0); +	flags |= (d_params->frame_aligned ? OCSD_DFRMTR_FRAME_MEM_ALIGN : 0); + +	/* +	 * Drivers may add barrier frames when used with perf, set up to +	 * handle this. Barriers consist of an FSYNC packet repeated 4 times. 
+	 */ +	flags |= OCSD_DFRMTR_RESET_ON_4X_FSYNC; + +	/* Create decode tree for the data source */ +	decoder->dcd_tree = ocsd_create_dcd_tree(format, flags); + +	if (decoder->dcd_tree == 0) +		goto err_free_decoder; + +	/* init library print logging support */ +	ret = cs_etm_decoder__init_def_logger_printing(d_params, decoder); +	if (ret != 0) +		goto err_free_decoder_tree; + +	/* init raw frame logging if required */ +	cs_etm_decoder__init_raw_frame_logging(d_params, decoder); + +	for (i = 0; i < num_cpu; i++) { +		ret = cs_etm_decoder__create_etm_decoder(d_params, +							 &t_params[i], +							 decoder); +		if (ret != 0) +			goto err_free_decoder_tree; +	} + +	return decoder; + +err_free_decoder_tree: +	ocsd_destroy_dcd_tree(decoder->dcd_tree); +err_free_decoder: +	free(decoder); +	return NULL; +} + +int cs_etm_decoder__process_data_block(struct cs_etm_decoder *decoder, +				       u64 indx, const u8 *buf, +				       size_t len, size_t *consumed) +{ +	int ret = 0; +	ocsd_datapath_resp_t cur = OCSD_RESP_CONT; +	ocsd_datapath_resp_t prev_return = decoder->prev_return; +	size_t processed = 0; +	u32 count; + +	while (processed < len) { +		if (OCSD_DATA_RESP_IS_WAIT(prev_return)) { +			cur = ocsd_dt_process_data(decoder->dcd_tree, +						   OCSD_OP_FLUSH, +						   0, +						   0, +						   NULL, +						   NULL); +		} else if (OCSD_DATA_RESP_IS_CONT(prev_return)) { +			cur = ocsd_dt_process_data(decoder->dcd_tree, +						   OCSD_OP_DATA, +						   indx + processed, +						   len - processed, +						   &buf[processed], +						   &count); +			processed += count; +		} else { +			ret = -EINVAL; +			break; +		} + +		/* +		 * Return to the input code if the packet buffer is full. +		 * Flushing will get done once the packet buffer has been +		 * processed. 
+		 */ +		if (OCSD_DATA_RESP_IS_WAIT(cur)) +			break; + +		prev_return = cur; +	} + +	decoder->prev_return = cur; +	*consumed = processed; + +	return ret; +} + +void cs_etm_decoder__free(struct cs_etm_decoder *decoder) +{ +	if (!decoder) +		return; + +	ocsd_destroy_dcd_tree(decoder->dcd_tree); +	decoder->dcd_tree = NULL; +	free(decoder); +} diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h new file mode 100644 index 000000000000..3d2e6205d186 --- /dev/null +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h @@ -0,0 +1,105 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Copyright(C) 2015-2018 Linaro Limited. + * + * Author: Tor Jeremiassen <tor@ti.com> + * Author: Mathieu Poirier <mathieu.poirier@linaro.org> + */ + +#ifndef INCLUDE__CS_ETM_DECODER_H__ +#define INCLUDE__CS_ETM_DECODER_H__ + +#include <linux/types.h> +#include <stdio.h> + +struct cs_etm_decoder; + +struct cs_etm_buffer { +	const unsigned char *buf; +	size_t len; +	u64 offset; +	u64 ref_timestamp; +}; + +enum cs_etm_sample_type { +	CS_ETM_RANGE = 1 << 0, +}; + +struct cs_etm_packet { +	enum cs_etm_sample_type sample_type; +	u64 start_addr; +	u64 end_addr; +	u8 exc; +	u8 exc_ret; +	int cpu; +}; + +struct cs_etm_queue; + +typedef u32 (*cs_etm_mem_cb_type)(struct cs_etm_queue *, u64, +				  size_t, u8 *); + +struct cs_etmv4_trace_params { +	u32 reg_idr0; +	u32 reg_idr1; +	u32 reg_idr2; +	u32 reg_idr8; +	u32 reg_configr; +	u32 reg_traceidr; +}; + +struct cs_etm_trace_params { +	int protocol; +	union { +		struct cs_etmv4_trace_params etmv4; +	}; +}; + +struct cs_etm_decoder_params { +	int operation; +	void (*packet_printer)(const char *msg); +	cs_etm_mem_cb_type mem_acc_cb; +	u8 formatted; +	u8 fsyncs; +	u8 hsyncs; +	u8 frame_aligned; +	void *data; +}; + +/* + * The following enums are indexed starting with 1 to align with the + * open source coresight trace decoder library. 
+ */ +enum { +	CS_ETM_PROTO_ETMV3 = 1, +	CS_ETM_PROTO_ETMV4i, +	CS_ETM_PROTO_ETMV4d, +}; + +enum { +	CS_ETM_OPERATION_PRINT = 1, +	CS_ETM_OPERATION_DECODE, +}; + +int cs_etm_decoder__process_data_block(struct cs_etm_decoder *decoder, +				       u64 indx, const u8 *buf, +				       size_t len, size_t *consumed); + +struct cs_etm_decoder * +cs_etm_decoder__new(int num_cpu, +		    struct cs_etm_decoder_params *d_params, +		    struct cs_etm_trace_params t_params[]); + +void cs_etm_decoder__free(struct cs_etm_decoder *decoder); + +int cs_etm_decoder__add_mem_access_cb(struct cs_etm_decoder *decoder, +				      u64 start, u64 end, +				      cs_etm_mem_cb_type cb_func); + +int cs_etm_decoder__get_packet(struct cs_etm_decoder *decoder, +			       struct cs_etm_packet *packet); + +int cs_etm_decoder__reset(struct cs_etm_decoder *decoder); + +#endif /* INCLUDE__CS_ETM_DECODER_H__ */ diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c new file mode 100644 index 000000000000..b9f0a53dfa65 --- /dev/null +++ b/tools/perf/util/cs-etm.c @@ -0,0 +1,1023 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Copyright(C) 2015-2018 Linaro Limited. 
+ * + * Author: Tor Jeremiassen <tor@ti.com> + * Author: Mathieu Poirier <mathieu.poirier@linaro.org> + */ + +#include <linux/bitops.h> +#include <linux/err.h> +#include <linux/kernel.h> +#include <linux/log2.h> +#include <linux/types.h> + +#include <stdlib.h> + +#include "auxtrace.h" +#include "color.h" +#include "cs-etm.h" +#include "cs-etm-decoder/cs-etm-decoder.h" +#include "debug.h" +#include "evlist.h" +#include "intlist.h" +#include "machine.h" +#include "map.h" +#include "perf.h" +#include "thread.h" +#include "thread_map.h" +#include "thread-stack.h" +#include "util.h" + +#define MAX_TIMESTAMP (~0ULL) + +struct cs_etm_auxtrace { +	struct auxtrace auxtrace; +	struct auxtrace_queues queues; +	struct auxtrace_heap heap; +	struct itrace_synth_opts synth_opts; +	struct perf_session *session; +	struct machine *machine; +	struct thread *unknown_thread; + +	u8 timeless_decoding; +	u8 snapshot_mode; +	u8 data_queued; +	u8 sample_branches; + +	int num_cpu; +	u32 auxtrace_type; +	u64 branches_sample_type; +	u64 branches_id; +	u64 **metadata; +	u64 kernel_start; +	unsigned int pmu_type; +}; + +struct cs_etm_queue { +	struct cs_etm_auxtrace *etm; +	struct thread *thread; +	struct cs_etm_decoder *decoder; +	struct auxtrace_buffer *buffer; +	const struct cs_etm_state *state; +	union perf_event *event_buf; +	unsigned int queue_nr; +	pid_t pid, tid; +	int cpu; +	u64 time; +	u64 timestamp; +	u64 offset; +}; + +static int cs_etm__update_queues(struct cs_etm_auxtrace *etm); +static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, +					   pid_t tid, u64 time_); + +static void cs_etm__packet_dump(const char *pkt_string) +{ +	const char *color = PERF_COLOR_BLUE; +	int len = strlen(pkt_string); + +	if (len && (pkt_string[len-1] == '\n')) +		color_fprintf(stdout, color, "	%s", pkt_string); +	else +		color_fprintf(stdout, color, "	%s\n", pkt_string); + +	fflush(stdout); +} + +static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, +			       struct 
auxtrace_buffer *buffer) +{ +	int i, ret; +	const char *color = PERF_COLOR_BLUE; +	struct cs_etm_decoder_params d_params; +	struct cs_etm_trace_params *t_params; +	struct cs_etm_decoder *decoder; +	size_t buffer_used = 0; + +	fprintf(stdout, "\n"); +	color_fprintf(stdout, color, +		     ". ... CoreSight ETM Trace data: size %zu bytes\n", +		     buffer->size); + +	/* Use metadata to fill in trace parameters for trace decoder */ +	t_params = zalloc(sizeof(*t_params) * etm->num_cpu); +	for (i = 0; i < etm->num_cpu; i++) { +		t_params[i].protocol = CS_ETM_PROTO_ETMV4i; +		t_params[i].etmv4.reg_idr0 = etm->metadata[i][CS_ETMV4_TRCIDR0]; +		t_params[i].etmv4.reg_idr1 = etm->metadata[i][CS_ETMV4_TRCIDR1]; +		t_params[i].etmv4.reg_idr2 = etm->metadata[i][CS_ETMV4_TRCIDR2]; +		t_params[i].etmv4.reg_idr8 = etm->metadata[i][CS_ETMV4_TRCIDR8]; +		t_params[i].etmv4.reg_configr = +					etm->metadata[i][CS_ETMV4_TRCCONFIGR]; +		t_params[i].etmv4.reg_traceidr = +					etm->metadata[i][CS_ETMV4_TRCTRACEIDR]; +	} + +	/* Set decoder parameters to simply print the trace packets */ +	d_params.packet_printer = cs_etm__packet_dump; +	d_params.operation = CS_ETM_OPERATION_PRINT; +	d_params.formatted = true; +	d_params.fsyncs = false; +	d_params.hsyncs = false; +	d_params.frame_aligned = true; + +	decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); + +	zfree(&t_params); + +	if (!decoder) +		return; +	do { +		size_t consumed; + +		ret = cs_etm_decoder__process_data_block( +				decoder, buffer->offset, +				&((u8 *)buffer->data)[buffer_used], +				buffer->size - buffer_used, &consumed); +		if (ret) +			break; + +		buffer_used += consumed; +	} while (buffer_used < buffer->size); + +	cs_etm_decoder__free(decoder); +} + +static int cs_etm__flush_events(struct perf_session *session, +				struct perf_tool *tool) +{ +	int ret; +	struct cs_etm_auxtrace *etm = container_of(session->auxtrace, +						   struct cs_etm_auxtrace, +						   auxtrace); +	if (dump_trace) +		return 0; + +	if 
(!tool->ordered_events) +		return -EINVAL; + +	if (!etm->timeless_decoding) +		return -EINVAL; + +	ret = cs_etm__update_queues(etm); + +	if (ret < 0) +		return ret; + +	return cs_etm__process_timeless_queues(etm, -1, MAX_TIMESTAMP - 1); +} + +static void cs_etm__free_queue(void *priv) +{ +	struct cs_etm_queue *etmq = priv; + +	free(etmq); +} + +static void cs_etm__free_events(struct perf_session *session) +{ +	unsigned int i; +	struct cs_etm_auxtrace *aux = container_of(session->auxtrace, +						   struct cs_etm_auxtrace, +						   auxtrace); +	struct auxtrace_queues *queues = &aux->queues; + +	for (i = 0; i < queues->nr_queues; i++) { +		cs_etm__free_queue(queues->queue_array[i].priv); +		queues->queue_array[i].priv = NULL; +	} + +	auxtrace_queues__free(queues); +} + +static void cs_etm__free(struct perf_session *session) +{ +	int i; +	struct int_node *inode, *tmp; +	struct cs_etm_auxtrace *aux = container_of(session->auxtrace, +						   struct cs_etm_auxtrace, +						   auxtrace); +	cs_etm__free_events(session); +	session->auxtrace = NULL; + +	/* First remove all traceID/CPU# nodes for the RB tree */ +	intlist__for_each_entry_safe(inode, tmp, traceid_list) +		intlist__remove(traceid_list, inode); +	/* Then the RB tree itself */ +	intlist__delete(traceid_list); + +	for (i = 0; i < aux->num_cpu; i++) +		zfree(&aux->metadata[i]); + +	zfree(&aux->metadata); +	zfree(&aux); +} + +static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address, +			      size_t size, u8 *buffer) +{ +	u8  cpumode; +	u64 offset; +	int len; +	struct	 thread *thread; +	struct	 machine *machine; +	struct	 addr_location al; + +	if (!etmq) +		return -1; + +	machine = etmq->etm->machine; +	if (address >= etmq->etm->kernel_start) +		cpumode = PERF_RECORD_MISC_KERNEL; +	else +		cpumode = PERF_RECORD_MISC_USER; + +	thread = etmq->thread; +	if (!thread) { +		if (cpumode != PERF_RECORD_MISC_KERNEL) +			return -EINVAL; +		thread = etmq->etm->unknown_thread; +	} + +	thread__find_addr_map(thread, 
cpumode, MAP__FUNCTION, address, &al); + +	if (!al.map || !al.map->dso) +		return 0; + +	if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR && +	    dso__data_status_seen(al.map->dso, DSO_DATA_STATUS_SEEN_ITRACE)) +		return 0; + +	offset = al.map->map_ip(al.map, address); + +	map__load(al.map); + +	len = dso__data_read_offset(al.map->dso, machine, offset, buffer, size); + +	if (len <= 0) +		return 0; + +	return len; +} + +static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, +						unsigned int queue_nr) +{ +	int i; +	struct cs_etm_decoder_params d_params; +	struct cs_etm_trace_params  *t_params; +	struct cs_etm_queue *etmq; + +	etmq = zalloc(sizeof(*etmq)); +	if (!etmq) +		return NULL; + +	etmq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); +	if (!etmq->event_buf) +		goto out_free; + +	etmq->etm = etm; +	etmq->queue_nr = queue_nr; +	etmq->pid = -1; +	etmq->tid = -1; +	etmq->cpu = -1; + +	/* Use metadata to fill in trace parameters for trace decoder */ +	t_params = zalloc(sizeof(*t_params) * etm->num_cpu); + +	if (!t_params) +		goto out_free; + +	for (i = 0; i < etm->num_cpu; i++) { +		t_params[i].protocol = CS_ETM_PROTO_ETMV4i; +		t_params[i].etmv4.reg_idr0 = etm->metadata[i][CS_ETMV4_TRCIDR0]; +		t_params[i].etmv4.reg_idr1 = etm->metadata[i][CS_ETMV4_TRCIDR1]; +		t_params[i].etmv4.reg_idr2 = etm->metadata[i][CS_ETMV4_TRCIDR2]; +		t_params[i].etmv4.reg_idr8 = etm->metadata[i][CS_ETMV4_TRCIDR8]; +		t_params[i].etmv4.reg_configr = +					etm->metadata[i][CS_ETMV4_TRCCONFIGR]; +		t_params[i].etmv4.reg_traceidr = +					etm->metadata[i][CS_ETMV4_TRCTRACEIDR]; +	} + +	/* Set decoder parameters to decode the trace packets */ +	d_params.packet_printer = cs_etm__packet_dump; +	d_params.operation = CS_ETM_OPERATION_DECODE; +	d_params.formatted = true; +	d_params.fsyncs = false; +	d_params.hsyncs = false; +	d_params.frame_aligned = true; +	d_params.data = etmq; + +	etmq->decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); + +	
zfree(&t_params); + +	if (!etmq->decoder) +		goto out_free; + +	/* +	 * Register a function to handle all memory accesses required by +	 * the trace decoder library. +	 */ +	if (cs_etm_decoder__add_mem_access_cb(etmq->decoder, +					      0x0L, ((u64) -1L), +					      cs_etm__mem_access)) +		goto out_free_decoder; + +	etmq->offset = 0; + +	return etmq; + +out_free_decoder: +	cs_etm_decoder__free(etmq->decoder); +out_free: +	zfree(&etmq->event_buf); +	free(etmq); + +	return NULL; +} + +static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, +			       struct auxtrace_queue *queue, +			       unsigned int queue_nr) +{ +	struct cs_etm_queue *etmq = queue->priv; + +	if (list_empty(&queue->head) || etmq) +		return 0; + +	etmq = cs_etm__alloc_queue(etm, queue_nr); + +	if (!etmq) +		return -ENOMEM; + +	queue->priv = etmq; + +	if (queue->cpu != -1) +		etmq->cpu = queue->cpu; + +	etmq->tid = queue->tid; + +	return 0; +} + +static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm) +{ +	unsigned int i; +	int ret; + +	for (i = 0; i < etm->queues.nr_queues; i++) { +		ret = cs_etm__setup_queue(etm, &etm->queues.queue_array[i], i); +		if (ret) +			return ret; +	} + +	return 0; +} + +static int cs_etm__update_queues(struct cs_etm_auxtrace *etm) +{ +	if (etm->queues.new_data) { +		etm->queues.new_data = false; +		return cs_etm__setup_queues(etm); +	} + +	return 0; +} + +static int +cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq) +{ +	struct auxtrace_buffer *aux_buffer = etmq->buffer; +	struct auxtrace_buffer *old_buffer = aux_buffer; +	struct auxtrace_queue *queue; + +	queue = &etmq->etm->queues.queue_array[etmq->queue_nr]; + +	aux_buffer = auxtrace_buffer__next(queue, aux_buffer); + +	/* If no more data, drop the previous auxtrace_buffer and return */ +	if (!aux_buffer) { +		if (old_buffer) +			auxtrace_buffer__drop_data(old_buffer); +		buff->len = 0; +		return 0; +	} + +	etmq->buffer = aux_buffer; + +	/* If the aux_buffer doesn't have data 
associated, try to load it */ +	if (!aux_buffer->data) { +		/* get the file desc associated with the perf data file */ +		int fd = perf_data__fd(etmq->etm->session->data); + +		aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd); +		if (!aux_buffer->data) +			return -ENOMEM; +	} + +	/* If valid, drop the previous buffer */ +	if (old_buffer) +		auxtrace_buffer__drop_data(old_buffer); + +	buff->offset = aux_buffer->offset; +	buff->len = aux_buffer->size; +	buff->buf = aux_buffer->data; + +	buff->ref_timestamp = aux_buffer->reference; + +	return buff->len; +} + +static void  cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm, +				     struct auxtrace_queue *queue) +{ +	struct cs_etm_queue *etmq = queue->priv; + +	/* CPU-wide tracing isn't supported yet */ +	if (queue->tid == -1) +		return; + +	if ((!etmq->thread) && (etmq->tid != -1)) +		etmq->thread = machine__find_thread(etm->machine, -1, +						    etmq->tid); + +	if (etmq->thread) { +		etmq->pid = etmq->thread->pid_; +		if (queue->cpu == -1) +			etmq->cpu = etmq->thread->cpu; +	} +} + +/* + * The cs etm packet encodes an instruction range between a branch target + * and the next taken branch. Generate sample accordingly. 
+ */ +static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq, +				       struct cs_etm_packet *packet) +{ +	int ret = 0; +	struct cs_etm_auxtrace *etm = etmq->etm; +	struct perf_sample sample = {.ip = 0,}; +	union perf_event *event = etmq->event_buf; +	u64 start_addr = packet->start_addr; +	u64 end_addr = packet->end_addr; + +	event->sample.header.type = PERF_RECORD_SAMPLE; +	event->sample.header.misc = PERF_RECORD_MISC_USER; +	event->sample.header.size = sizeof(struct perf_event_header); + +	sample.ip = start_addr; +	sample.pid = etmq->pid; +	sample.tid = etmq->tid; +	sample.addr = end_addr; +	sample.id = etmq->etm->branches_id; +	sample.stream_id = etmq->etm->branches_id; +	sample.period = 1; +	sample.cpu = packet->cpu; +	sample.flags = 0; +	sample.cpumode = PERF_RECORD_MISC_USER; + +	ret = perf_session__deliver_synth_event(etm->session, event, &sample); + +	if (ret) +		pr_err( +		"CS ETM Trace: failed to deliver instruction event, error %d\n", +		ret); + +	return ret; +} + +struct cs_etm_synth { +	struct perf_tool dummy_tool; +	struct perf_session *session; +}; + +static int cs_etm__event_synth(struct perf_tool *tool, +			       union perf_event *event, +			       struct perf_sample *sample __maybe_unused, +			       struct machine *machine __maybe_unused) +{ +	struct cs_etm_synth *cs_etm_synth = +		      container_of(tool, struct cs_etm_synth, dummy_tool); + +	return perf_session__deliver_synth_event(cs_etm_synth->session, +						 event, NULL); +} + +static int cs_etm__synth_event(struct perf_session *session, +			       struct perf_event_attr *attr, u64 id) +{ +	struct cs_etm_synth cs_etm_synth; + +	memset(&cs_etm_synth, 0, sizeof(struct cs_etm_synth)); +	cs_etm_synth.session = session; + +	return perf_event__synthesize_attr(&cs_etm_synth.dummy_tool, attr, 1, +					   &id, cs_etm__event_synth); +} + +static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, +				struct perf_session *session) +{ +	struct perf_evlist *evlist = session->evlist; +	
struct perf_evsel *evsel; +	struct perf_event_attr attr; +	bool found = false; +	u64 id; +	int err; + +	evlist__for_each_entry(evlist, evsel) { +		if (evsel->attr.type == etm->pmu_type) { +			found = true; +			break; +		} +	} + +	if (!found) { +		pr_debug("No selected events with CoreSight Trace data\n"); +		return 0; +	} + +	memset(&attr, 0, sizeof(struct perf_event_attr)); +	attr.size = sizeof(struct perf_event_attr); +	attr.type = PERF_TYPE_HARDWARE; +	attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK; +	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | +			    PERF_SAMPLE_PERIOD; +	if (etm->timeless_decoding) +		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME; +	else +		attr.sample_type |= PERF_SAMPLE_TIME; + +	attr.exclude_user = evsel->attr.exclude_user; +	attr.exclude_kernel = evsel->attr.exclude_kernel; +	attr.exclude_hv = evsel->attr.exclude_hv; +	attr.exclude_host = evsel->attr.exclude_host; +	attr.exclude_guest = evsel->attr.exclude_guest; +	attr.sample_id_all = evsel->attr.sample_id_all; +	attr.read_format = evsel->attr.read_format; + +	/* create new id val to be a fixed offset from evsel id */ +	id = evsel->id[0] + 1000000000; + +	if (!id) +		id = 1; + +	if (etm->synth_opts.branches) { +		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS; +		attr.sample_period = 1; +		attr.sample_type |= PERF_SAMPLE_ADDR; +		err = cs_etm__synth_event(session, &attr, id); +		if (err) +			return err; +		etm->sample_branches = true; +		etm->branches_sample_type = attr.sample_type; +		etm->branches_id = id; +	} + +	return 0; +} + +static int cs_etm__sample(struct cs_etm_queue *etmq) +{ +	int ret; +	struct cs_etm_packet packet; + +	while (1) { +		ret = cs_etm_decoder__get_packet(etmq->decoder, &packet); +		if (ret <= 0) +			return ret; + +		/* +		 * If the packet contains an instruction range, generate an +		 * instruction sequence event. 
+		 */ +		if (packet.sample_type & CS_ETM_RANGE) +			cs_etm__synth_branch_sample(etmq, &packet); +	} + +	return 0; +} + +static int cs_etm__run_decoder(struct cs_etm_queue *etmq) +{ +	struct cs_etm_auxtrace *etm = etmq->etm; +	struct cs_etm_buffer buffer; +	size_t buffer_used, processed; +	int err = 0; + +	if (!etm->kernel_start) +		etm->kernel_start = machine__kernel_start(etm->machine); + +	/* Go through each buffer in the queue and decode them one by one */ +more: +	buffer_used = 0; +	memset(&buffer, 0, sizeof(buffer)); +	err = cs_etm__get_trace(&buffer, etmq); +	if (err <= 0) +		return err; +	/* +	 * We cannot assume consecutive blocks in the data file are contiguous, +	 * reset the decoder to force re-sync. +	 */ +	err = cs_etm_decoder__reset(etmq->decoder); +	if (err != 0) +		return err; + +	/* Run trace decoder until buffer consumed or end of trace */ +	do { +		processed = 0; + +		err = cs_etm_decoder__process_data_block( +						etmq->decoder, +						etmq->offset, +						&buffer.buf[buffer_used], +						buffer.len - buffer_used, +						&processed); + +		if (err) +			return err; + +		etmq->offset += processed; +		buffer_used += processed; + +		/* +		 * Nothing to do with an error condition, let's hope the next +		 * chunk will be better. 
+		 */ +		err = cs_etm__sample(etmq); +	} while (buffer.len > buffer_used); + +goto more; + +	return err; +} + +static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, +					   pid_t tid, u64 time_) +{ +	unsigned int i; +	struct auxtrace_queues *queues = &etm->queues; + +	for (i = 0; i < queues->nr_queues; i++) { +		struct auxtrace_queue *queue = &etm->queues.queue_array[i]; +		struct cs_etm_queue *etmq = queue->priv; + +		if (etmq && ((tid == -1) || (etmq->tid == tid))) { +			etmq->time = time_; +			cs_etm__set_pid_tid_cpu(etm, queue); +			cs_etm__run_decoder(etmq); +		} +	} + +	return 0; +} + +static int cs_etm__process_event(struct perf_session *session, +				 union perf_event *event, +				 struct perf_sample *sample, +				 struct perf_tool *tool) +{ +	int err = 0; +	u64 timestamp; +	struct cs_etm_auxtrace *etm = container_of(session->auxtrace, +						   struct cs_etm_auxtrace, +						   auxtrace); + +	if (dump_trace) +		return 0; + +	if (!tool->ordered_events) { +		pr_err("CoreSight ETM Trace requires ordered events\n"); +		return -EINVAL; +	} + +	if (!etm->timeless_decoding) +		return -EINVAL; + +	if (sample->time && (sample->time != (u64) -1)) +		timestamp = sample->time; +	else +		timestamp = 0; + +	if (timestamp || etm->timeless_decoding) { +		err = cs_etm__update_queues(etm); +		if (err) +			return err; +	} + +	if (event->header.type == PERF_RECORD_EXIT) +		return cs_etm__process_timeless_queues(etm, +						       event->fork.tid, +						       sample->time); + +	return 0; +} + +static int cs_etm__process_auxtrace_event(struct perf_session *session, +					  union perf_event *event, +					  struct perf_tool *tool __maybe_unused) +{ +	struct cs_etm_auxtrace *etm = container_of(session->auxtrace, +						   struct cs_etm_auxtrace, +						   auxtrace); +	if (!etm->data_queued) { +		struct auxtrace_buffer *buffer; +		off_t  data_offset; +		int fd = perf_data__fd(session->data); +		bool is_pipe = perf_data__is_pipe(session->data); +		int err; + +		
if (is_pipe) +			data_offset = 0; +		else { +			data_offset = lseek(fd, 0, SEEK_CUR); +			if (data_offset == -1) +				return -errno; +		} + +		err = auxtrace_queues__add_event(&etm->queues, session, +						 event, data_offset, &buffer); +		if (err) +			return err; + +		if (dump_trace) +			if (auxtrace_buffer__get_data(buffer, fd)) { +				cs_etm__dump_event(etm, buffer); +				auxtrace_buffer__put_data(buffer); +			} +	} + +	return 0; +} + +static bool cs_etm__is_timeless_decoding(struct cs_etm_auxtrace *etm) +{ +	struct perf_evsel *evsel; +	struct perf_evlist *evlist = etm->session->evlist; +	bool timeless_decoding = true; + +	/* +	 * Circle through the list of event and complain if we find one +	 * with the time bit set. +	 */ +	evlist__for_each_entry(evlist, evsel) { +		if ((evsel->attr.sample_type & PERF_SAMPLE_TIME)) +			timeless_decoding = false; +	} + +	return timeless_decoding; +} + +static const char * const cs_etm_global_header_fmts[] = { +	[CS_HEADER_VERSION_0]	= "	Header version		       %llx\n", +	[CS_PMU_TYPE_CPUS]	= "	PMU type/num cpus	       %llx\n", +	[CS_ETM_SNAPSHOT]	= "	Snapshot		       %llx\n", +}; + +static const char * const cs_etm_priv_fmts[] = { +	[CS_ETM_MAGIC]		= "	Magic number		       %llx\n", +	[CS_ETM_CPU]		= "	CPU			       %lld\n", +	[CS_ETM_ETMCR]		= "	ETMCR			       %llx\n", +	[CS_ETM_ETMTRACEIDR]	= "	ETMTRACEIDR		       %llx\n", +	[CS_ETM_ETMCCER]	= "	ETMCCER			       %llx\n", +	[CS_ETM_ETMIDR]		= "	ETMIDR			       %llx\n", +}; + +static const char * const cs_etmv4_priv_fmts[] = { +	[CS_ETM_MAGIC]		= "	Magic number		       %llx\n", +	[CS_ETM_CPU]		= "	CPU			       %lld\n", +	[CS_ETMV4_TRCCONFIGR]	= "	TRCCONFIGR		       %llx\n", +	[CS_ETMV4_TRCTRACEIDR]	= "	TRCTRACEIDR		       %llx\n", +	[CS_ETMV4_TRCIDR0]	= "	TRCIDR0			       %llx\n", +	[CS_ETMV4_TRCIDR1]	= "	TRCIDR1			       %llx\n", +	[CS_ETMV4_TRCIDR2]	= "	TRCIDR2			       %llx\n", +	[CS_ETMV4_TRCIDR8]	= "	TRCIDR8			       %llx\n", +	[CS_ETMV4_TRCAUTHSTATUS] = "	TRCAUTHSTATUS		     
  %llx\n", +}; + +static void cs_etm__print_auxtrace_info(u64 *val, int num) +{ +	int i, j, cpu = 0; + +	for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++) +		fprintf(stdout, cs_etm_global_header_fmts[i], val[i]); + +	for (i = CS_HEADER_VERSION_0_MAX; cpu < num; cpu++) { +		if (val[i] == __perf_cs_etmv3_magic) +			for (j = 0; j < CS_ETM_PRIV_MAX; j++, i++) +				fprintf(stdout, cs_etm_priv_fmts[j], val[i]); +		else if (val[i] == __perf_cs_etmv4_magic) +			for (j = 0; j < CS_ETMV4_PRIV_MAX; j++, i++) +				fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]); +		else +			/* failure.. return */ +			return; +	} +} + +int cs_etm__process_auxtrace_info(union perf_event *event, +				  struct perf_session *session) +{ +	struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info; +	struct cs_etm_auxtrace *etm = NULL; +	struct int_node *inode; +	unsigned int pmu_type; +	int event_header_size = sizeof(struct perf_event_header); +	int info_header_size; +	int total_size = auxtrace_info->header.size; +	int priv_size = 0; +	int num_cpu; +	int err = 0, idx = -1; +	int i, j, k; +	u64 *ptr, *hdr = NULL; +	u64 **metadata = NULL; + +	/* +	 * sizeof(auxtrace_info_event::type) + +	 * sizeof(auxtrace_info_event::reserved) == 8 +	 */ +	info_header_size = 8; + +	if (total_size < (event_header_size + info_header_size)) +		return -EINVAL; + +	priv_size = total_size - event_header_size - info_header_size; + +	/* First the global part */ +	ptr = (u64 *) auxtrace_info->priv; + +	/* Look for version '0' of the header */ +	if (ptr[0] != 0) +		return -EINVAL; + +	hdr = zalloc(sizeof(*hdr) * CS_HEADER_VERSION_0_MAX); +	if (!hdr) +		return -ENOMEM; + +	/* Extract header information - see cs-etm.h for format */ +	for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++) +		hdr[i] = ptr[i]; +	num_cpu = hdr[CS_PMU_TYPE_CPUS] & 0xffffffff; +	pmu_type = (unsigned int) ((hdr[CS_PMU_TYPE_CPUS] >> 32) & +				    0xffffffff); + +	/* +	 * Create an RB tree for traceID-CPU# tuple. 
Since the conversion has +	 * to be made for each packet that gets decoded, optimizing access in +	 * anything other than a sequential array is worth doing. +	 */ +	traceid_list = intlist__new(NULL); +	if (!traceid_list) { +		err = -ENOMEM; +		goto err_free_hdr; +	} + +	metadata = zalloc(sizeof(*metadata) * num_cpu); +	if (!metadata) { +		err = -ENOMEM; +		goto err_free_traceid_list; +	} + +	/* +	 * The metadata is stored in the auxtrace_info section and encodes +	 * the configuration of the ARM embedded trace macrocell which is +	 * required by the trace decoder to properly decode the trace due +	 * to its highly compressed nature. +	 */ +	for (j = 0; j < num_cpu; j++) { +		if (ptr[i] == __perf_cs_etmv3_magic) { +			metadata[j] = zalloc(sizeof(*metadata[j]) * +					     CS_ETM_PRIV_MAX); +			if (!metadata[j]) { +				err = -ENOMEM; +				goto err_free_metadata; +			} +			for (k = 0; k < CS_ETM_PRIV_MAX; k++) +				metadata[j][k] = ptr[i + k]; + +			/* The traceID is our handle */ +			idx = metadata[j][CS_ETM_ETMTRACEIDR]; +			i += CS_ETM_PRIV_MAX; +		} else if (ptr[i] == __perf_cs_etmv4_magic) { +			metadata[j] = zalloc(sizeof(*metadata[j]) * +					     CS_ETMV4_PRIV_MAX); +			if (!metadata[j]) { +				err = -ENOMEM; +				goto err_free_metadata; +			} +			for (k = 0; k < CS_ETMV4_PRIV_MAX; k++) +				metadata[j][k] = ptr[i + k]; + +			/* The traceID is our handle */ +			idx = metadata[j][CS_ETMV4_TRCTRACEIDR]; +			i += CS_ETMV4_PRIV_MAX; +		} + +		/* Get an RB node for this CPU */ +		inode = intlist__findnew(traceid_list, idx); + +		/* Something went wrong, no need to continue */ +		if (!inode) { +			err = PTR_ERR(inode); +			goto err_free_metadata; +		} + +		/* +		 * The node for that CPU should not be taken. +		 * Back out if that's the case. 
+		 */ +		if (inode->priv) { +			err = -EINVAL; +			goto err_free_metadata; +		} +		/* All good, associate the traceID with the CPU# */ +		inode->priv = &metadata[j][CS_ETM_CPU]; +	} + +	/* +	 * Each of CS_HEADER_VERSION_0_MAX, CS_ETM_PRIV_MAX and +	 * CS_ETMV4_PRIV_MAX mark how many double words are in the +	 * global metadata, and each cpu's metadata respectively. +	 * The following tests if the correct number of double words was +	 * present in the auxtrace info section. +	 */ +	if (i * 8 != priv_size) { +		err = -EINVAL; +		goto err_free_metadata; +	} + +	etm = zalloc(sizeof(*etm)); + +	if (!etm) { +		err = -ENOMEM; +		goto err_free_metadata; +	} + +	err = auxtrace_queues__init(&etm->queues); +	if (err) +		goto err_free_etm; + +	etm->session = session; +	etm->machine = &session->machines.host; + +	etm->num_cpu = num_cpu; +	etm->pmu_type = pmu_type; +	etm->snapshot_mode = (hdr[CS_ETM_SNAPSHOT] != 0); +	etm->metadata = metadata; +	etm->auxtrace_type = auxtrace_info->type; +	etm->timeless_decoding = cs_etm__is_timeless_decoding(etm); + +	etm->auxtrace.process_event = cs_etm__process_event; +	etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event; +	etm->auxtrace.flush_events = cs_etm__flush_events; +	etm->auxtrace.free_events = cs_etm__free_events; +	etm->auxtrace.free = cs_etm__free; +	session->auxtrace = &etm->auxtrace; + +	if (dump_trace) { +		cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu); +		return 0; +	} + +	if (session->itrace_synth_opts && session->itrace_synth_opts->set) { +		etm->synth_opts = *session->itrace_synth_opts; +	} else { +		itrace_synth_opts__set_default(&etm->synth_opts); +		etm->synth_opts.callchain = false; +	} + +	err = cs_etm__synth_events(etm, session); +	if (err) +		goto err_free_queues; + +	err = auxtrace_queues__process_index(&etm->queues, session); +	if (err) +		goto err_free_queues; + +	etm->data_queued = etm->queues.populated; + +	return 0; + +err_free_queues: +	auxtrace_queues__free(&etm->queues); +	
session->auxtrace = NULL; +err_free_etm: +	zfree(&etm); +err_free_metadata: +	/* No need to check @metadata[j], free(NULL) is supported */ +	for (j = 0; j < num_cpu; j++) +		free(metadata[j]); +	zfree(&metadata); +err_free_traceid_list: +	intlist__delete(traceid_list); +err_free_hdr: +	zfree(&hdr); + +	return -EINVAL; +} diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index 3cc6bc3263fe..5864d5dca616 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -18,6 +18,9 @@  #ifndef INCLUDE__UTIL_PERF_CS_ETM_H__  #define INCLUDE__UTIL_PERF_CS_ETM_H__ +#include "util/event.h" +#include "util/session.h" +  /* Versionning header in case things need tro change in the future.  That way   * decoding of old snapshot is still possible.   */ @@ -61,6 +64,9 @@ enum {  	CS_ETMV4_PRIV_MAX,  }; +/* RB tree for quick conversion between traceID and CPUs */ +struct intlist *traceid_list; +  #define KiB(x) ((x) * 1024)  #define MiB(x) ((x) * 1024 * 1024) @@ -71,4 +77,16 @@ static const u64 __perf_cs_etmv4_magic   = 0x4040404040404040ULL;  #define CS_ETMV3_PRIV_SIZE (CS_ETM_PRIV_MAX * sizeof(u64))  #define CS_ETMV4_PRIV_SIZE (CS_ETMV4_PRIV_MAX * sizeof(u64)) +#ifdef HAVE_CSTRACE_SUPPORT +int cs_etm__process_auxtrace_info(union perf_event *event, +				  struct perf_session *session); +#else +static inline int +cs_etm__process_auxtrace_info(union perf_event *event __maybe_unused, +			      struct perf_session *session __maybe_unused) +{ +	return -1; +} +#endif +  #endif diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index 48094fde0a68..d8cfc19ddb10 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -12,16 +12,6 @@  #include "util.h"  #include "debug.h" -#ifndef O_CLOEXEC -#ifdef __sparc__ -#define O_CLOEXEC	0x400000 -#elif defined(__alpha__) || defined(__hppa__) -#define O_CLOEXEC	010000000 -#else -#define O_CLOEXEC	02000000 -#endif -#endif -  static bool check_pipe(struct perf_data *data)  {  	struct stat st; diff --git 
a/tools/perf/util/dso.c b/tools/perf/util/dso.c index d5b6f7f5baff..36ef45b2e89d 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -446,7 +446,7 @@ static int do_open(char *name)  	char sbuf[STRERR_BUFSIZE];  	do { -		fd = open(name, O_RDONLY); +		fd = open(name, O_RDONLY|O_CLOEXEC);  		if (fd >= 0)  			return fd; diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 6276b340f893..6d311868d850 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -1,8 +1,10 @@  // SPDX-License-Identifier: GPL-2.0  #include "cpumap.h"  #include "env.h" +#include "sane_ctype.h"  #include "util.h"  #include <errno.h> +#include <sys/utsname.h>  struct perf_env perf_env; @@ -93,3 +95,48 @@ void cpu_cache_level__free(struct cpu_cache_level *cache)  	free(cache->map);  	free(cache->size);  } + +/* + * Return architecture name in a normalized form. + * The conversion logic comes from the Makefile. + */ +static const char *normalize_arch(char *arch) +{ +	if (!strcmp(arch, "x86_64")) +		return "x86"; +	if (arch[0] == 'i' && arch[2] == '8' && arch[3] == '6') +		return "x86"; +	if (!strcmp(arch, "sun4u") || !strncmp(arch, "sparc", 5)) +		return "sparc"; +	if (!strcmp(arch, "aarch64") || !strcmp(arch, "arm64")) +		return "arm64"; +	if (!strncmp(arch, "arm", 3) || !strcmp(arch, "sa110")) +		return "arm"; +	if (!strncmp(arch, "s390", 4)) +		return "s390"; +	if (!strncmp(arch, "parisc", 6)) +		return "parisc"; +	if (!strncmp(arch, "powerpc", 7) || !strncmp(arch, "ppc", 3)) +		return "powerpc"; +	if (!strncmp(arch, "mips", 4)) +		return "mips"; +	if (!strncmp(arch, "sh", 2) && isdigit(arch[2])) +		return "sh"; + +	return arch; +} + +const char *perf_env__arch(struct perf_env *env) +{ +	struct utsname uts; +	char *arch_name; + +	if (!env) { /* Assume local operation */ +		if (uname(&uts) < 0) +			return NULL; +		arch_name = uts.machine; +	} else +		arch_name = env->arch; + +	return normalize_arch(arch_name); +} diff --git a/tools/perf/util/env.h 
b/tools/perf/util/env.h index 1eb35b190b34..bf970f57dce0 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -65,4 +65,6 @@ int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]);  int perf_env__read_cpu_topology_map(struct perf_env *env);  void cpu_cache_level__free(struct cpu_cache_level *cache); + +const char *perf_env__arch(struct perf_env *env);  #endif /* __PERF_ENV_H */ diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 97a8ef9980db..44e603c27944 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1435,6 +1435,11 @@ size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp)  		       event->context_switch.next_prev_tid);  } +static size_t perf_event__fprintf_lost(union perf_event *event, FILE *fp) +{ +	return fprintf(fp, " lost %" PRIu64 "\n", event->lost.lost); +} +  size_t perf_event__fprintf(union perf_event *event, FILE *fp)  {  	size_t ret = fprintf(fp, "PERF_RECORD_%s", @@ -1467,6 +1472,9 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp)  	case PERF_RECORD_SWITCH_CPU_WIDE:  		ret += perf_event__fprintf_switch(event, fp);  		break; +	case PERF_RECORD_LOST: +		ret += perf_event__fprintf_lost(event, fp); +		break;  	default:  		ret += fprintf(fp, "\n");  	} diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 1ae95efbfb95..0f794744919c 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -205,6 +205,7 @@ struct perf_sample {  	u32 flags;  	u16 insn_len;  	u8  cpumode; +	u16 misc;  	char insn[MAX_INSN];  	void *raw_data;  	struct ip_callchain *callchain; @@ -774,8 +775,7 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,  				     u64 read_format);  int perf_event__synthesize_sample(union perf_event *event, u64 type,  				  u64 read_format, -				  const struct perf_sample *sample, -				  bool swapped); +				  const struct perf_sample *sample);  pid_t perf_event__synthesize_comm(struct perf_tool 
*tool,  				  union perf_event *event, pid_t pid, diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index c6c891e154a6..ac35cd214feb 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -25,6 +25,7 @@  #include "parse-events.h"  #include <subcmd/parse-options.h> +#include <fcntl.h>  #include <sys/ioctl.h>  #include <sys/mman.h> @@ -125,7 +126,7 @@ static void perf_evlist__purge(struct perf_evlist *evlist)  void perf_evlist__exit(struct perf_evlist *evlist)  {  	zfree(&evlist->mmap); -	zfree(&evlist->backward_mmap); +	zfree(&evlist->overwrite_mmap);  	fdarray__exit(&evlist->pollfd);  } @@ -257,7 +258,7 @@ int perf_evlist__add_dummy(struct perf_evlist *evlist)  		.config = PERF_COUNT_SW_DUMMY,  		.size	= sizeof(attr), /* to capture ABI version */  	}; -	struct perf_evsel *evsel = perf_evsel__new(&attr); +	struct perf_evsel *evsel = perf_evsel__new_idx(&attr, evlist->nr_entries);  	if (evsel == NULL)  		return -ENOMEM; @@ -675,11 +676,11 @@ static int perf_evlist__set_paused(struct perf_evlist *evlist, bool value)  {  	int i; -	if (!evlist->backward_mmap) +	if (!evlist->overwrite_mmap)  		return 0;  	for (i = 0; i < evlist->nr_mmaps; i++) { -		int fd = evlist->backward_mmap[i].fd; +		int fd = evlist->overwrite_mmap[i].fd;  		int err;  		if (fd < 0) @@ -711,7 +712,7 @@ union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int  	 * No need for read-write ring buffer: kernel stop outputting when  	 * it hit md->prev (perf_mmap__consume()).  	 
*/ -	return perf_mmap__read_forward(md, evlist->overwrite); +	return perf_mmap__read_forward(md);  }  union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx) @@ -738,7 +739,7 @@ void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx)  void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx)  { -	perf_mmap__consume(&evlist->mmap[idx], evlist->overwrite); +	perf_mmap__consume(&evlist->mmap[idx], false);  }  static void perf_evlist__munmap_nofree(struct perf_evlist *evlist) @@ -749,16 +750,16 @@ static void perf_evlist__munmap_nofree(struct perf_evlist *evlist)  		for (i = 0; i < evlist->nr_mmaps; i++)  			perf_mmap__munmap(&evlist->mmap[i]); -	if (evlist->backward_mmap) +	if (evlist->overwrite_mmap)  		for (i = 0; i < evlist->nr_mmaps; i++) -			perf_mmap__munmap(&evlist->backward_mmap[i]); +			perf_mmap__munmap(&evlist->overwrite_mmap[i]);  }  void perf_evlist__munmap(struct perf_evlist *evlist)  {  	perf_evlist__munmap_nofree(evlist);  	zfree(&evlist->mmap); -	zfree(&evlist->backward_mmap); +	zfree(&evlist->overwrite_mmap);  }  static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist) @@ -800,7 +801,7 @@ perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused,  static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,  				       struct mmap_params *mp, int cpu_idx, -				       int thread, int *_output, int *_output_backward) +				       int thread, int *_output, int *_output_overwrite)  {  	struct perf_evsel *evsel;  	int revent; @@ -812,18 +813,20 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,  		int fd;  		int cpu; +		mp->prot = PROT_READ | PROT_WRITE;  		if (evsel->attr.write_backward) { -			output = _output_backward; -			maps = evlist->backward_mmap; +			output = _output_overwrite; +			maps = evlist->overwrite_mmap;  			if (!maps) {  				maps = perf_evlist__alloc_mmap(evlist);  				if (!maps)  					return -1; -				
evlist->backward_mmap = maps; +				evlist->overwrite_mmap = maps;  				if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY)  					perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);  			} +			mp->prot &= ~PROT_WRITE;  		}  		if (evsel->system_wide && thread) @@ -884,14 +887,14 @@ static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist,  	pr_debug2("perf event ring buffer mmapped per cpu\n");  	for (cpu = 0; cpu < nr_cpus; cpu++) {  		int output = -1; -		int output_backward = -1; +		int output_overwrite = -1;  		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu,  					      true);  		for (thread = 0; thread < nr_threads; thread++) {  			if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu, -							thread, &output, &output_backward)) +							thread, &output, &output_overwrite))  				goto out_unmap;  		}  	} @@ -912,13 +915,13 @@ static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist,  	pr_debug2("perf event ring buffer mmapped per thread\n");  	for (thread = 0; thread < nr_threads; thread++) {  		int output = -1; -		int output_backward = -1; +		int output_overwrite = -1;  		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,  					      false);  		if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread, -						&output, &output_backward)) +						&output, &output_overwrite))  			goto out_unmap;  	} @@ -1052,15 +1055,18 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,   * Return: %0 on success, negative error code otherwise.   */  int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, -			 bool overwrite, unsigned int auxtrace_pages, +			 unsigned int auxtrace_pages,  			 bool auxtrace_overwrite)  {  	struct perf_evsel *evsel;  	const struct cpu_map *cpus = evlist->cpus;  	const struct thread_map *threads = evlist->threads; -	struct mmap_params mp = { -		.prot = PROT_READ | (overwrite ? 
0 : PROT_WRITE), -	}; +	/* +	 * Delay setting mp.prot: set it before calling perf_mmap__mmap. +	 * Its value is decided by evsel's write_backward. +	 * So &mp should not be passed through const pointer. +	 */ +	struct mmap_params mp;  	if (!evlist->mmap)  		evlist->mmap = perf_evlist__alloc_mmap(evlist); @@ -1070,7 +1076,6 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,  	if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)  		return -ENOMEM; -	evlist->overwrite = overwrite;  	evlist->mmap_len = perf_evlist__mmap_size(pages);  	pr_debug("mmap size %zuB\n", evlist->mmap_len);  	mp.mask = evlist->mmap_len - page_size - 1; @@ -1091,10 +1096,9 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,  	return perf_evlist__mmap_per_cpu(evlist, &mp);  } -int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, -		      bool overwrite) +int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages)  { -	return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false); +	return perf_evlist__mmap_ex(evlist, pages, 0, false);  }  int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) @@ -1102,7 +1106,8 @@ int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)  	struct cpu_map *cpus;  	struct thread_map *threads; -	threads = thread_map__new_str(target->pid, target->tid, target->uid); +	threads = thread_map__new_str(target->pid, target->tid, target->uid, +				      target->per_thread);  	if (!threads)  		return -1; @@ -1582,6 +1587,17 @@ int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *even  	return perf_evsel__parse_sample(evsel, event, sample);  } +int perf_evlist__parse_sample_timestamp(struct perf_evlist *evlist, +					union perf_event *event, +					u64 *timestamp) +{ +	struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event); + +	if (!evsel) +		return -EFAULT; +	return 
perf_evsel__parse_sample_timestamp(evsel, event, timestamp); +} +  size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp)  {  	struct perf_evsel *evsel; @@ -1739,13 +1755,13 @@ void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist,  		RESUME,  	} action = NONE; -	if (!evlist->backward_mmap) +	if (!evlist->overwrite_mmap)  		return;  	switch (old_state) {  	case BKW_MMAP_NOTREADY: {  		if (state != BKW_MMAP_RUNNING) -			goto state_err;; +			goto state_err;  		break;  	}  	case BKW_MMAP_RUNNING: { @@ -1786,3 +1802,15 @@ void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist,  state_err:  	return;  } + +bool perf_evlist__exclude_kernel(struct perf_evlist *evlist) +{ +	struct perf_evsel *evsel; + +	evlist__for_each_entry(evlist, evsel) { +		if (!evsel->attr.exclude_kernel) +			return false; +	} + +	return true; +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index e72ae64c11ac..75f8e0ad5d76 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -7,7 +7,6 @@  #include <linux/refcount.h>  #include <linux/list.h>  #include <api/fd/array.h> -#include <fcntl.h>  #include <stdio.h>  #include "../perf.h"  #include "event.h" @@ -31,7 +30,6 @@ struct perf_evlist {  	int		 nr_entries;  	int		 nr_groups;  	int		 nr_mmaps; -	bool		 overwrite;  	bool		 enabled;  	bool		 has_user_cpus;  	size_t		 mmap_len; @@ -45,12 +43,14 @@ struct perf_evlist {  	} workload;  	struct fdarray	 pollfd;  	struct perf_mmap *mmap; -	struct perf_mmap *backward_mmap; +	struct perf_mmap *overwrite_mmap;  	struct thread_map *threads;  	struct cpu_map	  *cpus;  	struct perf_evsel *selected;  	struct events_stats stats;  	struct perf_env	*env; +	u64		first_sample_time; +	u64		last_sample_time;  };  struct perf_evsel_str_handler { @@ -169,10 +169,9 @@ int perf_evlist__parse_mmap_pages(const struct option *opt,  unsigned long perf_event_mlock_kb_in_pages(void);  int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, -			 bool 
overwrite, unsigned int auxtrace_pages, +			 unsigned int auxtrace_pages,  			 bool auxtrace_overwrite); -int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, -		      bool overwrite); +int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages);  void perf_evlist__munmap(struct perf_evlist *evlist);  size_t perf_evlist__mmap_size(unsigned long pages); @@ -205,6 +204,10 @@ u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist);  int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event,  			      struct perf_sample *sample); +int perf_evlist__parse_sample_timestamp(struct perf_evlist *evlist, +					union perf_event *event, +					u64 *timestamp); +  bool perf_evlist__valid_sample_type(struct perf_evlist *evlist);  bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist);  bool perf_evlist__valid_read_format(struct perf_evlist *evlist); @@ -312,4 +315,6 @@ perf_evlist__find_evsel_by_str(struct perf_evlist *evlist, const char *str);  struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist,  					    union perf_event *event); + +bool perf_evlist__exclude_kernel(struct perf_evlist *evlist);  #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index f894893c203d..66fa45198a11 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -36,6 +36,7 @@  #include "debug.h"  #include "trace-event.h"  #include "stat.h" +#include "memswap.h"  #include "util/parse-branch-options.h"  #include "sane_ctype.h" @@ -650,9 +651,9 @@ int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size)  	return ret;  } -void perf_evsel__config_callchain(struct perf_evsel *evsel, -				  struct record_opts *opts, -				  struct callchain_param *param) +static void __perf_evsel__config_callchain(struct perf_evsel *evsel, +					   struct record_opts *opts, +					   struct callchain_param *param)  {  	bool function = perf_evsel__is_function_event(evsel);  	
struct perf_event_attr *attr = &evsel->attr; @@ -698,6 +699,14 @@ void perf_evsel__config_callchain(struct perf_evsel *evsel,  	}  } +void perf_evsel__config_callchain(struct perf_evsel *evsel, +				  struct record_opts *opts, +				  struct callchain_param *param) +{ +	if (param->enabled) +		return __perf_evsel__config_callchain(evsel, opts, param); +} +  static void  perf_evsel__reset_callgraph(struct perf_evsel *evsel,  			    struct callchain_param *param) @@ -717,28 +726,32 @@ perf_evsel__reset_callgraph(struct perf_evsel *evsel,  }  static void apply_config_terms(struct perf_evsel *evsel, -			       struct record_opts *opts) +			       struct record_opts *opts, bool track)  {  	struct perf_evsel_config_term *term;  	struct list_head *config_terms = &evsel->config_terms;  	struct perf_event_attr *attr = &evsel->attr; -	struct callchain_param param; +	/* callgraph default */ +	struct callchain_param param = { +		.record_mode = callchain_param.record_mode, +	};  	u32 dump_size = 0;  	int max_stack = 0;  	const char *callgraph_buf = NULL; -	/* callgraph default */ -	param.record_mode = callchain_param.record_mode; -  	list_for_each_entry(term, config_terms, list) {  		switch (term->type) {  		case PERF_EVSEL__CONFIG_TERM_PERIOD: -			attr->sample_period = term->val.period; -			attr->freq = 0; +			if (!(term->weak && opts->user_interval != ULLONG_MAX)) { +				attr->sample_period = term->val.period; +				attr->freq = 0; +			}  			break;  		case PERF_EVSEL__CONFIG_TERM_FREQ: -			attr->sample_freq = term->val.freq; -			attr->freq = 1; +			if (!(term->weak && opts->user_freq != UINT_MAX)) { +				attr->sample_freq = term->val.freq; +				attr->freq = 1; +			}  			break;  		case PERF_EVSEL__CONFIG_TERM_TIME:  			if (term->val.time) @@ -775,6 +788,8 @@ static void apply_config_terms(struct perf_evsel *evsel,  		case PERF_EVSEL__CONFIG_TERM_OVERWRITE:  			attr->write_backward = term->val.overwrite ? 
1 : 0;  			break; +		case PERF_EVSEL__CONFIG_TERM_DRV_CFG: +			break;  		default:  			break;  		} @@ -782,6 +797,8 @@ static void apply_config_terms(struct perf_evsel *evsel,  	/* User explicitly set per-event callgraph, clear the old setting and reset. */  	if ((callgraph_buf != NULL) || (dump_size > 0) || max_stack) { +		bool sample_address = false; +  		if (max_stack) {  			param.max_stack = max_stack;  			if (callgraph_buf == NULL) @@ -801,6 +818,8 @@ static void apply_config_terms(struct perf_evsel *evsel,  					       evsel->name);  					return;  				} +				if (param.record_mode == CALLCHAIN_DWARF) +					sample_address = true;  			}  		}  		if (dump_size > 0) { @@ -813,8 +832,14 @@ static void apply_config_terms(struct perf_evsel *evsel,  			perf_evsel__reset_callgraph(evsel, &callchain_param);  		/* set perf-event callgraph */ -		if (param.enabled) +		if (param.enabled) { +			if (sample_address) { +				perf_evsel__set_sample_bit(evsel, ADDR); +				perf_evsel__set_sample_bit(evsel, DATA_SRC); +				evsel->attr.mmap_data = track; +			}  			perf_evsel__config_callchain(evsel, opts, ¶m); +		}  	}  } @@ -1045,7 +1070,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,  	 * Apply event specific term settings,  	 * it overloads any global configuration.  	 
*/ -	apply_config_terms(evsel, opts); +	apply_config_terms(evsel, opts, track);  	evsel->ignore_missing_thread = opts->ignore_missing_thread;  } @@ -1371,7 +1396,7 @@ perf_evsel__process_group_data(struct perf_evsel *leader,  static int  perf_evsel__read_group(struct perf_evsel *leader, int cpu, int thread)  { -	struct perf_stat_evsel *ps = leader->priv; +	struct perf_stat_evsel *ps = leader->stats;  	u64 read_format = leader->attr.read_format;  	int size = perf_evsel__read_size(leader);  	u64 *data = ps->group_data; @@ -1570,6 +1595,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,  	PRINT_ATTRf(use_clockid, p_unsigned);  	PRINT_ATTRf(context_switch, p_unsigned);  	PRINT_ATTRf(write_backward, p_unsigned); +	PRINT_ATTRf(namespaces, p_unsigned);  	PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned);  	PRINT_ATTRf(bp_type, p_unsigned); @@ -1592,10 +1618,46 @@ static int __open_attr__fprintf(FILE *fp, const char *name, const char *val,  	return fprintf(fp, "  %-32s %s\n", name, val);  } +static void perf_evsel__remove_fd(struct perf_evsel *pos, +				  int nr_cpus, int nr_threads, +				  int thread_idx) +{ +	for (int cpu = 0; cpu < nr_cpus; cpu++) +		for (int thread = thread_idx; thread < nr_threads - 1; thread++) +			FD(pos, cpu, thread) = FD(pos, cpu, thread + 1); +} + +static int update_fds(struct perf_evsel *evsel, +		      int nr_cpus, int cpu_idx, +		      int nr_threads, int thread_idx) +{ +	struct perf_evsel *pos; + +	if (cpu_idx >= nr_cpus || thread_idx >= nr_threads) +		return -EINVAL; + +	evlist__for_each_entry(evsel->evlist, pos) { +		nr_cpus = pos != evsel ? nr_cpus : cpu_idx; + +		perf_evsel__remove_fd(pos, nr_cpus, nr_threads, thread_idx); + +		/* +		 * Since fds for next evsel has not been created, +		 * there is no need to iterate whole event list. 
+		 */ +		if (pos == evsel) +			break; +	} +	return 0; +} +  static bool ignore_missing_thread(struct perf_evsel *evsel, +				  int nr_cpus, int cpu,  				  struct thread_map *threads,  				  int thread, int err)  { +	pid_t ignore_pid = thread_map__pid(threads, thread); +  	if (!evsel->ignore_missing_thread)  		return false; @@ -1611,11 +1673,18 @@ static bool ignore_missing_thread(struct perf_evsel *evsel,  	if (threads->nr == 1)  		return false; +	/* +	 * We should remove fd for missing_thread first +	 * because thread_map__remove() will decrease threads->nr. +	 */ +	if (update_fds(evsel, nr_cpus, cpu, threads->nr, thread)) +		return false; +  	if (thread_map__remove(threads, thread))  		return false;  	pr_warning("WARNING: Ignored open failure for pid %d\n", -		   thread_map__pid(threads, thread)); +		   ignore_pid);  	return true;  } @@ -1720,7 +1789,7 @@ retry_open:  			if (fd < 0) {  				err = -errno; -				if (ignore_missing_thread(evsel, threads, thread, err)) { +				if (ignore_missing_thread(evsel, cpus->nr, cpu, threads, thread, err)) {  					/*  					 * We just removed 1 thread, so take a step  					 * back on thread index and lower the upper @@ -1956,6 +2025,20 @@ static inline bool overflow(const void *endp, u16 max_size, const void *offset,  #define OVERFLOW_CHECK_u64(offset) \  	OVERFLOW_CHECK(offset, sizeof(u64), sizeof(u64)) +static int +perf_event__check_size(union perf_event *event, unsigned int sample_size) +{ +	/* +	 * The evsel's sample_size is based on PERF_SAMPLE_MASK which includes +	 * up to PERF_SAMPLE_PERIOD.  After that overflow() must be used to +	 * check the format does not go past the end of the event. 
+	 */ +	if (sample_size + sizeof(event->header) > event->header.size) +		return -EFAULT; + +	return 0; +} +  int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,  			     struct perf_sample *data)  { @@ -1977,6 +2060,9 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,  	data->stream_id = data->id = data->time = -1ULL;  	data->period = evsel->attr.sample_period;  	data->cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; +	data->misc    = event->header.misc; +	data->id = -1ULL; +	data->data_src = PERF_MEM_DATA_SRC_NONE;  	if (event->header.type != PERF_RECORD_SAMPLE) {  		if (!evsel->attr.sample_id_all) @@ -1986,15 +2072,9 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,  	array = event->sample.array; -	/* -	 * The evsel's sample_size is based on PERF_SAMPLE_MASK which includes -	 * up to PERF_SAMPLE_PERIOD.  After that overflow() must be used to -	 * check the format does not go past the end of the event. -	 */ -	if (evsel->sample_size + sizeof(event->header) > event->header.size) +	if (perf_event__check_size(event, evsel->sample_size))  		return -EFAULT; -	data->id = -1ULL;  	if (type & PERF_SAMPLE_IDENTIFIER) {  		data->id = *array;  		array++; @@ -2024,7 +2104,6 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,  		array++;  	} -	data->addr = 0;  	if (type & PERF_SAMPLE_ADDR) {  		data->addr = *array;  		array++; @@ -2116,14 +2195,27 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,  	if (type & PERF_SAMPLE_RAW) {  		OVERFLOW_CHECK_u64(array);  		u.val64 = *array; -		if (WARN_ONCE(swapped, -			      "Endianness of raw data not corrected!\n")) { -			/* undo swap of u64, then swap on individual u32s */ + +		/* +		 * Undo swap of u64, then swap on individual u32s, +		 * get the size of the raw area and undo all of the +		 * swap. The pevent interface handles endianity by +		 * itself. 
+		 */ +		if (swapped) {  			u.val64 = bswap_64(u.val64);  			u.val32[0] = bswap_32(u.val32[0]);  			u.val32[1] = bswap_32(u.val32[1]);  		}  		data->raw_size = u.val32[0]; + +		/* +		 * The raw data is aligned on 64bits including the +		 * u32 size, so it's safe to use mem_bswap_64. +		 */ +		if (swapped) +			mem_bswap_64((void *) array, data->raw_size); +  		array = (void *)array + sizeof(u32);  		OVERFLOW_CHECK(array, data->raw_size, max_size); @@ -2188,14 +2280,12 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,  		array++;  	} -	data->data_src = PERF_MEM_DATA_SRC_NONE;  	if (type & PERF_SAMPLE_DATA_SRC) {  		OVERFLOW_CHECK_u64(array);  		data->data_src = *array;  		array++;  	} -	data->transaction = 0;  	if (type & PERF_SAMPLE_TRANSACTION) {  		OVERFLOW_CHECK_u64(array);  		data->transaction = *array; @@ -2228,6 +2318,50 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,  	return 0;  } +int perf_evsel__parse_sample_timestamp(struct perf_evsel *evsel, +				       union perf_event *event, +				       u64 *timestamp) +{ +	u64 type = evsel->attr.sample_type; +	const u64 *array; + +	if (!(type & PERF_SAMPLE_TIME)) +		return -1; + +	if (event->header.type != PERF_RECORD_SAMPLE) { +		struct perf_sample data = { +			.time = -1ULL, +		}; + +		if (!evsel->attr.sample_id_all) +			return -1; +		if (perf_evsel__parse_id_sample(evsel, event, &data)) +			return -1; + +		*timestamp = data.time; +		return 0; +	} + +	array = event->sample.array; + +	if (perf_event__check_size(event, evsel->sample_size)) +		return -EFAULT; + +	if (type & PERF_SAMPLE_IDENTIFIER) +		array++; + +	if (type & PERF_SAMPLE_IP) +		array++; + +	if (type & PERF_SAMPLE_TID) +		array++; + +	if (type & PERF_SAMPLE_TIME) +		*timestamp = *array; + +	return 0; +} +  size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,  				     u64 read_format)  { @@ -2338,8 +2472,7 @@ size_t perf_event__sample_event_size(const struct 
perf_sample *sample, u64 type,  int perf_event__synthesize_sample(union perf_event *event, u64 type,  				  u64 read_format, -				  const struct perf_sample *sample, -				  bool swapped) +				  const struct perf_sample *sample)  {  	u64 *array;  	size_t sz; @@ -2364,15 +2497,6 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type,  	if (type & PERF_SAMPLE_TID) {  		u.val32[0] = sample->pid;  		u.val32[1] = sample->tid; -		if (swapped) { -			/* -			 * Inverse of what is done in perf_evsel__parse_sample -			 */ -			u.val32[0] = bswap_32(u.val32[0]); -			u.val32[1] = bswap_32(u.val32[1]); -			u.val64 = bswap_64(u.val64); -		} -  		*array = u.val64;  		array++;  	} @@ -2399,13 +2523,7 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type,  	if (type & PERF_SAMPLE_CPU) {  		u.val32[0] = sample->cpu; -		if (swapped) { -			/* -			 * Inverse of what is done in perf_evsel__parse_sample -			 */ -			u.val32[0] = bswap_32(u.val32[0]); -			u.val64 = bswap_64(u.val64); -		} +		u.val32[1] = 0;  		*array = u.val64;  		array++;  	} @@ -2452,15 +2570,6 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type,  	if (type & PERF_SAMPLE_RAW) {  		u.val32[0] = sample->raw_size; -		if (WARN_ONCE(swapped, -			      "Endianness of raw data not corrected!\n")) { -			/* -			 * Inverse of what is done in perf_evsel__parse_sample -			 */ -			u.val32[0] = bswap_32(u.val32[0]); -			u.val32[1] = bswap_32(u.val32[1]); -			u.val64 = bswap_64(u.val64); -		}  		*array = u.val64;  		array = (void *)array + sizeof(u32); @@ -2739,8 +2848,9 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target,  		break;  	case EOPNOTSUPP:  		if (evsel->attr.sample_period != 0) -			return scnprintf(msg, size, "%s", -	"PMU Hardware doesn't support sampling/overflow-interrupts."); +			return scnprintf(msg, size, +	"%s: PMU Hardware doesn't support sampling/overflow-interrupts. 
Try 'perf stat'", +					 perf_evsel__name(evsel));  		if (evsel->attr.precise_ip)  			return scnprintf(msg, size, "%s",  	"\'precise\' request may not be supported. Try removing 'p' modifier."); @@ -2777,16 +2887,9 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target,  			 perf_evsel__name(evsel));  } -char *perf_evsel__env_arch(struct perf_evsel *evsel) -{ -	if (evsel && evsel->evlist && evsel->evlist->env) -		return evsel->evlist->env->arch; -	return NULL; -} - -char *perf_evsel__env_cpuid(struct perf_evsel *evsel) +struct perf_env *perf_evsel__env(struct perf_evsel *evsel)  { -	if (evsel && evsel->evlist && evsel->evlist->env) -		return evsel->evlist->env->cpuid; +	if (evsel && evsel->evlist) +		return evsel->evlist->env;  	return NULL;  } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 9277df96ffda..846e41644525 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -38,7 +38,7 @@ struct cgroup_sel;   * It is allocated within event parsing and attached to   * perf_evsel::config_terms list head.  
*/ -enum { +enum term_type {  	PERF_EVSEL__CONFIG_TERM_PERIOD,  	PERF_EVSEL__CONFIG_TERM_FREQ,  	PERF_EVSEL__CONFIG_TERM_TIME, @@ -49,12 +49,11 @@ enum {  	PERF_EVSEL__CONFIG_TERM_OVERWRITE,  	PERF_EVSEL__CONFIG_TERM_DRV_CFG,  	PERF_EVSEL__CONFIG_TERM_BRANCH, -	PERF_EVSEL__CONFIG_TERM_MAX,  };  struct perf_evsel_config_term {  	struct list_head	list; -	int	type; +	enum term_type	type;  	union {  		u64	period;  		u64	freq; @@ -67,6 +66,7 @@ struct perf_evsel_config_term {  		bool	overwrite;  		char	*branch;  	} val; +	bool weak;  };  struct perf_stat_evsel; @@ -338,6 +338,10 @@ static inline int perf_evsel__read_on_cpu_scaled(struct perf_evsel *evsel,  int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,  			     struct perf_sample *sample); +int perf_evsel__parse_sample_timestamp(struct perf_evsel *evsel, +				       union perf_event *event, +				       u64 *timestamp); +  static inline struct perf_evsel *perf_evsel__next(struct perf_evsel *evsel)  {  	return list_entry(evsel->node.next, struct perf_evsel, node); @@ -442,7 +446,6 @@ typedef int (*attr__fprintf_f)(FILE *, const char *, const char *, void *);  int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,  			     attr__fprintf_f attr__fprintf, void *priv); -char *perf_evsel__env_arch(struct perf_evsel *evsel); -char *perf_evsel__env_cpuid(struct perf_evsel *evsel); +struct perf_env *perf_evsel__env(struct perf_evsel *evsel);  #endif /* __PERF_EVSEL_H */ diff --git a/tools/perf/util/generate-cmdlist.sh b/tools/perf/util/generate-cmdlist.sh index 9bbcec4e3365..ff17920a5ebc 100755 --- a/tools/perf/util/generate-cmdlist.sh +++ b/tools/perf/util/generate-cmdlist.sh @@ -38,7 +38,7 @@ do  done  echo "#endif /* HAVE_LIBELF_SUPPORT */" -echo "#ifdef HAVE_LIBAUDIT_SUPPORT" +echo "#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE)"  sed -n -e 's/^perf-\([^ 	]*\)[ 	].* audit*/\1/p' command-list.txt |  sort |  while read cmd diff --git a/tools/perf/util/header.c 
b/tools/perf/util/header.c index 7c0e9d587bfa..a326e0d8b5b6 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -15,9 +15,8 @@  #include <linux/bitops.h>  #include <linux/stringify.h>  #include <sys/stat.h> -#include <sys/types.h>  #include <sys/utsname.h> -#include <unistd.h> +#include <linux/time64.h>  #include "evlist.h"  #include "evsel.h" @@ -37,6 +36,7 @@  #include <api/fs/fs.h>  #include "asm/bug.h"  #include "tool.h" +#include "time-utils.h"  #include "sane_ctype.h" @@ -1182,6 +1182,20 @@ static int write_stat(struct feat_fd *ff __maybe_unused,  	return 0;  } +static int write_sample_time(struct feat_fd *ff, +			     struct perf_evlist *evlist) +{ +	int ret; + +	ret = do_write(ff, &evlist->first_sample_time, +		       sizeof(evlist->first_sample_time)); +	if (ret < 0) +		return ret; + +	return do_write(ff, &evlist->last_sample_time, +			sizeof(evlist->last_sample_time)); +} +  static void print_hostname(struct feat_fd *ff, FILE *fp)  {  	fprintf(fp, "# hostname : %s\n", ff->ph->env.hostname); @@ -1507,6 +1521,28 @@ static void print_group_desc(struct feat_fd *ff, FILE *fp)  	}  } +static void print_sample_time(struct feat_fd *ff, FILE *fp) +{ +	struct perf_session *session; +	char time_buf[32]; +	double d; + +	session = container_of(ff->ph, struct perf_session, header); + +	timestamp__scnprintf_usec(session->evlist->first_sample_time, +				  time_buf, sizeof(time_buf)); +	fprintf(fp, "# time of first sample : %s\n", time_buf); + +	timestamp__scnprintf_usec(session->evlist->last_sample_time, +				  time_buf, sizeof(time_buf)); +	fprintf(fp, "# time of last sample : %s\n", time_buf); + +	d = (double)(session->evlist->last_sample_time - +		session->evlist->first_sample_time) / NSEC_PER_MSEC; + +	fprintf(fp, "# sample duration : %10.3f ms\n", d); +} +  static int __event_process_build_id(struct build_id_event *bev,  				    char *filename,  				    struct perf_session *session) @@ -2148,6 +2184,27 @@ out_free_caches:  	return -1;  } +static 
int process_sample_time(struct feat_fd *ff, void *data __maybe_unused) +{ +	struct perf_session *session; +	u64 first_sample_time, last_sample_time; +	int ret; + +	session = container_of(ff->ph, struct perf_session, header); + +	ret = do_read_u64(ff, &first_sample_time); +	if (ret) +		return -1; + +	ret = do_read_u64(ff, &last_sample_time); +	if (ret) +		return -1; + +	session->evlist->first_sample_time = first_sample_time; +	session->evlist->last_sample_time = last_sample_time; +	return 0; +} +  struct feature_ops {  	int (*write)(struct feat_fd *ff, struct perf_evlist *evlist);  	void (*print)(struct feat_fd *ff, FILE *fp); @@ -2205,6 +2262,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {  	FEAT_OPN(AUXTRACE,	auxtrace,	false),  	FEAT_OPN(STAT,		stat,		false),  	FEAT_OPN(CACHE,		cache,		true), +	FEAT_OPR(SAMPLE_TIME,	sample_time,	false),  };  struct header_print_data { @@ -3258,6 +3316,74 @@ int perf_event__synthesize_attrs(struct perf_tool *tool,  	return err;  } +static bool has_unit(struct perf_evsel *counter) +{ +	return counter->unit && *counter->unit; +} + +static bool has_scale(struct perf_evsel *counter) +{ +	return counter->scale != 1; +} + +int perf_event__synthesize_extra_attr(struct perf_tool *tool, +				      struct perf_evlist *evsel_list, +				      perf_event__handler_t process, +				      bool is_pipe) +{ +	struct perf_evsel *counter; +	int err; + +	/* +	 * Synthesize other events stuff not carried within +	 * attr event - unit, scale, name +	 */ +	evlist__for_each_entry(evsel_list, counter) { +		if (!counter->supported) +			continue; + +		/* +		 * Synthesize unit and scale only if it's defined. 
+		 */ +		if (has_unit(counter)) { +			err = perf_event__synthesize_event_update_unit(tool, counter, process); +			if (err < 0) { +				pr_err("Couldn't synthesize evsel unit.\n"); +				return err; +			} +		} + +		if (has_scale(counter)) { +			err = perf_event__synthesize_event_update_scale(tool, counter, process); +			if (err < 0) { +				pr_err("Couldn't synthesize evsel counter.\n"); +				return err; +			} +		} + +		if (counter->own_cpus) { +			err = perf_event__synthesize_event_update_cpus(tool, counter, process); +			if (err < 0) { +				pr_err("Couldn't synthesize evsel cpus.\n"); +				return err; +			} +		} + +		/* +		 * Name is needed only for pipe output, +		 * perf.data carries event names. +		 */ +		if (is_pipe) { +			err = perf_event__synthesize_event_update_name(tool, counter, process); +			if (err < 0) { +				pr_err("Couldn't synthesize evsel name.\n"); +				return err; +			} +		} +	} +	return 0; +} +  int perf_event__process_attr(struct perf_tool *tool __maybe_unused,  			     union perf_event *event,  			     struct perf_evlist **pevlist) diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 29ccbfdf8724..f28aaaa3a440 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -9,6 +9,7 @@  #include <linux/types.h>  #include "event.h"  #include "env.h" +#include "pmu.h"  enum {  	HEADER_RESERVED		= 0,	/* always cleared */ @@ -34,6 +35,7 @@ enum {  	HEADER_AUXTRACE,  	HEADER_STAT,  	HEADER_CACHE, +	HEADER_SAMPLE_TIME,  	HEADER_LAST_FEATURE,  	HEADER_FEAT_BITS	= 256,  }; @@ -107,6 +109,11 @@ int perf_event__synthesize_features(struct perf_tool *tool,  				    struct perf_evlist *evlist,  				    perf_event__handler_t process); +int perf_event__synthesize_extra_attr(struct perf_tool *tool, +				      struct perf_evlist *evsel_list, +				      perf_event__handler_t process, +				      bool is_pipe); +  int perf_event__process_feature(struct perf_tool *tool,  				union perf_event *event,  				struct perf_session *session); @@ 
-166,5 +173,5 @@ int write_padded(struct feat_fd *fd, const void *bf,   */  int get_cpuid(char *buffer, size_t sz); -char *get_cpuid_str(void); +char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused);  #endif /* __PERF_HEADER_H */ diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index 5325e65f9711..72db2744876d 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -67,7 +67,6 @@ struct intel_bts {  	u64				branches_sample_type;  	u64				branches_id;  	size_t				branches_event_size; -	bool				synth_needs_swap;  	unsigned long			num_events;  }; @@ -303,8 +302,7 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,  		event.sample.header.size = bts->branches_event_size;  		ret = perf_event__synthesize_sample(&event,  						    bts->branches_sample_type, -						    0, &sample, -						    bts->synth_needs_swap); +						    0, &sample);  		if (ret)  			return ret;  	} @@ -841,8 +839,6 @@ static int intel_bts_synth_events(struct intel_bts *bts,  				__perf_evsel__sample_size(attr.sample_type);  	} -	bts->synth_needs_swap = evsel->needs_swap; -  	return 0;  } diff --git a/tools/perf/util/intel-pt-decoder/Build b/tools/perf/util/intel-pt-decoder/Build index 10e0814bb8d2..1b704fbea9de 100644 --- a/tools/perf/util/intel-pt-decoder/Build +++ b/tools/perf/util/intel-pt-decoder/Build @@ -11,15 +11,21 @@ $(OUTPUT)util/intel-pt-decoder/inat-tables.c: $(inat_tables_script) $(inat_table  $(OUTPUT)util/intel-pt-decoder/intel-pt-insn-decoder.o: util/intel-pt-decoder/intel-pt-insn-decoder.c util/intel-pt-decoder/inat.c $(OUTPUT)util/intel-pt-decoder/inat-tables.c  	@(diff -I 2>&1 | grep -q 'option requires an argument' && \ -	test -d ../../kernel -a -d ../../tools -a -d ../perf && (( \ -	diff -B -I'^#include' util/intel-pt-decoder/insn.c ../../arch/x86/lib/insn.c >/dev/null && \ -	diff -B -I'^#include' util/intel-pt-decoder/inat.c ../../arch/x86/lib/inat.c >/dev/null && \ -	diff -B 
util/intel-pt-decoder/x86-opcode-map.txt ../../arch/x86/lib/x86-opcode-map.txt >/dev/null && \ -	diff -B util/intel-pt-decoder/gen-insn-attr-x86.awk ../../arch/x86/tools/gen-insn-attr-x86.awk >/dev/null && \ -	diff -B -I'^#include' util/intel-pt-decoder/insn.h ../../arch/x86/include/asm/insn.h >/dev/null && \ -	diff -B -I'^#include' util/intel-pt-decoder/inat.h ../../arch/x86/include/asm/inat.h >/dev/null && \ -	diff -B -I'^#include' util/intel-pt-decoder/inat_types.h ../../arch/x86/include/asm/inat_types.h >/dev/null) \ -	|| echo "Warning: Intel PT: x86 instruction decoder differs from kernel" >&2 )) || true +	test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \ +	((diff -B -I'^#include' util/intel-pt-decoder/insn.c ../../arch/x86/lib/insn.c >/dev/null) || \ +	(echo "Warning: Intel PT: x86 instruction decoder C file at 'tools/perf/util/intel-pt-decoder/insn.c' differs from latest version at 'arch/x86/lib/insn.c'" >&2)) && \ +	((diff -B -I'^#include' util/intel-pt-decoder/inat.c ../../arch/x86/lib/inat.c >/dev/null) || \ +	(echo "Warning: Intel PT: x86 instruction decoder C file at 'tools/perf/util/intel-pt-decoder/inat.c' differs from latest version at 'arch/x86/lib/inat.c'" >&2)) && \ +	((diff -B util/intel-pt-decoder/x86-opcode-map.txt ../../arch/x86/lib/x86-opcode-map.txt >/dev/null) || \ +	(echo "Warning: Intel PT: x86 instruction decoder map file at 'tools/perf/util/intel-pt-decoder/x86-opcode-map.txt' differs from latest version at 'arch/x86/lib/x86-opcode-map.txt'" >&2)) && \ +	((diff -B util/intel-pt-decoder/gen-insn-attr-x86.awk ../../arch/x86/tools/gen-insn-attr-x86.awk >/dev/null) || \ +	(echo "Warning: Intel PT: x86 instruction decoder script at 'tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk' differs from latest version at 'arch/x86/tools/gen-insn-attr-x86.awk'" >&2)) && \ +	((diff -B -I'^#include' util/intel-pt-decoder/insn.h ../../arch/x86/include/asm/insn.h >/dev/null) || \ +	(echo "Warning: Intel PT: x86 instruction decoder header 
at 'tools/perf/util/intel-pt-decoder/insn.h' differs from latest version at 'arch/x86/include/asm/insn.h'" >&2)) && \ +	((diff -B -I'^#include' util/intel-pt-decoder/inat.h ../../arch/x86/include/asm/inat.h >/dev/null) || \ +	(echo "Warning: Intel PT: x86 instruction decoder header at 'tools/perf/util/intel-pt-decoder/inat.h' differs from latest version at 'arch/x86/include/asm/inat.h'" >&2)) && \ +	((diff -B -I'^#include' util/intel-pt-decoder/inat_types.h ../../arch/x86/include/asm/inat_types.h >/dev/null) || \ +	(echo "Warning: Intel PT: x86 instruction decoder header at 'tools/perf/util/intel-pt-decoder/inat_types.h' differs from latest version at 'arch/x86/include/asm/inat_types.h'" >&2)))) || true  	$(call rule_mkdir)  	$(call if_changed_dep,cc_o_c) diff --git a/tools/perf/util/intel-pt-decoder/inat.h b/tools/perf/util/intel-pt-decoder/inat.h index 125ecd2a300d..52dc8d911173 100644 --- a/tools/perf/util/intel-pt-decoder/inat.h +++ b/tools/perf/util/intel-pt-decoder/inat.h @@ -97,6 +97,16 @@  #define INAT_MAKE_GROUP(grp)	((grp << INAT_GRP_OFFS) | INAT_MODRM)  #define INAT_MAKE_IMM(imm)	(imm << INAT_IMM_OFFS) +/* Identifiers for segment registers */ +#define INAT_SEG_REG_IGNORE	0 +#define INAT_SEG_REG_DEFAULT	1 +#define INAT_SEG_REG_CS		2 +#define INAT_SEG_REG_SS		3 +#define INAT_SEG_REG_DS		4 +#define INAT_SEG_REG_ES		5 +#define INAT_SEG_REG_FS		6 +#define INAT_SEG_REG_GS		7 +  /* Attribute search APIs */  extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode);  extern int inat_get_last_prefix_id(insn_byte_t last_pfx); diff --git a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt index 12e377184ee4..e0b85930dd77 100644 --- a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt +++ b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt @@ -607,7 +607,7 @@ fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)  fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)  fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)  fe: paddd 
Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1) -ff: +ff: UD0  EndTable  Table: 3-byte opcode 1 (0x0f 0x38) @@ -717,7 +717,7 @@ AVXcode: 2  7e: vpermt2d/q Vx,Hx,Wx (66),(ev)  7f: vpermt2ps/d Vx,Hx,Wx (66),(ev)  80: INVEPT Gy,Mdq (66) -81: INVPID Gy,Mdq (66) +81: INVVPID Gy,Mdq (66)  82: INVPCID Gy,Mdq (66)  83: vpmultishiftqb Vx,Hx,Wx (66),(ev)  88: vexpandps/d Vpd,Wpd (66),(ev) @@ -896,7 +896,7 @@ EndTable  GrpTable: Grp3_1  0: TEST Eb,Ib -1: +1: TEST Eb,Ib  2: NOT Eb  3: NEG Eb  4: MUL AL,Eb @@ -970,6 +970,15 @@ GrpTable: Grp9  EndTable  GrpTable: Grp10 +# all are UD1 +0: UD1 +1: UD1 +2: UD1 +3: UD1 +4: UD1 +5: UD1 +6: UD1 +7: UD1  EndTable  # Grp11A and Grp11B are expressed as Grp11 in Intel SDM diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 23f9ba676df0..3773d9c54f45 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -104,8 +104,6 @@ struct intel_pt {  	u64 pwrx_id;  	u64 cbr_id; -	bool synth_needs_swap; -  	u64 tsc_bit;  	u64 mtc_bit;  	u64 mtc_freq_bits; @@ -1101,11 +1099,10 @@ static void intel_pt_prep_b_sample(struct intel_pt *pt,  }  static int intel_pt_inject_event(union perf_event *event, -				 struct perf_sample *sample, u64 type, -				 bool swapped) +				 struct perf_sample *sample, u64 type)  {  	event->header.size = perf_event__sample_event_size(sample, type, 0); -	return perf_event__synthesize_sample(event, type, 0, sample, swapped); +	return perf_event__synthesize_sample(event, type, 0, sample);  }  static inline int intel_pt_opt_inject(struct intel_pt *pt, @@ -1115,7 +1112,7 @@ static inline int intel_pt_opt_inject(struct intel_pt *pt,  	if (!pt->synth_opts.inject)  		return 0; -	return intel_pt_inject_event(event, sample, type, pt->synth_needs_swap); +	return intel_pt_inject_event(event, sample, type);  }  static int intel_pt_deliver_synth_b_event(struct intel_pt *pt, @@ -2329,8 +2326,6 @@ static int intel_pt_synth_events(struct intel_pt *pt,  		id += 1;  	} -	pt->synth_needs_swap = evsel->needs_swap; -  	
return 0;  } diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 6a8d03c3d9b7..b05a67464c03 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -172,6 +172,9 @@ void machine__exit(struct machine *machine)  {  	int i; +	if (machine == NULL) +		return; +  	machine__destroy_kernel_maps(machine);  	map_groups__exit(&machine->kmaps);  	dsos__exit(&machine->dsos); @@ -1723,7 +1726,7 @@ static char *callchain_srcline(struct map *map, struct symbol *sym, u64 ip)  		bool show_addr = callchain_param.key == CCKEY_ADDRESS;  		srcline = get_srcline(map->dso, map__rip_2objdump(map, ip), -				      sym, show_sym, show_addr); +				      sym, show_sym, show_addr, ip);  		srcline__tree_insert(&map->dso->srclines, ip, srcline);  	} @@ -2201,7 +2204,7 @@ int thread__resolve_callchain(struct thread *thread,  {  	int ret = 0; -	callchain_cursor_reset(&callchain_cursor); +	callchain_cursor_reset(cursor);  	if (callchain_param.order == ORDER_CALLEE) {  		ret = thread__resolve_callchain_sample(thread, cursor, diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 6d40efd74402..8fe57031e1a8 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -419,7 +419,7 @@ int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix,  	if (map && map->dso) {  		srcline = get_srcline(map->dso,  				      map__rip_2objdump(map, addr), NULL, -				      true, true); +				      true, true, addr);  		if (srcline != SRCLINE_UNKNOWN)  			ret = fprintf(fp, "%s%s", prefix, srcline);  		free_srcline(srcline); diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 0ddd9c199227..1ddc3d1d0147 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -20,12 +20,10 @@  #include "pmu.h"  #include "expr.h"  #include "rblist.h" -#include "pmu.h"  #include <string.h>  #include <stdbool.h>  #include <errno.h>  #include "pmu-events/pmu-events.h" -#include "strbuf.h"  #include "strlist.h"  #include 
<assert.h>  #include <ctype.h> @@ -38,6 +36,10 @@ struct metric_event *metricgroup__lookup(struct rblist *metric_events,  	struct metric_event me = {  		.evsel = evsel  	}; + +	if (!metric_events) +		return NULL; +  	nd = rblist__find(metric_events, &me);  	if (nd)  		return container_of(nd, struct metric_event, nd); @@ -270,7 +272,7 @@ static void metricgroup__print_strlist(struct strlist *metrics, bool raw)  void metricgroup__print(bool metrics, bool metricgroups, char *filter,  			bool raw)  { -	struct pmu_events_map *map = perf_pmu__find_map(); +	struct pmu_events_map *map = perf_pmu__find_map(NULL);  	struct pmu_event *pe;  	int i;  	struct rblist groups; @@ -368,7 +370,7 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter,  static int metricgroup__add_metric(const char *metric, struct strbuf *events,  				   struct list_head *group_list)  { -	struct pmu_events_map *map = perf_pmu__find_map(); +	struct pmu_events_map *map = perf_pmu__find_map(NULL);  	struct pmu_event *pe;  	int ret = -EINVAL;  	int i, j; diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 9fe5f9c7d577..05076e683938 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -21,33 +21,13 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map)  }  /* When check_messup is true, 'end' must points to a good entry */ -static union perf_event *perf_mmap__read(struct perf_mmap *map, bool check_messup, +static union perf_event *perf_mmap__read(struct perf_mmap *map,  					 u64 start, u64 end, u64 *prev)  {  	unsigned char *data = map->base + page_size;  	union perf_event *event = NULL;  	int diff = end - start; -	if (check_messup) { -		/* -		 * If we're further behind than half the buffer, there's a chance -		 * the writer will bite our tail and mess up the samples under us. -		 * -		 * If we somehow ended up ahead of the 'end', we got messed up. -		 * -		 * In either case, truncate and restart at 'end'. 
-		 */ -		if (diff > map->mask / 2 || diff < 0) { -			fprintf(stderr, "WARNING: failed to keep up with mmap data.\n"); - -			/* -			 * 'end' points to a known good entry, start there. -			 */ -			start = end; -			diff = 0; -		} -	} -  	if (diff >= (int)sizeof(event->header)) {  		size_t size; @@ -89,7 +69,7 @@ broken_event:  	return event;  } -union perf_event *perf_mmap__read_forward(struct perf_mmap *map, bool check_messup) +union perf_event *perf_mmap__read_forward(struct perf_mmap *map)  {  	u64 head;  	u64 old = map->prev; @@ -102,7 +82,7 @@ union perf_event *perf_mmap__read_forward(struct perf_mmap *map, bool check_mess  	head = perf_mmap__read_head(map); -	return perf_mmap__read(map, check_messup, old, head, &map->prev); +	return perf_mmap__read(map, old, head, &map->prev);  }  union perf_event *perf_mmap__read_backward(struct perf_mmap *map) @@ -138,7 +118,7 @@ union perf_event *perf_mmap__read_backward(struct perf_mmap *map)  	else  		end = head + map->mask + 1; -	return perf_mmap__read(map, false, start, end, &map->prev); +	return perf_mmap__read(map, start, end, &map->prev);  }  void perf_mmap__read_catchup(struct perf_mmap *map) @@ -254,18 +234,18 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd)  	return 0;  } -static int backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end) +static int overwrite_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end)  {  	struct perf_event_header *pheader;  	u64 evt_head = head;  	int size = mask + 1; -	pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head); +	pr_debug2("overwrite_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head);  	pheader = (struct perf_event_header *)(buf + (head & mask));  	*start = head;  	while (true) {  		if (evt_head - head >= (unsigned int)size) { -			pr_debug("Finished reading backward ring buffer: rewind\n"); +			pr_debug("Finished reading overwrite ring buffer: rewind\n");  			if (evt_head - head > (unsigned 
int)size)  				evt_head -= pheader->size;  			*end = evt_head; @@ -275,7 +255,7 @@ static int backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64  		pheader = (struct perf_event_header *)(buf + (evt_head & mask));  		if (pheader->size == 0) { -			pr_debug("Finished reading backward ring buffer: get start\n"); +			pr_debug("Finished reading overwrite ring buffer: get start\n");  			*end = evt_head;  			return 0;  		} @@ -287,19 +267,7 @@ static int backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64  	return -1;  } -static int rb_find_range(void *data, int mask, u64 head, u64 old, -			 u64 *start, u64 *end, bool backward) -{ -	if (!backward) { -		*start = old; -		*end = head; -		return 0; -	} - -	return backward_rb_find_range(data, mask, head, start, end); -} - -int perf_mmap__push(struct perf_mmap *md, bool overwrite, bool backward, +int perf_mmap__push(struct perf_mmap *md, bool overwrite,  		    void *to, int push(void *to, void *buf, size_t size))  {  	u64 head = perf_mmap__read_head(md); @@ -310,19 +278,28 @@ int perf_mmap__push(struct perf_mmap *md, bool overwrite, bool backward,  	void *buf;  	int rc = 0; -	if (rb_find_range(data, md->mask, head, old, &start, &end, backward)) -		return -1; +	start = overwrite ? head : old; +	end = overwrite ? old : head;  	if (start == end)  		return 0;  	size = end - start;  	if (size > (unsigned long)(md->mask) + 1) { -		WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n"); +		if (!overwrite) { +			WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n"); -		md->prev = head; -		perf_mmap__consume(md, overwrite || backward); -		return 0; +			md->prev = head; +			perf_mmap__consume(md, overwrite); +			return 0; +		} + +		/* +		 * Backward ring buffer is full. We still have a chance to read +		 * most of data from it. 
+		 */ +		if (overwrite_rb_find_range(data, md->mask, head, &start, &end)) +			return -1;  	}  	if ((start & md->mask) + size != (end & md->mask)) { @@ -346,7 +323,7 @@ int perf_mmap__push(struct perf_mmap *md, bool overwrite, bool backward,  	}  	md->prev = head; -	perf_mmap__consume(md, overwrite || backward); +	perf_mmap__consume(md, overwrite);  out:  	return rc;  } diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index efd78b827b05..e43d7b55a55f 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -70,7 +70,7 @@ void perf_mmap__read_catchup(struct perf_mmap *md);  static inline u64 perf_mmap__read_head(struct perf_mmap *mm)  {  	struct perf_event_mmap_page *pc = mm->base; -	u64 head = ACCESS_ONCE(pc->data_head); +	u64 head = READ_ONCE(pc->data_head);  	rmb();  	return head;  } @@ -86,10 +86,10 @@ static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail)  	pc->data_tail = tail;  } -union perf_event *perf_mmap__read_forward(struct perf_mmap *map, bool check_messup); +union perf_event *perf_mmap__read_forward(struct perf_mmap *map);  union perf_event *perf_mmap__read_backward(struct perf_mmap *map); -int perf_mmap__push(struct perf_mmap *md, bool overwrite, bool backward, +int perf_mmap__push(struct perf_mmap *md, bool backward,  		    void *to, int push(void *to, void *buf, size_t size));  size_t perf_mmap__mmap_len(struct perf_mmap *map); diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c index 8e09fd2d842f..bad9e0296e9a 100644 --- a/tools/perf/util/ordered-events.c +++ b/tools/perf/util/ordered-events.c @@ -157,9 +157,8 @@ void ordered_events__delete(struct ordered_events *oe, struct ordered_event *eve  }  int ordered_events__queue(struct ordered_events *oe, union perf_event *event, -			  struct perf_sample *sample, u64 file_offset) +			  u64 timestamp, u64 file_offset)  { -	u64 timestamp = sample->time;  	struct ordered_event *oevent;  	if (!timestamp || timestamp == ~0ULL) diff --git 
a/tools/perf/util/ordered-events.h b/tools/perf/util/ordered-events.h index 96e5292d88e2..8c7a2948593e 100644 --- a/tools/perf/util/ordered-events.h +++ b/tools/perf/util/ordered-events.h @@ -45,7 +45,7 @@ struct ordered_events {  };  int ordered_events__queue(struct ordered_events *oe, union perf_event *event, -			  struct perf_sample *sample, u64 file_offset); +			  u64 timestamp, u64 file_offset);  void ordered_events__delete(struct ordered_events *oe, struct ordered_event *event);  int ordered_events__flush(struct ordered_events *oe, enum oe_flush how);  void ordered_events__init(struct ordered_events *oe, ordered_events__deliver_t deliver); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index a7fcd95961ef..34589c427e52 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -4,6 +4,9 @@  #include <dirent.h>  #include <errno.h>  #include <sys/ioctl.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h>  #include <sys/param.h>  #include "term.h"  #include "../perf.h" @@ -1116,6 +1119,7 @@ do {								\  	INIT_LIST_HEAD(&__t->list);				\  	__t->type       = PERF_EVSEL__CONFIG_TERM_ ## __type;	\  	__t->val.__name = __val;				\ +	__t->weak	= term->weak;				\  	list_add_tail(&__t->list, head_terms);			\  } while (0) @@ -2410,6 +2414,7 @@ static int new_term(struct parse_events_term **_term,  	*term = *temp;  	INIT_LIST_HEAD(&term->list); +	term->weak = false;  	switch (term->type_val) {  	case PARSE_EVENTS__TERM_TYPE_NUM: diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index be337c266697..88108cd11b4c 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -101,6 +101,9 @@ struct parse_events_term {  	/* error string indexes for within parsed string */  	int err_term;  	int err_val; + +	/* Coming from implicit alias */ +	bool weak;  };  struct parse_events_error { diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c index 
933f5c6bffb4..ca56ba2dd3da 100644 --- a/tools/perf/util/path.c +++ b/tools/perf/util/path.c @@ -18,6 +18,7 @@  #include <stdio.h>  #include <sys/types.h>  #include <sys/stat.h> +#include <dirent.h>  #include <unistd.h>  static char bad_path[] = "/bad-path/"; @@ -77,3 +78,16 @@ bool is_regular_file(const char *file)  	return S_ISREG(st.st_mode);  } + +/* Helper function for filesystems that return a dent->d_type DT_UNKNOWN */ +bool is_directory(const char *base_path, const struct dirent *dent) +{ +	char path[PATH_MAX]; +	struct stat st; + +	sprintf(path, "%s/%s", base_path, dent->d_name); +	if (stat(path, &st)) +		return false; + +	return S_ISDIR(st.st_mode); +} diff --git a/tools/perf/util/path.h b/tools/perf/util/path.h index 14a254ada7eb..f014f905df50 100644 --- a/tools/perf/util/path.h +++ b/tools/perf/util/path.h @@ -2,9 +2,12 @@  #ifndef _PERF_PATH_H  #define _PERF_PATH_H +struct dirent; +  int path__join(char *bf, size_t size, const char *path1, const char *path2);  int path__join3(char *bf, size_t size, const char *path1, const char *path2, const char *path3);  bool is_regular_file(const char *file); +bool is_directory(const char *base_path, const struct dirent *dent);  #endif /* _PERF_PATH_H */ diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 07cb2ac041d7..57e38fdf0b34 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -12,6 +12,7 @@  #include <dirent.h>  #include <api/fs/fs.h>  #include <locale.h> +#include <regex.h>  #include "util.h"  #include "pmu.h"  #include "parse-events.h" @@ -405,6 +406,11 @@ static int pmu_alias_terms(struct perf_pmu_alias *alias,  			parse_events_terms__purge(&list);  			return ret;  		} +		/* +		 * Weak terms don't override command line options, +		 * which we don't want for implicit terms in aliases. 
+		 */ +		cloned->weak = true;  		list_add_tail(&cloned->list, &list);  	}  	list_splice(&list, terms); @@ -532,17 +538,45 @@ static bool pmu_is_uncore(const char *name)  }  /* + *  PMU CORE devices have different name other than cpu in sysfs on some + *  platforms. looking for possible sysfs files to identify as core device. + */ +static int is_pmu_core(const char *name) +{ +	struct stat st; +	char path[PATH_MAX]; +	const char *sysfs = sysfs__mountpoint(); + +	if (!sysfs) +		return 0; + +	/* Look for cpu sysfs (x86 and others) */ +	scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/cpu", sysfs); +	if ((stat(path, &st) == 0) && +			(strncmp(name, "cpu", strlen("cpu")) == 0)) +		return 1; + +	/* Look for cpu sysfs (specific to arm) */ +	scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s/cpus", +				sysfs, name); +	if (stat(path, &st) == 0) +		return 1; + +	return 0; +} + +/*   * Return the CPU id as a raw string.   *   * Each architecture should provide a more precise id string that   * can be use to match the architecture's "mapfile".   */ -char * __weak get_cpuid_str(void) +char * __weak get_cpuid_str(struct perf_pmu *pmu __maybe_unused)  {  	return NULL;  } -static char *perf_pmu__getcpuid(void) +static char *perf_pmu__getcpuid(struct perf_pmu *pmu)  {  	char *cpuid;  	static bool printed; @@ -551,7 +585,7 @@ static char *perf_pmu__getcpuid(void)  	if (cpuid)  		cpuid = strdup(cpuid);  	if (!cpuid) -		cpuid = get_cpuid_str(); +		cpuid = get_cpuid_str(pmu);  	if (!cpuid)  		return NULL; @@ -562,22 +596,45 @@ static char *perf_pmu__getcpuid(void)  	return cpuid;  } -struct pmu_events_map *perf_pmu__find_map(void) +struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu)  {  	struct pmu_events_map *map; -	char *cpuid = perf_pmu__getcpuid(); +	char *cpuid = perf_pmu__getcpuid(pmu);  	int i; +	/* on some platforms which uses cpus map, cpuid can be NULL for +	 * PMUs other than CORE PMUs. 
+	 */ +	if (!cpuid) +		return NULL; +  	i = 0;  	for (;;) { +		regex_t re; +		regmatch_t pmatch[1]; +		int match; +  		map = &pmu_events_map[i++];  		if (!map->table) {  			map = NULL;  			break;  		} -		if (!strcmp(map->cpuid, cpuid)) +		if (regcomp(&re, map->cpuid, REG_EXTENDED) != 0) { +			/* Warn unable to generate match particular string. */ +			pr_info("Invalid regular expression %s\n", map->cpuid);  			break; +		} + +		match = !regexec(&re, cpuid, 1, pmatch, 0); +		regfree(&re); +		if (match) { +			size_t match_len = (pmatch[0].rm_eo - pmatch[0].rm_so); + +			/* Verify the entire string matched. */ +			if (match_len == strlen(cpuid)) +				break; +		}  	}  	free(cpuid);  	return map; @@ -588,13 +645,14 @@ struct pmu_events_map *perf_pmu__find_map(void)   * to the current running CPU. Then, add all PMU events from that table   * as aliases.   */ -static void pmu_add_cpu_aliases(struct list_head *head, const char *name) +static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu)  {  	int i;  	struct pmu_events_map *map;  	struct pmu_event *pe; +	const char *name = pmu->name; -	map = perf_pmu__find_map(); +	map = perf_pmu__find_map(pmu);  	if (!map)  		return; @@ -603,7 +661,6 @@ static void pmu_add_cpu_aliases(struct list_head *head, const char *name)  	 */  	i = 0;  	while (1) { -		const char *pname;  		pe = &map->table[i++];  		if (!pe->name) { @@ -612,9 +669,13 @@ static void pmu_add_cpu_aliases(struct list_head *head, const char *name)  			break;  		} -		pname = pe->pmu ? 
pe->pmu : "cpu"; -		if (strncmp(pname, name, strlen(pname))) -			continue; +		if (!is_pmu_core(name)) { +			/* check for uncore devices */ +			if (pe->pmu == NULL) +				continue; +			if (strncmp(pe->pmu, name, strlen(pe->pmu))) +				continue; +		}  		/* need type casts to override 'const' */  		__perf_pmu__new_alias(head, NULL, (char *)pe->name, @@ -656,21 +717,20 @@ static struct perf_pmu *pmu_lookup(const char *name)  	if (pmu_aliases(name, &aliases))  		return NULL; -	pmu_add_cpu_aliases(&aliases, name);  	pmu = zalloc(sizeof(*pmu));  	if (!pmu)  		return NULL;  	pmu->cpus = pmu_cpumask(name); - +	pmu->name = strdup(name); +	pmu->type = type;  	pmu->is_uncore = pmu_is_uncore(name); +	pmu_add_cpu_aliases(&aliases, pmu);  	INIT_LIST_HEAD(&pmu->format);  	INIT_LIST_HEAD(&pmu->aliases);  	list_splice(&format, &pmu->format);  	list_splice(&aliases, &pmu->aliases); -	pmu->name = strdup(name); -	pmu->type = type;  	list_add_tail(&pmu->list, &pmus);  	pmu->default_config = perf_pmu__get_default_config(pmu); diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 27c75e635866..76fecec7b3f9 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -92,6 +92,6 @@ int perf_pmu__test(void);  struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu); -struct pmu_events_map *perf_pmu__find_map(void); +struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu);  #endif /* __PMU_H */ diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index b7aaf9b2294d..e1dbc9821617 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -1325,27 +1325,30 @@ static int parse_perf_probe_event_name(char **arg, struct perf_probe_event *pev)  {  	char *ptr; -	ptr = strchr(*arg, ':'); +	ptr = strpbrk_esc(*arg, ":");  	if (ptr) {  		*ptr = '\0';  		if (!pev->sdt && !is_c_func_name(*arg))  			goto ng_name; -		pev->group = strdup(*arg); +		pev->group = strdup_esc(*arg);  		if (!pev->group)  			return -ENOMEM;  		
*arg = ptr + 1;  	} else  		pev->group = NULL; -	if (!pev->sdt && !is_c_func_name(*arg)) { + +	pev->event = strdup_esc(*arg); +	if (pev->event == NULL) +		return -ENOMEM; + +	if (!pev->sdt && !is_c_func_name(pev->event)) { +		zfree(&pev->event);  ng_name: +		zfree(&pev->group);  		semantic_error("%s is bad for event name -it must "  			       "follow C symbol-naming rule.\n", *arg);  		return -EINVAL;  	} -	pev->event = strdup(*arg); -	if (pev->event == NULL) -		return -ENOMEM; -  	return 0;  } @@ -1373,7 +1376,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)  			arg++;  	} -	ptr = strpbrk(arg, ";=@+%"); +	ptr = strpbrk_esc(arg, ";=@+%");  	if (pev->sdt) {  		if (ptr) {  			if (*ptr != '@') { @@ -1387,7 +1390,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)  				pev->target = build_id_cache__origname(tmp);  				free(tmp);  			} else -				pev->target = strdup(ptr + 1); +				pev->target = strdup_esc(ptr + 1);  			if (!pev->target)  				return -ENOMEM;  			*ptr = '\0'; @@ -1421,13 +1424,14 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)  	 *  	 * Otherwise, we consider arg to be a function specification.  	 */ -	if (!strpbrk(arg, "+@%") && (ptr = strpbrk(arg, ";:")) != NULL) { +	if (!strpbrk_esc(arg, "+@%")) { +		ptr = strpbrk_esc(arg, ";:");  		/* This is a file spec if it includes a '.' 
before ; or : */ -		if (memchr(arg, '.', ptr - arg)) +		if (ptr && memchr(arg, '.', ptr - arg))  			file_spec = true;  	} -	ptr = strpbrk(arg, ";:+@%"); +	ptr = strpbrk_esc(arg, ";:+@%");  	if (ptr) {  		nc = *ptr;  		*ptr++ = '\0'; @@ -1436,7 +1440,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)  	if (arg[0] == '\0')  		tmp = NULL;  	else { -		tmp = strdup(arg); +		tmp = strdup_esc(arg);  		if (tmp == NULL)  			return -ENOMEM;  	} @@ -1469,12 +1473,12 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)  		arg = ptr;  		c = nc;  		if (c == ';') {	/* Lazy pattern must be the last part */ -			pp->lazy_line = strdup(arg); +			pp->lazy_line = strdup(arg); /* let leave escapes */  			if (pp->lazy_line == NULL)  				return -ENOMEM;  			break;  		} -		ptr = strpbrk(arg, ";:+@%"); +		ptr = strpbrk_esc(arg, ";:+@%");  		if (ptr) {  			nc = *ptr;  			*ptr++ = '\0'; @@ -1501,7 +1505,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)  				semantic_error("SRC@SRC is not allowed.\n");  				return -EINVAL;  			} -			pp->file = strdup(arg); +			pp->file = strdup_esc(arg);  			if (pp->file == NULL)  				return -ENOMEM;  			break; @@ -2573,7 +2577,8 @@ int show_perf_probe_events(struct strfilter *filter)  }  static int get_new_event_name(char *buf, size_t len, const char *base, -			      struct strlist *namelist, bool allow_suffix) +			      struct strlist *namelist, bool ret_event, +			      bool allow_suffix)  {  	int i, ret;  	char *p, *nbase; @@ -2584,13 +2589,13 @@ static int get_new_event_name(char *buf, size_t len, const char *base,  	if (!nbase)  		return -ENOMEM; -	/* Cut off the dot suffixes (e.g. .const, .isra)*/ -	p = strchr(nbase, '.'); +	/* Cut off the dot suffixes (e.g. 
.const, .isra) and version suffixes */ +	p = strpbrk(nbase, ".@");  	if (p && p != nbase)  		*p = '\0';  	/* Try no suffix number */ -	ret = e_snprintf(buf, len, "%s", nbase); +	ret = e_snprintf(buf, len, "%s%s", nbase, ret_event ? "__return" : "");  	if (ret < 0) {  		pr_debug("snprintf() failed: %d\n", ret);  		goto out; @@ -2625,6 +2630,14 @@ static int get_new_event_name(char *buf, size_t len, const char *base,  out:  	free(nbase); + +	/* Final validation */ +	if (ret >= 0 && !is_c_func_name(buf)) { +		pr_warning("Internal error: \"%s\" is an invalid event name.\n", +			   buf); +		ret = -EINVAL; +	} +  	return ret;  } @@ -2681,8 +2694,8 @@ static int probe_trace_event__set_name(struct probe_trace_event *tev,  		group = PERFPROBE_GROUP;  	/* Get an unused new event name */ -	ret = get_new_event_name(buf, 64, event, -				 namelist, allow_suffix); +	ret = get_new_event_name(buf, 64, event, namelist, +				 tev->point.retprobe, allow_suffix);  	if (ret < 0)  		return ret; @@ -2792,16 +2805,40 @@ static int find_probe_functions(struct map *map, char *name,  	int found = 0;  	struct symbol *sym;  	struct rb_node *tmp; +	const char *norm, *ver; +	char *buf = NULL; +	bool cut_version = true;  	if (map__load(map) < 0)  		return 0; +	/* If user gives a version, don't cut off the version from symbols */ +	if (strchr(name, '@')) +		cut_version = false; +  	map__for_each_symbol(map, sym, tmp) { -		if (strglobmatch(sym->name, name)) { +		norm = arch__normalize_symbol_name(sym->name); +		if (!norm) +			continue; + +		if (cut_version) { +			/* We don't care about default symbol or not */ +			ver = strchr(norm, '@'); +			if (ver) { +				buf = strndup(norm, ver - norm); +				if (!buf) +					return -ENOMEM; +				norm = buf; +			} +		} + +		if (strglobmatch(norm, name)) {  			found++;  			if (syms && found < probe_conf.max_probes)  				syms[found - 1] = sym;  		} +		if (buf) +			zfree(&buf);  	}  	return found; @@ -2847,7 +2884,7 @@ static int 
find_probe_trace_events_from_map(struct perf_probe_event *pev,  	 * same name but different addresses, this lists all the symbols.  	 */  	num_matched_functions = find_probe_functions(map, pp->function, syms); -	if (num_matched_functions == 0) { +	if (num_matched_functions <= 0) {  		pr_err("Failed to find symbol %s in %s\n", pp->function,  			pev->target ? : "kernel");  		ret = -ENOENT; diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index b4f2f06722a7..7aa0ea64544e 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -10,6 +10,7 @@ util/ctype.c  util/evlist.c  util/evsel.c  util/cpumap.c +util/memswap.c  util/mmap.c  util/namespaces.c  ../lib/bitmap.c diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 8e49d9cafcfc..b1e999bd21ef 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -864,7 +864,7 @@ static PyObject *pyrf_evlist__mmap(struct pyrf_evlist *pevlist,  					 &pages, &overwrite))  		return NULL; -	if (perf_evlist__mmap(evlist, pages, overwrite) < 0) { +	if (perf_evlist__mmap(evlist, pages) < 0) {  		PyErr_SetFromErrno(PyExc_OSError);  		return NULL;  	} diff --git a/tools/perf/util/rblist.c b/tools/perf/util/rblist.c index 0dfe27d99458..0efc3258c648 100644 --- a/tools/perf/util/rblist.c +++ b/tools/perf/util/rblist.c @@ -101,16 +101,21 @@ void rblist__init(struct rblist *rblist)  	return;  } +void rblist__exit(struct rblist *rblist) +{ +	struct rb_node *pos, *next = rb_first(&rblist->entries); + +	while (next) { +		pos = next; +		next = rb_next(pos); +		rblist__remove_node(rblist, pos); +	} +} +  void rblist__delete(struct rblist *rblist)  {  	if (rblist != NULL) { -		struct rb_node *pos, *next = rb_first(&rblist->entries); - -		while (next) { -			pos = next; -			next = rb_next(pos); -			rblist__remove_node(rblist, pos); -		} +		rblist__exit(rblist);  		free(rblist);  	}  } diff --git a/tools/perf/util/rblist.h b/tools/perf/util/rblist.h index 
4c8638a22571..76df15c27f5f 100644 --- a/tools/perf/util/rblist.h +++ b/tools/perf/util/rblist.h @@ -29,6 +29,7 @@ struct rblist {  };  void rblist__init(struct rblist *rblist); +void rblist__exit(struct rblist *rblist);  void rblist__delete(struct rblist *rblist);  int rblist__add_node(struct rblist *rblist, const void *new_entry);  void rblist__remove_node(struct rblist *rblist, struct rb_node *rb_node); diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index c7187f067d31..ea070883c593 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -43,7 +43,6 @@  #include "../db-export.h"  #include "../thread-stack.h"  #include "../trace-event.h" -#include "../machine.h"  #include "../call-path.h"  #include "thread_map.h"  #include "cpumap.h" @@ -500,6 +499,8 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample,  			PyLong_FromUnsignedLongLong(sample->time));  	pydict_set_item_string_decref(dict_sample, "period",  			PyLong_FromUnsignedLongLong(sample->period)); +	pydict_set_item_string_decref(dict_sample, "phys_addr", +			PyLong_FromUnsignedLongLong(sample->phys_addr));  	set_sample_read_in_dict(dict_sample, sample, evsel);  	pydict_set_item_string_decref(dict, "sample", dict_sample); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 5c412310f266..c71ced7db152 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -27,7 +27,6 @@  static int perf_session__deliver_event(struct perf_session *session,  				       union perf_event *event, -				       struct perf_sample *sample,  				       struct perf_tool *tool,  				       u64 file_offset); @@ -107,17 +106,10 @@ static void perf_session__set_comm_exec(struct perf_session *session)  static int ordered_events__deliver_event(struct ordered_events *oe,  					 struct ordered_event *event)  { -	struct perf_sample sample;  	
struct perf_session *session = container_of(oe, struct perf_session,  						    ordered_events); -	int ret = perf_evlist__parse_sample(session->evlist, event->event, &sample); - -	if (ret) { -		pr_err("Can't parse sample, err = %d\n", ret); -		return ret; -	} -	return perf_session__deliver_event(session, event->event, &sample, +	return perf_session__deliver_event(session, event->event,  					   session->tool, event->file_offset);  } @@ -873,9 +865,9 @@ static int process_finished_round(struct perf_tool *tool __maybe_unused,  }  int perf_session__queue_event(struct perf_session *s, union perf_event *event, -			      struct perf_sample *sample, u64 file_offset) +			      u64 timestamp, u64 file_offset)  { -	return ordered_events__queue(&s->ordered_events, event, sample, file_offset); +	return ordered_events__queue(&s->ordered_events, event, timestamp, file_offset);  }  static void callchain__lbr_callstack_printf(struct perf_sample *sample) @@ -1328,20 +1320,26 @@ static int machines__deliver_event(struct machines *machines,  static int perf_session__deliver_event(struct perf_session *session,  				       union perf_event *event, -				       struct perf_sample *sample,  				       struct perf_tool *tool,  				       u64 file_offset)  { +	struct perf_sample sample;  	int ret; -	ret = auxtrace__process_event(session, event, sample, tool); +	ret = perf_evlist__parse_sample(session->evlist, event, &sample); +	if (ret) { +		pr_err("Can't parse sample, err = %d\n", ret); +		return ret; +	} + +	ret = auxtrace__process_event(session, event, &sample, tool);  	if (ret < 0)  		return ret;  	if (ret > 0)  		return 0;  	return machines__deliver_event(&session->machines, session->evlist, -				       event, sample, tool, file_offset); +				       event, &sample, tool, file_offset);  }  static s64 perf_session__process_user_event(struct perf_session *session, @@ -1350,10 +1348,11 @@ static s64 perf_session__process_user_event(struct perf_session *session,  {  	struct ordered_events 
*oe = &session->ordered_events;  	struct perf_tool *tool = session->tool; +	struct perf_sample sample = { .time = 0, };  	int fd = perf_data__fd(session->data);  	int err; -	dump_event(session->evlist, event, file_offset, NULL); +	dump_event(session->evlist, event, file_offset, &sample);  	/* These events are processed right away */  	switch (event->header.type) { @@ -1495,7 +1494,6 @@ static s64 perf_session__process_event(struct perf_session *session,  {  	struct perf_evlist *evlist = session->evlist;  	struct perf_tool *tool = session->tool; -	struct perf_sample sample;  	int ret;  	if (session->header.needs_swap) @@ -1509,21 +1507,19 @@ static s64 perf_session__process_event(struct perf_session *session,  	if (event->header.type >= PERF_RECORD_USER_TYPE_START)  		return perf_session__process_user_event(session, event, file_offset); -	/* -	 * For all kernel events we get the sample data -	 */ -	ret = perf_evlist__parse_sample(evlist, event, &sample); -	if (ret) -		return ret; -  	if (tool->ordered_events) { -		ret = perf_session__queue_event(session, event, &sample, file_offset); +		u64 timestamp = -1ULL; + +		ret = perf_evlist__parse_sample_timestamp(evlist, event, &timestamp); +		if (ret && ret != -1) +			return ret; + +		ret = perf_session__queue_event(session, event, timestamp, file_offset);  		if (ret != -ETIME)  			return ret;  	} -	return perf_session__deliver_event(session, event, &sample, tool, -					   file_offset); +	return perf_session__deliver_event(session, event, tool, file_offset);  }  void perf_event_header__bswap(struct perf_event_header *hdr) @@ -1777,7 +1773,8 @@ done:  	err = perf_session__flush_thread_stacks(session);  out_err:  	free(buf); -	perf_session__warn_about_errors(session); +	if (!tool->no_warn) +		perf_session__warn_about_errors(session);  	ordered_events__free(&session->ordered_events);  	auxtrace__free_events(session);  	return err; @@ -1933,7 +1930,8 @@ out:  	err = perf_session__flush_thread_stacks(session);  out_err:  	
ui_progress__finish(); -	perf_session__warn_about_errors(session); +	if (!tool->no_warn) +		perf_session__warn_about_errors(session);  	/*  	 * We may switching perf.data output, make ordered_events  	 * reusable. diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index da1434a7c120..da40b4b380ca 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -53,7 +53,7 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset,  int perf_session__process_events(struct perf_session *session);  int perf_session__queue_event(struct perf_session *s, union perf_event *event, -			      struct perf_sample *sample, u64 file_offset); +			      u64 timestamp, u64 file_offset);  void perf_tool__fill_defaults(struct perf_tool *tool); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index a00eacdf02ed..2da4d0456a03 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -336,7 +336,7 @@ char *hist_entry__get_srcline(struct hist_entry *he)  		return SRCLINE_UNKNOWN;  	return get_srcline(map->dso, map__rip_2objdump(map, he->ip), -			   he->ms.sym, true, true); +			   he->ms.sym, true, true, he->ip);  }  static int64_t @@ -380,7 +380,8 @@ sort__srcline_from_cmp(struct hist_entry *left, struct hist_entry *right)  					   map__rip_2objdump(map,  							     left->branch_info->from.al_addr),  							 left->branch_info->from.sym, -							 true, true); +							 true, true, +							 left->branch_info->from.al_addr);  	}  	if (!right->branch_info->srcline_from) {  		struct map *map = right->branch_info->from.map; @@ -391,7 +392,8 @@ sort__srcline_from_cmp(struct hist_entry *left, struct hist_entry *right)  					     map__rip_2objdump(map,  							       right->branch_info->from.al_addr),  						     right->branch_info->from.sym, -						     true, true); +						     true, true, +						     right->branch_info->from.al_addr);  	}  	return strcmp(right->branch_info->srcline_from, left->branch_info->srcline_from);  } @@ 
-423,7 +425,8 @@ sort__srcline_to_cmp(struct hist_entry *left, struct hist_entry *right)  					   map__rip_2objdump(map,  							     left->branch_info->to.al_addr),  							 left->branch_info->from.sym, -							 true, true); +							 true, true, +							 left->branch_info->to.al_addr);  	}  	if (!right->branch_info->srcline_to) {  		struct map *map = right->branch_info->to.map; @@ -434,7 +437,8 @@ sort__srcline_to_cmp(struct hist_entry *left, struct hist_entry *right)  					     map__rip_2objdump(map,  							       right->branch_info->to.al_addr),  						     right->branch_info->to.sym, -						     true, true); +						     true, true, +						     right->branch_info->to.al_addr);  	}  	return strcmp(right->branch_info->srcline_to, left->branch_info->srcline_to);  } @@ -465,7 +469,7 @@ static char *hist_entry__get_srcfile(struct hist_entry *e)  		return no_srcfile;  	sf = __get_srcline(map->dso, map__rip_2objdump(map, e->ip), -			 e->ms.sym, false, true, true); +			 e->ms.sym, false, true, true, e->ip);  	if (!strcmp(sf, SRCLINE_UNKNOWN))  		return no_srcfile;  	p = strchr(sf, ':'); @@ -2883,10 +2887,10 @@ static int setup_output_list(struct perf_hpp_list *list, char *str)  			tok; tok = strtok_r(NULL, ", ", &tmp)) {  		ret = output_field_add(list, tok);  		if (ret == -EINVAL) { -			pr_err("Invalid --fields key: `%s'", tok); +			ui__error("Invalid --fields key: `%s'", tok);  			break;  		} else if (ret == -ESRCH) { -			pr_err("Unknown --fields key: `%s'", tok); +			ui__error("Unknown --fields key: `%s'", tok);  			break;  		}  	} diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index d19f05c56de6..3c21fd059b64 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -496,7 +496,8 @@ out:  #define A2L_FAIL_LIMIT 123  char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym, -		  bool show_sym, bool show_addr, bool unwind_inlines) +		  bool show_sym, bool show_addr, bool unwind_inlines, +		  u64 ip)  {  	char *file = NULL;  
	unsigned line = 0; @@ -536,7 +537,7 @@ out:  	if (sym) {  		if (asprintf(&srcline, "%s+%" PRIu64, show_sym ? sym->name : "", -					addr - sym->start) < 0) +					ip - sym->start) < 0)  			return SRCLINE_UNKNOWN;  	} else if (asprintf(&srcline, "%s[%" PRIx64 "]", dso->short_name, addr) < 0)  		return SRCLINE_UNKNOWN; @@ -550,9 +551,9 @@ void free_srcline(char *srcline)  }  char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, -		  bool show_sym, bool show_addr) +		  bool show_sym, bool show_addr, u64 ip)  { -	return __get_srcline(dso, addr, sym, show_sym, show_addr, false); +	return __get_srcline(dso, addr, sym, show_sym, show_addr, false, ip);  }  struct srcline_node { diff --git a/tools/perf/util/srcline.h b/tools/perf/util/srcline.h index 847b7086182c..b2bb5502fd62 100644 --- a/tools/perf/util/srcline.h +++ b/tools/perf/util/srcline.h @@ -11,9 +11,10 @@ struct symbol;  extern bool srcline_full_filename;  char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, -		  bool show_sym, bool show_addr); +		  bool show_sym, bool show_addr, u64 ip);  char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym, -		  bool show_sym, bool show_addr, bool unwind_inlines); +		  bool show_sym, bool show_addr, bool unwind_inlines, +		  u64 ip);  void free_srcline(char *srcline);  /* insert the srcline into the DSO, which will take ownership */ diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 855e35cbb1dc..594d14a02b67 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -9,17 +9,6 @@  #include "expr.h"  #include "metricgroup.h" -enum { -	CTX_BIT_USER	= 1 << 0, -	CTX_BIT_KERNEL	= 1 << 1, -	CTX_BIT_HV	= 1 << 2, -	CTX_BIT_HOST	= 1 << 3, -	CTX_BIT_IDLE	= 1 << 4, -	CTX_BIT_MAX	= 1 << 5, -}; - -#define NUM_CTX CTX_BIT_MAX -  /*   * AGGR_GLOBAL: Use CPU 0   * AGGR_SOCKET: Use first CPU of socket @@ -27,36 +16,18 @@ enum {   * AGGR_NONE: Use matching CPU   * AGGR_THREAD: Not supported?   
*/ -static struct stats runtime_nsecs_stats[MAX_NR_CPUS]; -static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_topdown_total_slots[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_smi_num_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_aperf_stats[NUM_CTX][MAX_NR_CPUS]; -static struct rblist runtime_saved_values;  static bool have_frontend_stalled; +struct runtime_stat rt_stat;  struct stats walltime_nsecs_stats;  struct saved_value {  	struct rb_node rb_node;  	struct perf_evsel *evsel; +	enum stat_type type; +	int ctx;  	int cpu; +	struct runtime_stat *stat;  	struct stats stats;  }; @@ -69,6 +40,30 @@ static int saved_value_cmp(struct rb_node *rb_node, const void *entry)  	if (a->cpu != b->cpu)  		return a->cpu - b->cpu; + +	/* +	 * Previously the rbtree was used to link generic metrics. 
+	 * The keys were evsel/cpu. Now the rbtree is extended to support +	 * per-thread shadow stats. For shadow stats case, the keys +	 * are cpu/type/ctx/stat (evsel is NULL). For generic metrics +	 * case, the keys are still evsel/cpu (type/ctx/stat are 0 or NULL). +	 */ +	if (a->type != b->type) +		return a->type - b->type; + +	if (a->ctx != b->ctx) +		return a->ctx - b->ctx; + +	if (a->evsel == NULL && b->evsel == NULL) { +		if (a->stat == b->stat) +			return 0; + +		if ((char *)a->stat < (char *)b->stat) +			return -1; + +		return 1; +	} +  	if (a->evsel == b->evsel)  		return 0;  	if ((char *)a->evsel < (char *)b->evsel) @@ -87,34 +82,66 @@ static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused,  	return &nd->rb_node;  } +static void saved_value_delete(struct rblist *rblist __maybe_unused, +			       struct rb_node *rb_node) +{ +	struct saved_value *v; + +	BUG_ON(!rb_node); +	v = container_of(rb_node, struct saved_value, rb_node); +	free(v); +} +  static struct saved_value *saved_value_lookup(struct perf_evsel *evsel,  					      int cpu, -					      bool create) +					      bool create, +					      enum stat_type type, +					      int ctx, +					      struct runtime_stat *st)  { +	struct rblist *rblist;  	struct rb_node *nd;  	struct saved_value dm = {  		.cpu = cpu,  		.evsel = evsel, +		.type = type, +		.ctx = ctx, +		.stat = st,  	}; -	nd = rblist__find(&runtime_saved_values, &dm); + +	rblist = &st->value_list; + +	nd = rblist__find(rblist, &dm);  	if (nd)  		return container_of(nd, struct saved_value, rb_node);  	if (create) { -		rblist__add_node(&runtime_saved_values, &dm); -		nd = rblist__find(&runtime_saved_values, &dm); +		rblist__add_node(rblist, &dm); +		nd = rblist__find(rblist, &dm);  		if (nd)  			return container_of(nd, struct saved_value, rb_node);  	}  	return NULL;  } +void runtime_stat__init(struct runtime_stat *st) +{ +	struct rblist *rblist = &st->value_list; + +	rblist__init(rblist); +	rblist->node_cmp = 
saved_value_cmp; +	rblist->node_new = saved_value_new; +	rblist->node_delete = saved_value_delete; +} + +void runtime_stat__exit(struct runtime_stat *st) +{ +	rblist__exit(&st->value_list); +} +  void perf_stat__init_shadow_stats(void)  {  	have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend"); -	rblist__init(&runtime_saved_values); -	runtime_saved_values.node_cmp = saved_value_cmp; -	runtime_saved_values.node_new = saved_value_new; -	/* No delete for now */ +	runtime_stat__init(&rt_stat);  }  static int evsel_context(struct perf_evsel *evsel) @@ -135,36 +162,13 @@ static int evsel_context(struct perf_evsel *evsel)  	return ctx;  } -void perf_stat__reset_shadow_stats(void) +static void reset_stat(struct runtime_stat *st)  { +	struct rblist *rblist;  	struct rb_node *pos, *next; -	memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats)); -	memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats)); -	memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats)); -	memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats)); -	memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats)); -	memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats)); -	memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats)); -	memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats)); -	memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats)); -	memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats)); -	memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats)); -	memset(runtime_cycles_in_tx_stats, 0, -			sizeof(runtime_cycles_in_tx_stats)); -	memset(runtime_transaction_stats, 0, -		sizeof(runtime_transaction_stats)); -	memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats)); -	memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); -	memset(runtime_topdown_total_slots, 0, sizeof(runtime_topdown_total_slots)); -	
memset(runtime_topdown_slots_retired, 0, sizeof(runtime_topdown_slots_retired)); -	memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued)); -	memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles)); -	memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles)); -	memset(runtime_smi_num_stats, 0, sizeof(runtime_smi_num_stats)); -	memset(runtime_aperf_stats, 0, sizeof(runtime_aperf_stats)); - -	next = rb_first(&runtime_saved_values.entries); +	rblist = &st->value_list; +	next = rb_first(&rblist->entries);  	while (next) {  		pos = next;  		next = rb_next(pos); @@ -174,13 +178,35 @@ void perf_stat__reset_shadow_stats(void)  	}  } +void perf_stat__reset_shadow_stats(void) +{ +	reset_stat(&rt_stat); +	memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); +} + +void perf_stat__reset_shadow_per_stat(struct runtime_stat *st) +{ +	reset_stat(st); +} + +static void update_runtime_stat(struct runtime_stat *st, +				enum stat_type type, +				int ctx, int cpu, u64 count) +{ +	struct saved_value *v = saved_value_lookup(NULL, cpu, true, +						   type, ctx, st); + +	if (v) +		update_stats(&v->stats, count); +} +  /*   * Update various tracking values we maintain to print   * more semantic information such as miss/hit ratios,   * instruction rates, etc:   */  void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count, -				    int cpu) +				    int cpu, struct runtime_stat *st)  {  	int ctx = evsel_context(counter); @@ -188,50 +214,58 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count,  	if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK) ||  	    perf_evsel__match(counter, SOFTWARE, SW_CPU_CLOCK)) -		update_stats(&runtime_nsecs_stats[cpu], count); +		update_runtime_stat(st, STAT_NSECS, 0, cpu, count);  	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) -		update_stats(&runtime_cycles_stats[ctx][cpu], count); +		update_runtime_stat(st, 
STAT_CYCLES, ctx, cpu, count);  	else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) -		update_stats(&runtime_cycles_in_tx_stats[ctx][cpu], count); +		update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count);  	else if (perf_stat_evsel__is(counter, TRANSACTION_START)) -		update_stats(&runtime_transaction_stats[ctx][cpu], count); +		update_runtime_stat(st, STAT_TRANSACTION, ctx, cpu, count);  	else if (perf_stat_evsel__is(counter, ELISION_START)) -		update_stats(&runtime_elision_stats[ctx][cpu], count); +		update_runtime_stat(st, STAT_ELISION, ctx, cpu, count);  	else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS)) -		update_stats(&runtime_topdown_total_slots[ctx][cpu], count); +		update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS, +				    ctx, cpu, count);  	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED)) -		update_stats(&runtime_topdown_slots_issued[ctx][cpu], count); +		update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED, +				    ctx, cpu, count);  	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED)) -		update_stats(&runtime_topdown_slots_retired[ctx][cpu], count); +		update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED, +				    ctx, cpu, count);  	else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES)) -		update_stats(&runtime_topdown_fetch_bubbles[ctx][cpu], count); +		update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES, +				    ctx, cpu, count);  	else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES)) -		update_stats(&runtime_topdown_recovery_bubbles[ctx][cpu], count); +		update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES, +				    ctx, cpu, count);  	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) -		update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count); +		update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT, +				    ctx, cpu, count);  	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) -		update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], 
count); +		update_runtime_stat(st, STAT_STALLED_CYCLES_BACK, +				    ctx, cpu, count);  	else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) -		update_stats(&runtime_branches_stats[ctx][cpu], count); +		update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count);  	else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) -		update_stats(&runtime_cacherefs_stats[ctx][cpu], count); +		update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count);  	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) -		update_stats(&runtime_l1_dcache_stats[ctx][cpu], count); +		update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count);  	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) -		update_stats(&runtime_ll_cache_stats[ctx][cpu], count); +		update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count);  	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL)) -		update_stats(&runtime_ll_cache_stats[ctx][cpu], count); +		update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count);  	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) -		update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count); +		update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count);  	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) -		update_stats(&runtime_itlb_cache_stats[ctx][cpu], count); +		update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count);  	else if (perf_stat_evsel__is(counter, SMI_NUM)) -		update_stats(&runtime_smi_num_stats[ctx][cpu], count); +		update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count);  	else if (perf_stat_evsel__is(counter, APERF)) -		update_stats(&runtime_aperf_stats[ctx][cpu], count); +		update_runtime_stat(st, STAT_APERF, ctx, cpu, count);  	if (counter->collect_stat) { -		struct saved_value *v = saved_value_lookup(counter, cpu, true); +		struct saved_value *v = saved_value_lookup(counter, cpu, true, +							   STAT_NONE, 0, st);  		update_stats(&v->stats, count);  	}  } @@ -352,15 +386,40 @@ void 
perf_stat__collect_metric_expr(struct perf_evlist *evsel_list)  	}  } +static double runtime_stat_avg(struct runtime_stat *st, +			       enum stat_type type, int ctx, int cpu) +{ +	struct saved_value *v; + +	v = saved_value_lookup(NULL, cpu, false, type, ctx, st); +	if (!v) +		return 0.0; + +	return avg_stats(&v->stats); +} + +static double runtime_stat_n(struct runtime_stat *st, +			     enum stat_type type, int ctx, int cpu) +{ +	struct saved_value *v; + +	v = saved_value_lookup(NULL, cpu, false, type, ctx, st); +	if (!v) +		return 0.0; + +	return v->stats.n; +} +  static void print_stalled_cycles_frontend(int cpu,  					  struct perf_evsel *evsel, double avg, -					  struct perf_stat_output_ctx *out) +					  struct perf_stat_output_ctx *out, +					  struct runtime_stat *st)  {  	double total, ratio = 0.0;  	const char *color;  	int ctx = evsel_context(evsel); -	total = avg_stats(&runtime_cycles_stats[ctx][cpu]); +	total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);  	if (total)  		ratio = avg / total * 100.0; @@ -376,13 +435,14 @@ static void print_stalled_cycles_frontend(int cpu,  static void print_stalled_cycles_backend(int cpu,  					 struct perf_evsel *evsel, double avg, -					 struct perf_stat_output_ctx *out) +					 struct perf_stat_output_ctx *out, +					 struct runtime_stat *st)  {  	double total, ratio = 0.0;  	const char *color;  	int ctx = evsel_context(evsel); -	total = avg_stats(&runtime_cycles_stats[ctx][cpu]); +	total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);  	if (total)  		ratio = avg / total * 100.0; @@ -395,13 +455,14 @@ static void print_stalled_cycles_backend(int cpu,  static void print_branch_misses(int cpu,  				struct perf_evsel *evsel,  				double avg, -				struct perf_stat_output_ctx *out) +				struct perf_stat_output_ctx *out, +				struct runtime_stat *st)  {  	double total, ratio = 0.0;  	const char *color;  	int ctx = evsel_context(evsel); -	total = avg_stats(&runtime_branches_stats[ctx][cpu]); +	total = runtime_stat_avg(st, 
STAT_BRANCHES, ctx, cpu);  	if (total)  		ratio = avg / total * 100.0; @@ -414,13 +475,15 @@ static void print_branch_misses(int cpu,  static void print_l1_dcache_misses(int cpu,  				   struct perf_evsel *evsel,  				   double avg, -				   struct perf_stat_output_ctx *out) +				   struct perf_stat_output_ctx *out, +				   struct runtime_stat *st) +  {  	double total, ratio = 0.0;  	const char *color;  	int ctx = evsel_context(evsel); -	total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]); +	total = runtime_stat_avg(st, STAT_L1_DCACHE, ctx, cpu);  	if (total)  		ratio = avg / total * 100.0; @@ -433,13 +496,15 @@ static void print_l1_dcache_misses(int cpu,  static void print_l1_icache_misses(int cpu,  				   struct perf_evsel *evsel,  				   double avg, -				   struct perf_stat_output_ctx *out) +				   struct perf_stat_output_ctx *out, +				   struct runtime_stat *st) +  {  	double total, ratio = 0.0;  	const char *color;  	int ctx = evsel_context(evsel); -	total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]); +	total = runtime_stat_avg(st, STAT_L1_ICACHE, ctx, cpu);  	if (total)  		ratio = avg / total * 100.0; @@ -451,13 +516,14 @@ static void print_l1_icache_misses(int cpu,  static void print_dtlb_cache_misses(int cpu,  				    struct perf_evsel *evsel,  				    double avg, -				    struct perf_stat_output_ctx *out) +				    struct perf_stat_output_ctx *out, +				    struct runtime_stat *st)  {  	double total, ratio = 0.0;  	const char *color;  	int ctx = evsel_context(evsel); -	total = avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]); +	total = runtime_stat_avg(st, STAT_DTLB_CACHE, ctx, cpu);  	if (total)  		ratio = avg / total * 100.0; @@ -469,13 +535,14 @@ static void print_dtlb_cache_misses(int cpu,  static void print_itlb_cache_misses(int cpu,  				    struct perf_evsel *evsel,  				    double avg, -				    struct perf_stat_output_ctx *out) +				    struct perf_stat_output_ctx *out, +				    struct runtime_stat *st)  {  	double total, ratio = 0.0;  	const 
char *color;  	int ctx = evsel_context(evsel); -	total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]); +	total = runtime_stat_avg(st, STAT_ITLB_CACHE, ctx, cpu);  	if (total)  		ratio = avg / total * 100.0; @@ -487,13 +554,14 @@ static void print_itlb_cache_misses(int cpu,  static void print_ll_cache_misses(int cpu,  				  struct perf_evsel *evsel,  				  double avg, -				  struct perf_stat_output_ctx *out) +				  struct perf_stat_output_ctx *out, +				  struct runtime_stat *st)  {  	double total, ratio = 0.0;  	const char *color;  	int ctx = evsel_context(evsel); -	total = avg_stats(&runtime_ll_cache_stats[ctx][cpu]); +	total = runtime_stat_avg(st, STAT_LL_CACHE, ctx, cpu);  	if (total)  		ratio = avg / total * 100.0; @@ -551,68 +619,72 @@ static double sanitize_val(double x)  	return x;  } -static double td_total_slots(int ctx, int cpu) +static double td_total_slots(int ctx, int cpu, struct runtime_stat *st)  { -	return avg_stats(&runtime_topdown_total_slots[ctx][cpu]); +	return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, ctx, cpu);  } -static double td_bad_spec(int ctx, int cpu) +static double td_bad_spec(int ctx, int cpu, struct runtime_stat *st)  {  	double bad_spec = 0;  	double total_slots;  	double total; -	total = avg_stats(&runtime_topdown_slots_issued[ctx][cpu]) - -		avg_stats(&runtime_topdown_slots_retired[ctx][cpu]) + -		avg_stats(&runtime_topdown_recovery_bubbles[ctx][cpu]); -	total_slots = td_total_slots(ctx, cpu); +	total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, ctx, cpu) - +		runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, ctx, cpu) + +		runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu); + +	total_slots = td_total_slots(ctx, cpu, st);  	if (total_slots)  		bad_spec = total / total_slots;  	return sanitize_val(bad_spec);  } -static double td_retiring(int ctx, int cpu) +static double td_retiring(int ctx, int cpu, struct runtime_stat *st)  {  	double retiring = 0; -	double total_slots = td_total_slots(ctx, cpu); -	double 
ret_slots = avg_stats(&runtime_topdown_slots_retired[ctx][cpu]); +	double total_slots = td_total_slots(ctx, cpu, st); +	double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, +					    ctx, cpu);  	if (total_slots)  		retiring = ret_slots / total_slots;  	return retiring;  } -static double td_fe_bound(int ctx, int cpu) +static double td_fe_bound(int ctx, int cpu, struct runtime_stat *st)  {  	double fe_bound = 0; -	double total_slots = td_total_slots(ctx, cpu); -	double fetch_bub = avg_stats(&runtime_topdown_fetch_bubbles[ctx][cpu]); +	double total_slots = td_total_slots(ctx, cpu, st); +	double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES, +					    ctx, cpu);  	if (total_slots)  		fe_bound = fetch_bub / total_slots;  	return fe_bound;  } -static double td_be_bound(int ctx, int cpu) +static double td_be_bound(int ctx, int cpu, struct runtime_stat *st)  { -	double sum = (td_fe_bound(ctx, cpu) + -		      td_bad_spec(ctx, cpu) + -		      td_retiring(ctx, cpu)); +	double sum = (td_fe_bound(ctx, cpu, st) + +		      td_bad_spec(ctx, cpu, st) + +		      td_retiring(ctx, cpu, st));  	if (sum == 0)  		return 0;  	return sanitize_val(1.0 - sum);  }  static void print_smi_cost(int cpu, struct perf_evsel *evsel, -			   struct perf_stat_output_ctx *out) +			   struct perf_stat_output_ctx *out, +			   struct runtime_stat *st)  {  	double smi_num, aperf, cycles, cost = 0.0;  	int ctx = evsel_context(evsel);  	const char *color = NULL; -	smi_num = avg_stats(&runtime_smi_num_stats[ctx][cpu]); -	aperf = avg_stats(&runtime_aperf_stats[ctx][cpu]); -	cycles = avg_stats(&runtime_cycles_stats[ctx][cpu]); +	smi_num = runtime_stat_avg(st, STAT_SMI_NUM, ctx, cpu); +	aperf = runtime_stat_avg(st, STAT_APERF, ctx, cpu); +	cycles = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);  	if ((cycles == 0) || (aperf == 0))  		return; @@ -632,7 +704,8 @@ static void generic_metric(const char *metric_expr,  			   const char *metric_name,  			   double avg,  			   int cpu, -			   
struct perf_stat_output_ctx *out) +			   struct perf_stat_output_ctx *out, +			   struct runtime_stat *st)  {  	print_metric_t print_metric = out->print_metric;  	struct parse_ctx pctx; @@ -651,7 +724,8 @@ static void generic_metric(const char *metric_expr,  			stats = &walltime_nsecs_stats;  			scale = 1e-9;  		} else { -			v = saved_value_lookup(metric_events[i], cpu, false); +			v = saved_value_lookup(metric_events[i], cpu, false, +					       STAT_NONE, 0, st);  			if (!v)  				break;  			stats = &v->stats; @@ -679,7 +753,8 @@ static void generic_metric(const char *metric_expr,  void perf_stat__print_shadow_stats(struct perf_evsel *evsel,  				   double avg, int cpu,  				   struct perf_stat_output_ctx *out, -				   struct rblist *metric_events) +				   struct rblist *metric_events, +				   struct runtime_stat *st)  {  	void *ctxp = out->ctx;  	print_metric_t print_metric = out->print_metric; @@ -690,7 +765,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,  	int num = 1;  	if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { -		total = avg_stats(&runtime_cycles_stats[ctx][cpu]); +		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); +  		if (total) {  			ratio = avg / total;  			print_metric(ctxp, NULL, "%7.2f ", @@ -698,8 +774,13 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,  		} else {  			print_metric(ctxp, NULL, NULL, "insn per cycle", 0);  		} -		total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]); -		total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu])); + +		total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, +					 ctx, cpu); + +		total = max(total, runtime_stat_avg(st, +						    STAT_STALLED_CYCLES_BACK, +						    ctx, cpu));  		if (total && avg) {  			out->new_line(ctxp); @@ -712,8 +793,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,  				     "stalled cycles per insn", 0);  		}  	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) { -		if 
(runtime_branches_stats[ctx][cpu].n != 0) -			print_branch_misses(cpu, evsel, avg, out); +		if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0) +			print_branch_misses(cpu, evsel, avg, out, st);  		else  			print_metric(ctxp, NULL, NULL, "of all branches", 0);  	} else if ( @@ -721,8 +802,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,  		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |  					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |  					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { -		if (runtime_l1_dcache_stats[ctx][cpu].n != 0) -			print_l1_dcache_misses(cpu, evsel, avg, out); + +		if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0) +			print_l1_dcache_misses(cpu, evsel, avg, out, st);  		else  			print_metric(ctxp, NULL, NULL, "of all L1-dcache hits", 0);  	} else if ( @@ -730,8 +812,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,  		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |  					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |  					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { -		if (runtime_l1_icache_stats[ctx][cpu].n != 0) -			print_l1_icache_misses(cpu, evsel, avg, out); + +		if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0) +			print_l1_icache_misses(cpu, evsel, avg, out, st);  		else  			print_metric(ctxp, NULL, NULL, "of all L1-icache hits", 0);  	} else if ( @@ -739,8 +822,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,  		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |  					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |  					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { -		if (runtime_dtlb_cache_stats[ctx][cpu].n != 0) -			print_dtlb_cache_misses(cpu, evsel, avg, out); + +		if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0) +			print_dtlb_cache_misses(cpu, evsel, avg, out, st);  		else  			print_metric(ctxp, NULL, NULL, "of all dTLB cache hits", 0);  	} else if ( @@ -748,8 +832,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,  		evsel->attr.config ==  ( 
PERF_COUNT_HW_CACHE_ITLB |  					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |  					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { -		if (runtime_itlb_cache_stats[ctx][cpu].n != 0) -			print_itlb_cache_misses(cpu, evsel, avg, out); + +		if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0) +			print_itlb_cache_misses(cpu, evsel, avg, out, st);  		else  			print_metric(ctxp, NULL, NULL, "of all iTLB cache hits", 0);  	} else if ( @@ -757,27 +842,28 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,  		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_LL |  					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |  					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { -		if (runtime_ll_cache_stats[ctx][cpu].n != 0) -			print_ll_cache_misses(cpu, evsel, avg, out); + +		if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0) +			print_ll_cache_misses(cpu, evsel, avg, out, st);  		else  			print_metric(ctxp, NULL, NULL, "of all LL-cache hits", 0);  	} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) { -		total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]); +		total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu);  		if (total)  			ratio = avg * 100 / total; -		if (runtime_cacherefs_stats[ctx][cpu].n != 0) +		if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0)  			print_metric(ctxp, NULL, "%8.3f %%",  				     "of all cache refs", ratio);  		else  			print_metric(ctxp, NULL, NULL, "of all cache refs", 0);  	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { -		print_stalled_cycles_frontend(cpu, evsel, avg, out); +		print_stalled_cycles_frontend(cpu, evsel, avg, out, st);  	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { -		print_stalled_cycles_backend(cpu, evsel, avg, out); +		print_stalled_cycles_backend(cpu, evsel, avg, out, st);  	} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { -		total = avg_stats(&runtime_nsecs_stats[cpu]); +		total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);  		if (total) {  
			ratio = avg / total; @@ -786,7 +872,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,  			print_metric(ctxp, NULL, NULL, "Ghz", 0);  		}  	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) { -		total = avg_stats(&runtime_cycles_stats[ctx][cpu]); +		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); +  		if (total)  			print_metric(ctxp, NULL,  					"%7.2f%%", "transactional cycles", @@ -795,8 +882,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,  			print_metric(ctxp, NULL, NULL, "transactional cycles",  				     0);  	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) { -		total = avg_stats(&runtime_cycles_stats[ctx][cpu]); -		total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); +		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); +		total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu); +  		if (total2 < avg)  			total2 = avg;  		if (total) @@ -805,19 +893,21 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,  		else  			print_metric(ctxp, NULL, NULL, "aborted cycles", 0);  	} else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) { -		total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); +		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, +					 ctx, cpu);  		if (avg)  			ratio = total / avg; -		if (runtime_cycles_in_tx_stats[ctx][cpu].n != 0) +		if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0)  			print_metric(ctxp, NULL, "%8.0f",  				     "cycles / transaction", ratio);  		else  			print_metric(ctxp, NULL, NULL, "cycles / transaction", -				     0); +				      0);  	} else if (perf_stat_evsel__is(evsel, ELISION_START)) { -		total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); +		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, +					 ctx, cpu);  		if (avg)  			ratio = total / avg; @@ -831,28 +921,28 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,  		else  			print_metric(ctxp, NULL, NULL, "CPUs utilized", 0);  	} else if (perf_stat_evsel__is(evsel, 
TOPDOWN_FETCH_BUBBLES)) { -		double fe_bound = td_fe_bound(ctx, cpu); +		double fe_bound = td_fe_bound(ctx, cpu, st);  		if (fe_bound > 0.2)  			color = PERF_COLOR_RED;  		print_metric(ctxp, color, "%8.1f%%", "frontend bound",  				fe_bound * 100.);  	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) { -		double retiring = td_retiring(ctx, cpu); +		double retiring = td_retiring(ctx, cpu, st);  		if (retiring > 0.7)  			color = PERF_COLOR_GREEN;  		print_metric(ctxp, color, "%8.1f%%", "retiring",  				retiring * 100.);  	} else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) { -		double bad_spec = td_bad_spec(ctx, cpu); +		double bad_spec = td_bad_spec(ctx, cpu, st);  		if (bad_spec > 0.1)  			color = PERF_COLOR_RED;  		print_metric(ctxp, color, "%8.1f%%", "bad speculation",  				bad_spec * 100.);  	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) { -		double be_bound = td_be_bound(ctx, cpu); +		double be_bound = td_be_bound(ctx, cpu, st);  		const char *name = "backend bound";  		static int have_recovery_bubbles = -1; @@ -865,19 +955,19 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,  		if (be_bound > 0.2)  			color = PERF_COLOR_RED; -		if (td_total_slots(ctx, cpu) > 0) +		if (td_total_slots(ctx, cpu, st) > 0)  			print_metric(ctxp, color, "%8.1f%%", name,  					be_bound * 100.);  		else  			print_metric(ctxp, NULL, NULL, name, 0);  	} else if (evsel->metric_expr) {  		generic_metric(evsel->metric_expr, evsel->metric_events, evsel->name, -				evsel->metric_name, avg, cpu, out); -	} else if (runtime_nsecs_stats[cpu].n != 0) { +				evsel->metric_name, avg, cpu, out, st); +	} else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) {  		char unit = 'M';  		char unit_buf[10]; -		total = avg_stats(&runtime_nsecs_stats[cpu]); +		total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);  		if (total)  			ratio = 1000.0 * avg / total; @@ -888,7 +978,7 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,  		
snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);  		print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio);  	} else if (perf_stat_evsel__is(evsel, SMI_NUM)) { -		print_smi_cost(cpu, evsel, out); +		print_smi_cost(cpu, evsel, out, st);  	} else {  		num = 0;  	} @@ -901,7 +991,7 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,  				out->new_line(ctxp);  			generic_metric(mexp->metric_expr, mexp->metric_events,  					evsel->name, mexp->metric_name, -					avg, cpu, out); +					avg, cpu, out, st);  		}  	}  	if (num == 0) diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 151e9efd7286..32235657c1ac 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -278,9 +278,16 @@ process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel  			perf_evsel__compute_deltas(evsel, cpu, thread, count);  		perf_counts_values__scale(count, config->scale, NULL);  		if (config->aggr_mode == AGGR_NONE) -			perf_stat__update_shadow_stats(evsel, count->val, cpu); -		if (config->aggr_mode == AGGR_THREAD) -			perf_stat__update_shadow_stats(evsel, count->val, 0); +			perf_stat__update_shadow_stats(evsel, count->val, cpu, +						       &rt_stat); +		if (config->aggr_mode == AGGR_THREAD) { +			if (config->stats) +				perf_stat__update_shadow_stats(evsel, +					count->val, 0, &config->stats[thread]); +			else +				perf_stat__update_shadow_stats(evsel, +					count->val, 0, &rt_stat); +		}  		break;  	case AGGR_GLOBAL:  		aggr->val += count->val; @@ -362,7 +369,7 @@ int perf_stat_process_counter(struct perf_stat_config *config,  	/*  	 * Save the full runtime - to allow normalization during printout:  	 */ -	perf_stat__update_shadow_stats(counter, *count, 0); +	perf_stat__update_shadow_stats(counter, *count, 0, &rt_stat);  	return 0;  } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index eefca5c981fd..dbc6f7134f61 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -5,6 +5,7 @@  #include <linux/types.h> 
 #include <stdio.h>  #include "xyarray.h" +#include "rblist.h"  struct stats  { @@ -43,11 +44,54 @@ enum aggr_mode {  	AGGR_UNSET,  }; +enum { +	CTX_BIT_USER	= 1 << 0, +	CTX_BIT_KERNEL	= 1 << 1, +	CTX_BIT_HV	= 1 << 2, +	CTX_BIT_HOST	= 1 << 3, +	CTX_BIT_IDLE	= 1 << 4, +	CTX_BIT_MAX	= 1 << 5, +}; + +#define NUM_CTX CTX_BIT_MAX + +enum stat_type { +	STAT_NONE = 0, +	STAT_NSECS, +	STAT_CYCLES, +	STAT_STALLED_CYCLES_FRONT, +	STAT_STALLED_CYCLES_BACK, +	STAT_BRANCHES, +	STAT_CACHEREFS, +	STAT_L1_DCACHE, +	STAT_L1_ICACHE, +	STAT_LL_CACHE, +	STAT_ITLB_CACHE, +	STAT_DTLB_CACHE, +	STAT_CYCLES_IN_TX, +	STAT_TRANSACTION, +	STAT_ELISION, +	STAT_TOPDOWN_TOTAL_SLOTS, +	STAT_TOPDOWN_SLOTS_ISSUED, +	STAT_TOPDOWN_SLOTS_RETIRED, +	STAT_TOPDOWN_FETCH_BUBBLES, +	STAT_TOPDOWN_RECOVERY_BUBBLES, +	STAT_SMI_NUM, +	STAT_APERF, +	STAT_MAX +}; + +struct runtime_stat { +	struct rblist value_list; +}; +  struct perf_stat_config {  	enum aggr_mode	aggr_mode;  	bool		scale;  	FILE		*output;  	unsigned int	interval; +	struct runtime_stat *stats; +	int		stats_num;  };  void update_stats(struct stats *stats, u64 val); @@ -67,6 +111,15 @@ static inline void init_stats(struct stats *stats)  struct perf_evsel;  struct perf_evlist; +struct perf_aggr_thread_value { +	struct perf_evsel *counter; +	int id; +	double uval; +	u64 val; +	u64 run; +	u64 ena; +}; +  bool __perf_evsel_stat__is(struct perf_evsel *evsel,  			   enum perf_stat_evsel_id id); @@ -75,16 +128,20 @@ bool __perf_evsel_stat__is(struct perf_evsel *evsel,  void perf_stat_evsel_id_init(struct perf_evsel *evsel); +extern struct runtime_stat rt_stat;  extern struct stats walltime_nsecs_stats;  typedef void (*print_metric_t)(void *ctx, const char *color, const char *unit,  			       const char *fmt, double val);  typedef void (*new_line_t )(void *ctx); +void runtime_stat__init(struct runtime_stat *st); +void runtime_stat__exit(struct runtime_stat *st);  void perf_stat__init_shadow_stats(void);  void perf_stat__reset_shadow_stats(void); +void 
perf_stat__reset_shadow_per_stat(struct runtime_stat *st);  void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count, -				    int cpu); +				    int cpu, struct runtime_stat *st);  struct perf_stat_output_ctx {  	void *ctx;  	print_metric_t print_metric; @@ -92,11 +149,11 @@ struct perf_stat_output_ctx {  	bool force_header;  }; -struct rblist;  void perf_stat__print_shadow_stats(struct perf_evsel *evsel,  				   double avg, int cpu,  				   struct perf_stat_output_ctx *out, -				   struct rblist *metric_events); +				   struct rblist *metric_events, +				   struct runtime_stat *st);  void perf_stat__collect_metric_expr(struct perf_evlist *);  int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw); diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c index aaa08ee8c717..d8bfd0c4d2cb 100644 --- a/tools/perf/util/string.c +++ b/tools/perf/util/string.c @@ -396,3 +396,49 @@ out_err_overflow:  	free(expr);  	return NULL;  } + +/* Like strpbrk(), but not break if it is right after a backslash (escaped) */ +char *strpbrk_esc(char *str, const char *stopset) +{ +	char *ptr; + +	do { +		ptr = strpbrk(str, stopset); +		if (ptr == str || +		    (ptr == str + 1 && *(ptr - 1) != '\\')) +			break; +		str = ptr + 1; +	} while (ptr && *(ptr - 1) == '\\' && *(ptr - 2) != '\\'); + +	return ptr; +} + +/* Like strdup, but do not copy a single backslash */ +char *strdup_esc(const char *str) +{ +	char *s, *d, *p, *ret = strdup(str); + +	if (!ret) +		return NULL; + +	d = strchr(ret, '\\'); +	if (!d) +		return ret; + +	s = d + 1; +	do { +		if (*s == '\0') { +			*d = '\0'; +			break; +		} +		p = strchr(s + 1, '\\'); +		if (p) { +			memmove(d, s, p - s); +			d += p - s; +			s = p + 1; +		} else +			memmove(d, s, strlen(s) + 1); +	} while (p); + +	return ret; +} diff --git a/tools/perf/util/string2.h b/tools/perf/util/string2.h index ee14ca5451ab..4c68a09b97e8 100644 --- a/tools/perf/util/string2.h +++ b/tools/perf/util/string2.h @@ -39,5 +39,7 
@@ static inline char *asprintf_expr_not_in_ints(const char *var, size_t nints, int  	return asprintf_expr_inout_ints(var, false, nints, ints);  } +char *strpbrk_esc(char *str, const char *stopset); +char *strdup_esc(const char *str);  #endif /* PERF_STRING_H */ diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 1b67a8639dfe..cc065d4bfafc 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -94,6 +94,11 @@ static int prefix_underscores_count(const char *str)  	return tail - str;  } +const char * __weak arch__normalize_symbol_name(const char *name) +{ +	return name; +} +  int __weak arch__compare_symbol_names(const char *namea, const char *nameb)  {  	return strcmp(namea, nameb); diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index a4f0075b4e5c..0563f33c1eb3 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -349,6 +349,7 @@ bool elf__needs_adjust_symbols(GElf_Ehdr ehdr);  void arch__sym_update(struct symbol *s, GElf_Sym *sym);  #endif +const char *arch__normalize_symbol_name(const char *name);  #define SYMBOL_A 0  #define SYMBOL_B 1 diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c index 6eea7cff3d4e..303bdb84ab5a 100644 --- a/tools/perf/util/syscalltbl.c +++ b/tools/perf/util/syscalltbl.c @@ -26,6 +26,10 @@  #include <asm/syscalls_64.c>  const int syscalltbl_native_max_id = SYSCALLTBL_x86_64_MAX_ID;  static const char **syscalltbl_native = syscalltbl_x86_64; +#elif defined(__s390x__) +#include <asm/syscalls_64.c> +const int syscalltbl_native_max_id = SYSCALLTBL_S390_64_MAX_ID; +static const char **syscalltbl_native = syscalltbl_s390_64;  #endif  struct syscall { diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h index 446aa7a56f25..6ef01a83b24e 100644 --- a/tools/perf/util/target.h +++ b/tools/perf/util/target.h @@ -64,6 +64,11 @@ static inline bool target__none(struct target *target)  	return !target__has_task(target) && !target__has_cpu(target);  } 
+static inline bool target__has_per_thread(struct target *target) +{ +	return target->system_wide && target->per_thread; +} +  static inline bool target__uses_dummy_map(struct target *target)  {  	bool use_dummy = false; @@ -73,6 +78,8 @@ static inline bool target__uses_dummy_map(struct target *target)  	else if (target__has_task(target) ||  	         (!target__has_cpu(target) && !target->uses_mmap))  		use_dummy = true; +	else if (target__has_per_thread(target)) +		use_dummy = true;  	return use_dummy;  } diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index be0d5a736dea..3e1038f6491c 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -92,7 +92,7 @@ struct thread_map *thread_map__new_by_tid(pid_t tid)  	return threads;  } -struct thread_map *thread_map__new_by_uid(uid_t uid) +static struct thread_map *__thread_map__new_all_cpus(uid_t uid)  {  	DIR *proc;  	int max_threads = 32, items, i; @@ -113,7 +113,6 @@ struct thread_map *thread_map__new_by_uid(uid_t uid)  	while ((dirent = readdir(proc)) != NULL) {  		char *end;  		bool grow = false; -		struct stat st;  		pid_t pid = strtol(dirent->d_name, &end, 10);  		if (*end) /* only interested in proper numerical dirents */ @@ -121,11 +120,12 @@ struct thread_map *thread_map__new_by_uid(uid_t uid)  		snprintf(path, sizeof(path), "/proc/%s", dirent->d_name); -		if (stat(path, &st) != 0) -			continue; +		if (uid != UINT_MAX) { +			struct stat st; -		if (st.st_uid != uid) -			continue; +			if (stat(path, &st) != 0 || st.st_uid != uid) +				continue; +		}  		snprintf(path, sizeof(path), "/proc/%d/task", pid);  		items = scandir(path, &namelist, filter, NULL); @@ -178,6 +178,16 @@ out_free_closedir:  	goto out_closedir;  } +struct thread_map *thread_map__new_all_cpus(void) +{ +	return __thread_map__new_all_cpus(UINT_MAX); +} + +struct thread_map *thread_map__new_by_uid(uid_t uid) +{ +	return __thread_map__new_all_cpus(uid); +} +  struct thread_map *thread_map__new(pid_t 
pid, pid_t tid, uid_t uid)  {  	if (pid != -1) @@ -313,7 +323,7 @@ out_free_threads:  }  struct thread_map *thread_map__new_str(const char *pid, const char *tid, -				       uid_t uid) +				       uid_t uid, bool per_thread)  {  	if (pid)  		return thread_map__new_by_pid_str(pid); @@ -321,6 +331,9 @@ struct thread_map *thread_map__new_str(const char *pid, const char *tid,  	if (!tid && uid != UINT_MAX)  		return thread_map__new_by_uid(uid); +	if (per_thread) +		return thread_map__new_all_cpus(); +  	return thread_map__new_by_tid_str(tid);  } diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index f15803985435..0a806b99e73c 100644 --- a/tools/perf/util/thread_map.h +++ b/tools/perf/util/thread_map.h @@ -23,6 +23,7 @@ struct thread_map *thread_map__new_dummy(void);  struct thread_map *thread_map__new_by_pid(pid_t pid);  struct thread_map *thread_map__new_by_tid(pid_t tid);  struct thread_map *thread_map__new_by_uid(uid_t uid); +struct thread_map *thread_map__new_all_cpus(void);  struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid);  struct thread_map *thread_map__new_event(struct thread_map_event *event); @@ -30,7 +31,7 @@ struct thread_map *thread_map__get(struct thread_map *map);  void thread_map__put(struct thread_map *map);  struct thread_map *thread_map__new_str(const char *pid, -		const char *tid, uid_t uid); +		const char *tid, uid_t uid, bool per_thread);  struct thread_map *thread_map__new_by_tid_str(const char *tid_str); diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c index 81927d027417..6193b46050a5 100644 --- a/tools/perf/util/time-utils.c +++ b/tools/perf/util/time-utils.c @@ -6,6 +6,7 @@  #include <time.h>  #include <errno.h>  #include <inttypes.h> +#include <math.h>  #include "perf.h"  #include "debug.h" @@ -60,11 +61,10 @@ static int parse_timestr_sec_nsec(struct perf_time_interval *ptime,  	return 0;  } -int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr) +static 
int split_start_end(char **start, char **end, const char *ostr, char ch)  {  	char *start_str, *end_str;  	char *d, *str; -	int rc = 0;  	if (ostr == NULL || *ostr == '\0')  		return 0; @@ -74,25 +74,35 @@ int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr)  	if (str == NULL)  		return -ENOMEM; -	ptime->start = 0; -	ptime->end = 0; - -	/* str has the format: <start>,<stop> -	 * variations: <start>, -	 *             ,<stop> -	 *             , -	 */  	start_str = str; -	d = strchr(start_str, ','); +	d = strchr(start_str, ch);  	if (d) {  		*d = '\0';  		++d;  	}  	end_str = d; +	*start = start_str; +	*end = end_str; + +	return 0; +} + +int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr) +{ +	char *start_str = NULL, *end_str; +	int rc; + +	rc = split_start_end(&start_str, &end_str, ostr, ','); +	if (rc || !start_str) +		return rc; + +	ptime->start = 0; +	ptime->end = 0; +  	rc = parse_timestr_sec_nsec(ptime, start_str, end_str); -	free(str); +	free(start_str);  	/* make sure end time is after start time if it was given */  	if (rc == 0 && ptime->end && ptime->end < ptime->start) @@ -104,6 +114,245 @@ int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr)  	return rc;  } +static int parse_percent(double *pcnt, char *str) +{ +	char *c, *endptr; +	double d; + +	c = strchr(str, '%'); +	if (c) +		*c = '\0'; +	else +		return -1; + +	d = strtod(str, &endptr); +	if (endptr != str + strlen(str)) +		return -1; + +	*pcnt = d / 100.0; +	return 0; +} + +static int percent_slash_split(char *str, struct perf_time_interval *ptime, +			       u64 start, u64 end) +{ +	char *p, *end_str; +	double pcnt, start_pcnt, end_pcnt; +	u64 total = end - start; +	int i; + +	/* +	 * Example: +	 * 10%/2: select the second 10% slice and the third 10% slice +	 */ + +	/* We can modify this string since the original one is copied */ +	p = strchr(str, '/'); +	if (!p) +		return -1; + +	*p = '\0'; +	if (parse_percent(&pcnt, str) < 0) +	
	return -1; + +	p++; +	i = (int)strtol(p, &end_str, 10); +	if (*end_str) +		return -1; + +	if (pcnt <= 0.0) +		return -1; + +	start_pcnt = pcnt * (i - 1); +	end_pcnt = pcnt * i; + +	if (start_pcnt < 0.0 || start_pcnt > 1.0 || +	    end_pcnt < 0.0 || end_pcnt > 1.0) { +		return -1; +	} + +	ptime->start = start + round(start_pcnt * total); +	ptime->end = start + round(end_pcnt * total); + +	return 0; +} + +static int percent_dash_split(char *str, struct perf_time_interval *ptime, +			      u64 start, u64 end) +{ +	char *start_str = NULL, *end_str; +	double start_pcnt, end_pcnt; +	u64 total = end - start; +	int ret; + +	/* +	 * Example: 0%-10% +	 */ + +	ret = split_start_end(&start_str, &end_str, str, '-'); +	if (ret || !start_str) +		return ret; + +	if ((parse_percent(&start_pcnt, start_str) != 0) || +	    (parse_percent(&end_pcnt, end_str) != 0)) { +		free(start_str); +		return -1; +	} + +	free(start_str); + +	if (start_pcnt < 0.0 || start_pcnt > 1.0 || +	    end_pcnt < 0.0 || end_pcnt > 1.0 || +	    start_pcnt > end_pcnt) { +		return -1; +	} + +	ptime->start = start + round(start_pcnt * total); +	ptime->end = start + round(end_pcnt * total); + +	return 0; +} + +typedef int (*time_pecent_split)(char *, struct perf_time_interval *, +				 u64 start, u64 end); + +static int percent_comma_split(struct perf_time_interval *ptime_buf, int num, +			       const char *ostr, u64 start, u64 end, +			       time_pecent_split func) +{ +	char *str, *p1, *p2; +	int len, ret, i = 0; + +	str = strdup(ostr); +	if (str == NULL) +		return -ENOMEM; + +	len = strlen(str); +	p1 = str; + +	while (p1 < str + len) { +		if (i >= num) { +			free(str); +			return -1; +		} + +		p2 = strchr(p1, ','); +		if (p2) +			*p2 = '\0'; + +		ret = (func)(p1, &ptime_buf[i], start, end); +		if (ret < 0) { +			free(str); +			return -1; +		} + +		pr_debug("start time %d: %" PRIu64 ", ", i, ptime_buf[i].start); +		pr_debug("end time %d: %" PRIu64 "\n", i, ptime_buf[i].end); + +		i++; + +		if (p2) +			p1 = p2 + 
1; +		else +			break; +	} + +	free(str); +	return i; +} + +static int one_percent_convert(struct perf_time_interval *ptime_buf, +			       const char *ostr, u64 start, u64 end, char *c) +{ +	char *str; +	int len = strlen(ostr), ret; + +	/* +	 * c points to '%'. +	 * '%' should be the last character +	 */ +	if (ostr + len - 1 != c) +		return -1; + +	/* +	 * Construct a string like "xx%/1" +	 */ +	str = malloc(len + 3); +	if (str == NULL) +		return -ENOMEM; + +	memcpy(str, ostr, len); +	strcpy(str + len, "/1"); + +	ret = percent_slash_split(str, ptime_buf, start, end); +	if (ret == 0) +		ret = 1; + +	free(str); +	return ret; +} + +int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num, +				 const char *ostr, u64 start, u64 end) +{ +	char *c; + +	/* +	 * ostr example: +	 * 10%/2,10%/3: select the second 10% slice and the third 10% slice +	 * 0%-10%,30%-40%: multiple time range +	 * 50%: just one percent +	 */ + +	memset(ptime_buf, 0, sizeof(*ptime_buf) * num); + +	c = strchr(ostr, '/'); +	if (c) { +		return percent_comma_split(ptime_buf, num, ostr, start, +					   end, percent_slash_split); +	} + +	c = strchr(ostr, '-'); +	if (c) { +		return percent_comma_split(ptime_buf, num, ostr, start, +					   end, percent_dash_split); +	} + +	c = strchr(ostr, '%'); +	if (c) +		return one_percent_convert(ptime_buf, ostr, start, end, c); + +	return -1; +} + +struct perf_time_interval *perf_time__range_alloc(const char *ostr, int *size) +{ +	const char *p1, *p2; +	int i = 1; +	struct perf_time_interval *ptime; + +	/* +	 * At least allocate one time range. 
+	 */ +	if (!ostr) +		goto alloc; + +	p1 = ostr; +	while (p1 < ostr + strlen(ostr)) { +		p2 = strchr(p1, ','); +		if (!p2) +			break; + +		p1 = p2 + 1; +		i++; +	} + +alloc: +	*size = i; +	ptime = calloc(i, sizeof(*ptime)); +	return ptime; +} +  bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp)  {  	/* if time is not set don't drop sample */ @@ -119,6 +368,34 @@ bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp)  	return false;  } +bool perf_time__ranges_skip_sample(struct perf_time_interval *ptime_buf, +				   int num, u64 timestamp) +{ +	struct perf_time_interval *ptime; +	int i; + +	if ((timestamp == 0) || (num == 0)) +		return false; + +	if (num == 1) +		return perf_time__skip_sample(&ptime_buf[0], timestamp); + +	/* +	 * start/end of multiple time ranges must be valid. +	 */ +	for (i = 0; i < num; i++) { +		ptime = &ptime_buf[i]; + +		if (timestamp >= ptime->start && +		    ((timestamp < ptime->end && i < num - 1) || +		     (timestamp <= ptime->end && i == num - 1))) { +			break; +		} +	} + +	return (i == num) ? 
true : false; +} +  int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz)  {  	u64  sec = timestamp / NSEC_PER_SEC; diff --git a/tools/perf/util/time-utils.h b/tools/perf/util/time-utils.h index 15b475c50ccf..70b177d2b98c 100644 --- a/tools/perf/util/time-utils.h +++ b/tools/perf/util/time-utils.h @@ -13,8 +13,16 @@ int parse_nsec_time(const char *str, u64 *ptime);  int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr); +int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num, +				 const char *ostr, u64 start, u64 end); + +struct perf_time_interval *perf_time__range_alloc(const char *ostr, int *size); +  bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp); +bool perf_time__ranges_skip_sample(struct perf_time_interval *ptime_buf, +				   int num, u64 timestamp); +  int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz);  int fetch_current_timestamp(char *buf, size_t sz); diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index 2532b558099b..183c91453522 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -76,6 +76,7 @@ struct perf_tool {  	bool		ordered_events;  	bool		ordering_requires_timestamps;  	bool		namespace_events; +	bool		no_warn;  	enum show_feature_header show_feat_hdr;  }; diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index 7a42f703e858..af873044d33a 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -631,9 +631,8 @@ static unw_accessors_t accessors = {  static int _unwind__prepare_access(struct thread *thread)  { -	if (callchain_param.record_mode != CALLCHAIN_DWARF) +	if (!dwarf_callchain_users)  		return 0; -  	thread->addr_space = unw_create_addr_space(&accessors, 0);  	if (!thread->addr_space) {  		pr_err("unwind: Can't create unwind address space.\n"); @@ -646,17 +645,15 @@ static int _unwind__prepare_access(struct thread 
*thread)  static void _unwind__flush_access(struct thread *thread)  { -	if (callchain_param.record_mode != CALLCHAIN_DWARF) +	if (!dwarf_callchain_users)  		return; -  	unw_flush_cache(thread->addr_space, 0, 0);  }  static void _unwind__finish_access(struct thread *thread)  { -	if (callchain_param.record_mode != CALLCHAIN_DWARF) +	if (!dwarf_callchain_users)  		return; -  	unw_destroy_addr_space(thread->addr_space);  } diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c index 647a1e6b4c7b..b029a5e9ae49 100644 --- a/tools/perf/util/unwind-libunwind.c +++ b/tools/perf/util/unwind-libunwind.c @@ -3,7 +3,7 @@  #include "thread.h"  #include "session.h"  #include "debug.h" -#include "arch/common.h" +#include "env.h"  struct unwind_libunwind_ops __weak *local_unwind_libunwind_ops;  struct unwind_libunwind_ops __weak *x86_32_unwind_libunwind_ops; @@ -39,7 +39,7 @@ int unwind__prepare_access(struct thread *thread, struct map *map,  	if (dso_type == DSO__TYPE_UNKNOWN)  		return 0; -	arch = normalize_arch(thread->mg->machine->env->arch); +	arch = perf_env__arch(thread->mg->machine->env);  	if (!strcmp(arch, "x86")) {  		if (dso_type != DSO__TYPE_64BIT) diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index a789f952b3e9..443892dabedb 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -210,7 +210,7 @@ static int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64  		size -= ret;  		off_in += ret; -		off_out -= ret; +		off_out += ret;  	}  	munmap(ptr, off_in + size); diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 01434509c2e9..9496365da3d7 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -68,4 +68,14 @@ extern bool perf_singlethreaded;  void perf_set_singlethreaded(void);  void perf_set_multithreaded(void); +#ifndef O_CLOEXEC +#ifdef __sparc__ +#define O_CLOEXEC      0x400000 +#elif defined(__alpha__) || defined(__hppa__) +#define O_CLOEXEC      010000000 +#else 
+#define O_CLOEXEC      02000000 +#endif +#endif +  #endif /* GIT_COMPAT_UTIL_H */ | 
