Diffstat (limited to 'tools/lib')
27 files changed, 716 insertions, 181 deletions
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index a9c3e33d0f8a..ab40dbf9f020 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -837,6 +837,50 @@ int bpf_link_create(int prog_fd, int target_fd,  		if (!OPTS_ZEROED(opts, netkit))  			return libbpf_err(-EINVAL);  		break; +	case BPF_CGROUP_INET_INGRESS: +	case BPF_CGROUP_INET_EGRESS: +	case BPF_CGROUP_INET_SOCK_CREATE: +	case BPF_CGROUP_INET_SOCK_RELEASE: +	case BPF_CGROUP_INET4_BIND: +	case BPF_CGROUP_INET6_BIND: +	case BPF_CGROUP_INET4_POST_BIND: +	case BPF_CGROUP_INET6_POST_BIND: +	case BPF_CGROUP_INET4_CONNECT: +	case BPF_CGROUP_INET6_CONNECT: +	case BPF_CGROUP_UNIX_CONNECT: +	case BPF_CGROUP_INET4_GETPEERNAME: +	case BPF_CGROUP_INET6_GETPEERNAME: +	case BPF_CGROUP_UNIX_GETPEERNAME: +	case BPF_CGROUP_INET4_GETSOCKNAME: +	case BPF_CGROUP_INET6_GETSOCKNAME: +	case BPF_CGROUP_UNIX_GETSOCKNAME: +	case BPF_CGROUP_UDP4_SENDMSG: +	case BPF_CGROUP_UDP6_SENDMSG: +	case BPF_CGROUP_UNIX_SENDMSG: +	case BPF_CGROUP_UDP4_RECVMSG: +	case BPF_CGROUP_UDP6_RECVMSG: +	case BPF_CGROUP_UNIX_RECVMSG: +	case BPF_CGROUP_SOCK_OPS: +	case BPF_CGROUP_DEVICE: +	case BPF_CGROUP_SYSCTL: +	case BPF_CGROUP_GETSOCKOPT: +	case BPF_CGROUP_SETSOCKOPT: +	case BPF_LSM_CGROUP: +		relative_fd = OPTS_GET(opts, cgroup.relative_fd, 0); +		relative_id = OPTS_GET(opts, cgroup.relative_id, 0); +		if (relative_fd && relative_id) +			return libbpf_err(-EINVAL); +		if (relative_id) { +			attr.link_create.cgroup.relative_id = relative_id; +			attr.link_create.flags |= BPF_F_ID; +		} else { +			attr.link_create.cgroup.relative_fd = relative_fd; +		} +		attr.link_create.cgroup.expected_revision = +			OPTS_GET(opts, cgroup.expected_revision, 0); +		if (!OPTS_ZEROED(opts, cgroup)) +			return libbpf_err(-EINVAL); +		break;  	default:  		if (!OPTS_ZEROED(opts, flags))  			return libbpf_err(-EINVAL); @@ -1331,3 +1375,23 @@ int bpf_token_create(int bpffs_fd, struct bpf_token_create_opts *opts)  	fd = sys_bpf_fd(BPF_TOKEN_CREATE, &attr, attr_sz);  	return libbpf_err_errno(fd);  } + +int bpf_prog_stream_read(int prog_fd, __u32 stream_id, void *buf, __u32 buf_len, +			 struct bpf_prog_stream_read_opts *opts) +{ +	const size_t attr_sz = offsetofend(union bpf_attr, prog_stream_read); +	union bpf_attr attr; +	int err; + +	if (!OPTS_VALID(opts, bpf_prog_stream_read_opts)) +		return libbpf_err(-EINVAL); + +	memset(&attr, 0, attr_sz); +	attr.prog_stream_read.stream_buf = ptr_to_u64(buf); +	attr.prog_stream_read.stream_buf_len = buf_len; +	attr.prog_stream_read.stream_id = stream_id; +	attr.prog_stream_read.prog_fd = prog_fd; + +	err = sys_bpf(BPF_PROG_STREAM_READ_BY_FD, &attr, attr_sz); +	return libbpf_err_errno(err); +} diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 777627d33d25..7252150e7ad3 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -438,6 +438,11 @@ struct bpf_link_create_opts {  			__u32 relative_id;  			__u64 expected_revision;  		} netkit; +		struct { +			__u32 relative_fd; +			__u32 relative_id; +			__u64 expected_revision; +		} cgroup;  	};  	size_t :0;  }; @@ -704,6 +709,27 @@ struct bpf_token_create_opts {  LIBBPF_API int bpf_token_create(int bpffs_fd,  				struct bpf_token_create_opts *opts); +struct bpf_prog_stream_read_opts { +	size_t sz; +	size_t :0; +}; +#define bpf_prog_stream_read_opts__last_field sz +/** + * @brief **bpf_prog_stream_read** reads data from the BPF stream of a given BPF + * program. + * + * @param prog_fd FD for the BPF program whose BPF stream is to be read. + * @param stream_id ID of the BPF stream to be read. 
+ * @param buf Buffer to read data into from the BPF stream. + * @param buf_len Maximum number of bytes to read from the BPF stream. + * @param opts optional options, can be NULL + * + * @return The number of bytes read, on success; negative error code, otherwise + * (errno is also set to the error code) + */ +LIBBPF_API int bpf_prog_stream_read(int prog_fd, __u32 stream_id, void *buf, __u32 buf_len, +				    struct bpf_prog_stream_read_opts *opts); +  #ifdef __cplusplus  } /* extern "C" */  #endif diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h index c0e13cdf9660..b997c68bd945 100644 --- a/tools/lib/bpf/bpf_core_read.h +++ b/tools/lib/bpf/bpf_core_read.h @@ -388,7 +388,13 @@ extern void *bpf_rdonly_cast(const void *obj, __u32 btf_id) __ksym __weak;  #define ___arrow10(a, b, c, d, e, f, g, h, i, j) a->b->c->d->e->f->g->h->i->j  #define ___arrow(...) ___apply(___arrow, ___narg(__VA_ARGS__))(__VA_ARGS__) +#if defined(__clang__) && (__clang_major__ >= 19) +#define ___type(...) __typeof_unqual__(___arrow(__VA_ARGS__)) +#elif defined(__GNUC__) && (__GNUC__ >= 14) +#define ___type(...) __typeof_unqual__(___arrow(__VA_ARGS__)) +#else  #define ___type(...) typeof(___arrow(__VA_ARGS__)) +#endif  #define ___read(read_fn, dst, src_type, src, accessor)			    \  	read_fn((void *)(dst), sizeof(*(dst)), &((src_type)(src))->accessor) diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 686824b8b413..80c028540656 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -15,6 +15,14 @@  #define __array(name, val) typeof(val) *name[]  #define __ulong(name, val) enum { ___bpf_concat(__unique_value, __COUNTER__) = val } name +#ifndef likely +#define likely(x)      (__builtin_expect(!!(x), 1)) +#endif + +#ifndef unlikely +#define unlikely(x)    (__builtin_expect(!!(x), 0)) +#endif +  /*   * Helper macro to place programs, maps, license in   * different sections in elf_bpf file. Section names @@ -207,6 +215,7 @@ enum libbpf_tristate {  #define __arg_nonnull __attribute((btf_decl_tag("arg:nonnull")))  #define __arg_nullable __attribute((btf_decl_tag("arg:nullable")))  #define __arg_trusted __attribute((btf_decl_tag("arg:trusted"))) +#define __arg_untrusted __attribute((btf_decl_tag("arg:untrusted")))  #define __arg_arena __attribute((btf_decl_tag("arg:arena")))  #ifndef ___bpf_concat @@ -306,6 +315,22 @@ enum libbpf_tristate {  			  ___param, sizeof(___param));		\  }) +extern int bpf_stream_vprintk(int stream_id, const char *fmt__str, const void *args, +			      __u32 len__sz, void *aux__prog) __weak __ksym; + +#define bpf_stream_printk(stream_id, fmt, args...)				
\ +({										\ +	static const char ___fmt[] = fmt;					\ +	unsigned long long ___param[___bpf_narg(args)];				\ +										\ +	_Pragma("GCC diagnostic push")						\ +	_Pragma("GCC diagnostic ignored \"-Wint-conversion\"")			\ +	___bpf_fill(___param, args);						\ +	_Pragma("GCC diagnostic pop")						\ +										\ +	bpf_stream_vprintk(stream_id, ___fmt, ___param, sizeof(___param), NULL);\ +}) +  /* Use __bpf_printk when bpf_printk call has 3 or fewer fmt args   * Otherwise use __bpf_vprintk   */ diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 38bc6b14b066..37682908cb0f 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -12,6 +12,7 @@  #include <sys/utsname.h>  #include <sys/param.h>  #include <sys/stat.h> +#include <sys/mman.h>  #include <linux/kernel.h>  #include <linux/err.h>  #include <linux/btf.h> @@ -120,6 +121,9 @@ struct btf {  	/* whether base_btf should be freed in btf_free for this instance */  	bool owns_base; +	/* whether raw_data is a (read-only) mmap */ +	bool raw_data_is_mmap; +  	/* BTF object FD, if loaded into kernel */  	int fd; @@ -951,6 +955,17 @@ static bool btf_is_modifiable(const struct btf *btf)  	return (void *)btf->hdr != btf->raw_data;  } +static void btf_free_raw_data(struct btf *btf) +{ +	if (btf->raw_data_is_mmap) { +		munmap(btf->raw_data, btf->raw_size); +		btf->raw_data_is_mmap = false; +	} else { +		free(btf->raw_data); +	} +	btf->raw_data = NULL; +} +  void btf__free(struct btf *btf)  {  	if (IS_ERR_OR_NULL(btf)) @@ -970,7 +985,7 @@ void btf__free(struct btf *btf)  		free(btf->types_data);  		strset__free(btf->strs_set);  	} -	free(btf->raw_data); +	btf_free_raw_data(btf);  	free(btf->raw_data_swapped);  	free(btf->type_offs);  	if (btf->owns_base) @@ -996,7 +1011,7 @@ static struct btf *btf_new_empty(struct btf *base_btf)  	if (base_btf) {  		btf->base_btf = base_btf;  		btf->start_id = btf__type_cnt(base_btf); -		btf->start_str_off = base_btf->hdr->str_len; +		btf->start_str_off = base_btf->hdr->str_len + base_btf->start_str_off;  		btf->swapped_endian = base_btf->swapped_endian;  	} @@ -1030,7 +1045,7 @@ struct btf *btf__new_empty_split(struct btf *base_btf)  	return libbpf_ptr(btf_new_empty(base_btf));  } -static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf) +static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf, bool is_mmap)  {  	struct btf *btf;  	int err; @@ -1050,12 +1065,18 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf)  		btf->start_str_off = base_btf->hdr->str_len;  	} -	btf->raw_data = malloc(size); -	if (!btf->raw_data) { -		err = -ENOMEM; -		goto done; +	if (is_mmap) { +		btf->raw_data = (void *)data; +		btf->raw_data_is_mmap = true; +	} else { +		btf->raw_data = malloc(size); +		if (!btf->raw_data) { +			err = -ENOMEM; +			goto done; +		} +		memcpy(btf->raw_data, data, size);  	} -	memcpy(btf->raw_data, data, size); +  	btf->raw_size = size;  	btf->hdr = btf->raw_data; @@ -1083,12 +1104,12 @@ done:  struct btf *btf__new(const void *data, __u32 size)  { -	return libbpf_ptr(btf_new(data, size, NULL)); +	return libbpf_ptr(btf_new(data, size, NULL, false));  }  struct btf *btf__new_split(const void *data, __u32 size, struct btf *base_btf)  { -	return libbpf_ptr(btf_new(data, size, base_btf)); +	return libbpf_ptr(btf_new(data, size, base_btf, false));  }  struct btf_elf_secs { @@ -1148,6 +1169,12 @@ static int btf_find_elf_sections(Elf *elf, const char *path, struct btf_elf_secs  		else  			continue; +		if (sh.sh_type != SHT_PROGBITS) 
{ +			pr_warn("unexpected section type (%d) of section(%d, %s) from %s\n", +				sh.sh_type, idx, name, path); +			goto err; +		} +  		data = elf_getdata(scn, 0);  		if (!data) {  			pr_warn("failed to get section(%d, %s) data from %s\n", @@ -1203,7 +1230,7 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,  	if (secs.btf_base_data) {  		dist_base_btf = btf_new(secs.btf_base_data->d_buf, secs.btf_base_data->d_size, -					NULL); +					NULL, false);  		if (IS_ERR(dist_base_btf)) {  			err = PTR_ERR(dist_base_btf);  			dist_base_btf = NULL; @@ -1212,7 +1239,7 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,  	}  	btf = btf_new(secs.btf_data->d_buf, secs.btf_data->d_size, -		      dist_base_btf ?: base_btf); +		      dist_base_btf ?: base_btf, false);  	if (IS_ERR(btf)) {  		err = PTR_ERR(btf);  		goto done; @@ -1329,7 +1356,7 @@ static struct btf *btf_parse_raw(const char *path, struct btf *base_btf)  	}  	/* finally parse BTF data */ -	btf = btf_new(data, sz, base_btf); +	btf = btf_new(data, sz, base_btf, false);  err_out:  	free(data); @@ -1348,6 +1375,37 @@ struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf)  	return libbpf_ptr(btf_parse_raw(path, base_btf));  } +static struct btf *btf_parse_raw_mmap(const char *path, struct btf *base_btf) +{ +	struct stat st; +	void *data; +	struct btf *btf; +	int fd, err; + +	fd = open(path, O_RDONLY); +	if (fd < 0) +		return ERR_PTR(-errno); + +	if (fstat(fd, &st) < 0) { +		err = -errno; +		close(fd); +		return ERR_PTR(err); +	} + +	data = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); +	err = -errno; +	close(fd); + +	if (data == MAP_FAILED) +		return ERR_PTR(err); + +	btf = btf_new(data, st.st_size, base_btf, true); +	if (IS_ERR(btf)) +		munmap(data, st.st_size); + +	return btf; +} +  static struct btf *btf_parse(const char *path, struct btf *base_btf, struct btf_ext **btf_ext)  {  	struct btf *btf; @@ -1612,7 +1670,7 @@ struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf)  		goto exit_free;  	} -	btf = btf_new(ptr, btf_info.btf_size, base_btf); +	btf = btf_new(ptr, btf_info.btf_size, base_btf, false);  exit_free:  	free(ptr); @@ -1652,10 +1710,8 @@ struct btf *btf__load_from_kernel_by_id(__u32 id)  static void btf_invalidate_raw_data(struct btf *btf)  { -	if (btf->raw_data) { -		free(btf->raw_data); -		btf->raw_data = NULL; -	} +	if (btf->raw_data) +		btf_free_raw_data(btf);  	if (btf->raw_data_swapped) {  		free(btf->raw_data_swapped);  		btf->raw_data_swapped = NULL; @@ -4350,46 +4406,109 @@ static inline __u16 btf_fwd_kind(struct btf_type *t)  	return btf_kflag(t) ? 
BTF_KIND_UNION : BTF_KIND_STRUCT;  } -/* Check if given two types are identical ARRAY definitions */ -static bool btf_dedup_identical_arrays(struct btf_dedup *d, __u32 id1, __u32 id2) +static bool btf_dedup_identical_types(struct btf_dedup *d, __u32 id1, __u32 id2, int depth)  {  	struct btf_type *t1, *t2; +	int k1, k2; +recur: +	if (depth <= 0) +		return false;  	t1 = btf_type_by_id(d->btf, id1);  	t2 = btf_type_by_id(d->btf, id2); -	if (!btf_is_array(t1) || !btf_is_array(t2)) + +	k1 = btf_kind(t1); +	k2 = btf_kind(t2); +	if (k1 != k2)  		return false; -	return btf_equal_array(t1, t2); -} +	switch (k1) { +	case BTF_KIND_UNKN: /* VOID */ +		return true; +	case BTF_KIND_INT: +		return btf_equal_int_tag(t1, t2); +	case BTF_KIND_ENUM: +	case BTF_KIND_ENUM64: +		return btf_compat_enum(t1, t2); +	case BTF_KIND_FWD: +	case BTF_KIND_FLOAT: +		return btf_equal_common(t1, t2); +	case BTF_KIND_CONST: +	case BTF_KIND_VOLATILE: +	case BTF_KIND_RESTRICT: +	case BTF_KIND_PTR: +	case BTF_KIND_TYPEDEF: +	case BTF_KIND_FUNC: +	case BTF_KIND_TYPE_TAG: +		if (t1->info != t2->info || t1->name_off != t2->name_off) +			return false; +		id1 = t1->type; +		id2 = t2->type; +		goto recur; +	case BTF_KIND_ARRAY: { +		struct btf_array *a1, *a2; -/* Check if given two types are identical STRUCT/UNION definitions */ -static bool btf_dedup_identical_structs(struct btf_dedup *d, __u32 id1, __u32 id2) -{ -	const struct btf_member *m1, *m2; -	struct btf_type *t1, *t2; -	int n, i; +		if (!btf_compat_array(t1, t2)) +			return false; -	t1 = btf_type_by_id(d->btf, id1); -	t2 = btf_type_by_id(d->btf, id2); +		a1 = btf_array(t1); +		a2 = btf_array(t2); -	if (!btf_is_composite(t1) || btf_kind(t1) != btf_kind(t2)) -		return false; +		if (a1->index_type != a2->index_type && +		    !btf_dedup_identical_types(d, a1->index_type, a2->index_type, depth - 1)) +			return false; -	if (!btf_shallow_equal_struct(t1, t2)) -		return false; +		if (a1->type != a2->type && +		    !btf_dedup_identical_types(d, a1->type, a2->type, depth - 1)) +			return false; -	m1 = btf_members(t1); -	m2 = btf_members(t2); -	for (i = 0, n = btf_vlen(t1); i < n; i++, m1++, m2++) { -		if (m1->type != m2->type && -		    !btf_dedup_identical_arrays(d, m1->type, m2->type) && -		    !btf_dedup_identical_structs(d, m1->type, m2->type)) +		return true; +	} +	case BTF_KIND_STRUCT: +	case BTF_KIND_UNION: { +		const struct btf_member *m1, *m2; +		int i, n; + +		if (!btf_shallow_equal_struct(t1, t2))  			return false; + +		m1 = btf_members(t1); +		m2 = btf_members(t2); +		for (i = 0, n = btf_vlen(t1); i < n; i++, m1++, m2++) { +			if (m1->type == m2->type) +				continue; +			if (!btf_dedup_identical_types(d, m1->type, m2->type, depth - 1)) +				return false; +		} +		return true; +	} +	case BTF_KIND_FUNC_PROTO: { +		const struct btf_param *p1, *p2; +		int i, n; + +		if (!btf_compat_fnproto(t1, t2)) +			return false; + +		if (t1->type != t2->type && +		    !btf_dedup_identical_types(d, t1->type, t2->type, depth - 1)) +			return false; + +		p1 = btf_params(t1); +		p2 = btf_params(t2); +		for (i = 0, n = btf_vlen(t1); i < n; i++, p1++, p2++) { +			if (p1->type == p2->type) +				continue; +			if (!btf_dedup_identical_types(d, p1->type, p2->type, depth - 1)) +				return false; +		} +		return true; +	} +	default: +		return false;  	} -	return true;  } +  /*   * Check equivalence of BTF type graph formed by candidate struct/union (we'll   * call it "candidate graph" in this description for brevity) to a type graph @@ -4508,19 +4627,13 @@ static int btf_dedup_is_equiv(struct btf_dedup 
*d, __u32 cand_id,  		 * different fields within the *same* struct. This breaks type  		 * equivalence check, which makes an assumption that candidate  		 * types sub-graph has a consistent and deduped-by-compiler -		 * types within a single CU. So work around that by explicitly -		 * allowing identical array types here. +		 * types within a single CU. And similar situation can happen +		 * with struct/union sometimes, and even with pointers. +		 * So accommodate cases like this doing a structural +		 * comparison recursively, but avoiding being stuck in endless +		 * loops by limiting the depth up to which we check.  		 */ -		if (btf_dedup_identical_arrays(d, hypot_type_id, cand_id)) -			return 1; -		/* It turns out that similar situation can happen with -		 * struct/union sometimes, sigh... Handle the case where -		 * structs/unions are exactly the same, down to the referenced -		 * type IDs. Anything more complicated (e.g., if referenced -		 * types are different, but equivalent) is *way more* -		 * complicated and requires a many-to-many equivalence mapping. -		 */ -		if (btf_dedup_identical_structs(d, hypot_type_id, cand_id)) +		if (btf_dedup_identical_types(d, hypot_type_id, cand_id, 16))  			return 1;  		return 0;  	} @@ -5268,7 +5381,10 @@ struct btf *btf__load_vmlinux_btf(void)  		pr_warn("kernel BTF is missing at '%s', was CONFIG_DEBUG_INFO_BTF enabled?\n",  			sysfs_btf_path);  	} else { -		btf = btf__parse(sysfs_btf_path, NULL); +		btf = btf_parse_raw_mmap(sysfs_btf_path, NULL); +		if (IS_ERR(btf)) +			btf = btf__parse(sysfs_btf_path, NULL); +  		if (!btf) {  			err = -errno;  			pr_warn("failed to read kernel BTF from '%s': %s\n", diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 4392451d634b..ccfd905f03df 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -326,9 +326,10 @@ struct btf_dump_type_data_opts {  	bool compact;		/* no newlines/indentation */  	bool skip_names;	/* skip member/type names */  	bool emit_zeroes;	/* show 0-valued fields */ +	bool emit_strings;	/* print char arrays as strings */  	size_t :0;  }; -#define btf_dump_type_data_opts__last_field emit_zeroes +#define btf_dump_type_data_opts__last_field emit_strings  LIBBPF_API int  btf_dump__dump_type_data(struct btf_dump *d, __u32 id, diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 460c3e57fadb..f09f25eccf3c 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -68,6 +68,7 @@ struct btf_dump_data {  	bool compact;  	bool skip_names;  	bool emit_zeroes; +	bool emit_strings;  	__u8 indent_lvl;	/* base indent level */  	char indent_str[BTF_DATA_INDENT_STR_LEN];  	/* below are used during iteration */ @@ -226,6 +227,9 @@ static void btf_dump_free_names(struct hashmap *map)  	size_t bkt;  	struct hashmap_entry *cur; +	if (!map) +		return; +  	hashmap__for_each_entry(map, cur, bkt)  		free((void *)cur->pkey); @@ -2028,6 +2032,52 @@ static int btf_dump_var_data(struct btf_dump *d,  	return btf_dump_dump_type_data(d, NULL, t, type_id, data, 0, 0);  } +static int btf_dump_string_data(struct btf_dump *d, +				const struct btf_type *t, +				__u32 id, +				const void *data) +{ +	const struct btf_array *array = btf_array(t); +	const char *chars = data; +	__u32 i; + +	/* Make sure it is a NUL-terminated string. */ +	for (i = 0; i < array->nelems; i++) { +		if ((void *)(chars + i) >= d->typed_dump->data_end) +			return -E2BIG; +		if (chars[i] == '\0') +			break; +	} +	if (i == array->nelems) { +		/* The caller will print this as a regular array. 
*/ +		return -EINVAL; +	} + +	btf_dump_data_pfx(d); +	btf_dump_printf(d, "\""); + +	for (i = 0; i < array->nelems; i++) { +		char c = chars[i]; + +		if (c == '\0') { +			/* +			 * When printing character arrays as strings, NUL bytes +			 * are always treated as string terminators; they are +			 * never printed. +			 */ +			break; +		} +		if (isprint(c)) +			btf_dump_printf(d, "%c", c); +		else +			btf_dump_printf(d, "\\x%02x", (__u8)c); +	} + +	btf_dump_printf(d, "\""); + +	return 0; +} +  static int btf_dump_array_data(struct btf_dump *d,  			       const struct btf_type *t,  			       __u32 id, @@ -2055,8 +2105,13 @@ static int btf_dump_array_data(struct btf_dump *d,  		 * char arrays, so if size is 1 and element is  		 * printable as a char, we'll do that.  		 */ -		if (elem_size == 1) +		if (elem_size == 1) { +			if (d->typed_dump->emit_strings && +			    btf_dump_string_data(d, t, id, data) == 0) { +				return 0; +			}  			d->typed_dump->is_array_char = true; +		}  	}  	/* note that we increment depth before calling btf_dump_print() below; @@ -2544,6 +2599,7 @@ int btf_dump__dump_type_data(struct btf_dump *d, __u32 id,  	d->typed_dump->compact = OPTS_GET(opts, compact, false);  	d->typed_dump->skip_names = OPTS_GET(opts, skip_names, false);  	d->typed_dump->emit_zeroes = OPTS_GET(opts, emit_zeroes, false); +	d->typed_dump->emit_strings = OPTS_GET(opts, emit_strings, false);  	ret = btf_dump_dump_type_data(d, NULL, t, id, data, 0, 0); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 6b85060f07b3..8f5a81b672e1 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -60,6 +60,8 @@  #define BPF_FS_MAGIC		0xcafe4a11  #endif +#define MAX_EVENT_NAME_LEN	64 +  #define BPF_FS_DEFAULT_PATH "/sys/fs/bpf"  #define BPF_INSN_SZ (sizeof(struct bpf_insn)) @@ -284,7 +286,7 @@ void libbpf_print(enum libbpf_print_level level, const char *format, ...)  	old_errno = errno;  	va_start(args, format); -	__libbpf_pr(level, format, args); +	print_fn(level, format, args);  	va_end(args);  	errno = old_errno; @@ -595,7 +597,7 @@ struct extern_desc {  	int sym_idx;  	int btf_id;  	int sec_btf_id; -	const char *name; +	char *name;  	char *essent_name;  	bool is_set;  	bool is_weak; @@ -733,7 +735,7 @@ struct bpf_object {  	struct usdt_manager *usdt_man; -	struct bpf_map *arena_map; +	int arena_map_idx;  	void *arena_data;  	size_t arena_data_sz; @@ -896,7 +898,7 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,  			return -LIBBPF_ERRNO__FORMAT;  		} -		if (sec_off + prog_sz > sec_sz) { +		if (sec_off + prog_sz > sec_sz || sec_off + prog_sz < sec_off) {  			pr_warn("sec '%s': program at offset %zu crosses section boundary\n",  				sec_name, sec_off);  			return -LIBBPF_ERRNO__FORMAT; @@ -1515,6 +1517,7 @@ static struct bpf_object *bpf_object__new(const char *path,  	obj->efile.obj_buf_sz = obj_buf_sz;  	obj->efile.btf_maps_shndx = -1;  	obj->kconfig_map_idx = -1; +	obj->arena_map_idx = -1;  	obj->kern_version = get_kernel_version();  	obj->state  = OBJ_OPEN; @@ -1725,15 +1728,6 @@ static Elf64_Sym *find_elf_var_sym(const struct bpf_object *obj, const char *nam  	return ERR_PTR(-ENOENT);  } -/* Some versions of Android don't provide memfd_create() in their libc - * implementation, so avoid complications and just go straight to Linux - * syscall. 
- */ -static int sys_memfd_create(const char *name, unsigned flags) -{ -	return syscall(__NR_memfd_create, name, flags); -} -  #ifndef MFD_CLOEXEC  #define MFD_CLOEXEC 0x0001U  #endif @@ -2971,7 +2965,7 @@ static int init_arena_map_data(struct bpf_object *obj, struct bpf_map *map,  	const long page_sz = sysconf(_SC_PAGE_SIZE);  	size_t mmap_sz; -	mmap_sz = bpf_map_mmap_sz(obj->arena_map); +	mmap_sz = bpf_map_mmap_sz(map);  	if (roundup(data_sz, page_sz) > mmap_sz) {  		pr_warn("elf: sec '%s': declared ARENA map size (%zu) is too small to hold global __arena variables of size %zu\n",  			sec_name, mmap_sz, data_sz); @@ -3045,12 +3039,12 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,  		if (map->def.type != BPF_MAP_TYPE_ARENA)  			continue; -		if (obj->arena_map) { +		if (obj->arena_map_idx >= 0) {  			pr_warn("map '%s': only single ARENA map is supported (map '%s' is also ARENA)\n", -				map->name, obj->arena_map->name); +				map->name, obj->maps[obj->arena_map_idx].name);  			return -EINVAL;  		} -		obj->arena_map = map; +		obj->arena_map_idx = i;  		if (obj->efile.arena_data) {  			err = init_arena_map_data(obj, map, ARENA_SEC, obj->efile.arena_data_shndx, @@ -3060,7 +3054,7 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,  				return err;  		}  	} -	if (obj->efile.arena_data && !obj->arena_map) { +	if (obj->efile.arena_data && obj->arena_map_idx < 0) {  		pr_warn("elf: sec '%s': to use global __arena variables the ARENA map should be explicitly declared in SEC(\".maps\")\n",  			ARENA_SEC);  		return -ENOENT; @@ -4266,7 +4260,9 @@ static int bpf_object__collect_externs(struct bpf_object *obj)  			return ext->btf_id;  		}  		t = btf__type_by_id(obj->btf, ext->btf_id); -		ext->name = btf__name_by_offset(obj->btf, t->name_off); +		ext->name = strdup(btf__name_by_offset(obj->btf, t->name_off)); +		if (!ext->name) +			return -ENOMEM;  		ext->sym_idx = i;  		ext->is_weak = ELF64_ST_BIND(sym->st_info) == STB_WEAK; @@ -4586,10 +4582,20 @@ static int bpf_program__record_reloc(struct bpf_program *prog,  	/* arena data relocation */  	if (shdr_idx == obj->efile.arena_data_shndx) { +		if (obj->arena_map_idx < 0) { +			pr_warn("prog '%s': bad arena data relocation at insn %u, no arena maps defined\n", +				prog->name, insn_idx); +			return -LIBBPF_ERRNO__RELOC; +		}  		reloc_desc->type = RELO_DATA;  		reloc_desc->insn_idx = insn_idx; -		reloc_desc->map_idx = obj->arena_map - obj->maps; +		reloc_desc->map_idx = obj->arena_map_idx;  		reloc_desc->sym_off = sym->st_value; + +		map = &obj->maps[obj->arena_map_idx]; +		pr_debug("prog '%s': found arena map %d (%s, sec %d, off %zu) for insn %u\n", +			 prog->name, obj->arena_map_idx, map->name, map->sec_idx, +			 map->sec_offset, insn_idx);  		return 0;  	} @@ -9145,8 +9151,10 @@ void bpf_object__close(struct bpf_object *obj)  	zfree(&obj->btf_custom_path);  	zfree(&obj->kconfig); -	for (i = 0; i < obj->nr_extern; i++) +	for (i = 0; i < obj->nr_extern; i++) { +		zfree(&obj->externs[i].name);  		zfree(&obj->externs[i].essent_name); +	}  	zfree(&obj->externs);  	obj->nr_extern = 0; @@ -9213,7 +9221,7 @@ int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts)  		return libbpf_err(-EFAULT);  	if (!OPTS_VALID(opts, gen_loader_opts))  		return libbpf_err(-EINVAL); -	gen = calloc(sizeof(*gen), 1); +	gen = calloc(1, sizeof(*gen));  	if (!gen)  		return libbpf_err(-ENOMEM);  	gen->opts = opts; @@ -9455,6 +9463,30 @@ int bpf_program__set_log_buf(struct bpf_program *prog, char 
*log_buf, size_t log  	return 0;  } +struct bpf_func_info *bpf_program__func_info(const struct bpf_program *prog) +{ +	if (prog->func_info_rec_size != sizeof(struct bpf_func_info)) +		return libbpf_err_ptr(-EOPNOTSUPP); +	return prog->func_info; +} + +__u32 bpf_program__func_info_cnt(const struct bpf_program *prog) +{ +	return prog->func_info_cnt; +} + +struct bpf_line_info *bpf_program__line_info(const struct bpf_program *prog) +{ +	if (prog->line_info_rec_size != sizeof(struct bpf_line_info)) +		return libbpf_err_ptr(-EOPNOTSUPP); +	return prog->line_info; +} + +__u32 bpf_program__line_info_cnt(const struct bpf_program *prog) +{ +	return prog->line_info_cnt; +} +  #define SEC_DEF(sec_pfx, ptype, atype, flags, ...) {			    \  	.sec = (char *)sec_pfx,						    \  	.prog_type = BPF_PROG_TYPE_##ptype,				    \ @@ -10064,7 +10096,7 @@ static int find_kernel_btf_id(struct bpf_object *obj, const char *attach_name,  			      enum bpf_attach_type attach_type,  			      int *btf_obj_fd, int *btf_type_id)  { -	int ret, i, mod_len; +	int ret, i, mod_len = 0;  	const char *fn_name, *mod_name = NULL;  	fn_name = strchr(attach_name, ':'); @@ -10933,11 +10965,14 @@ struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *p  		}  		link->link.fd = pfd;  	} -	if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) { -		err = -errno; -		pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n", -			prog->name, pfd, errstr(err)); -		goto err_out; + +	if (!OPTS_GET(opts, dont_enable, false)) { +		if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) { +			err = -errno; +			pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n", +				prog->name, pfd, errstr(err)); +			goto err_out; +		}  	}  	return &link->link; @@ -11121,16 +11156,16 @@ static const char *tracefs_available_filter_functions_addrs(void)  			     : TRACEFS"/available_filter_functions_addrs";  } -static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz, -					 const char *kfunc_name, size_t offset) +static void gen_probe_legacy_event_name(char *buf, size_t buf_sz, +					const char *name, size_t offset)  {  	static int index = 0;  	int i; -	snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx_%d", getpid(), kfunc_name, offset, -		 __sync_fetch_and_add(&index, 1)); +	snprintf(buf, buf_sz, "libbpf_%u_%d_%s_0x%zx", getpid(), +		 __sync_fetch_and_add(&index, 1), name, offset); -	/* sanitize binary_path in the probe name */ +	/* sanitize name in the probe name */  	for (i = 0; buf[i]; i++) {  		if (!isalnum(buf[i]))  			buf[i] = '_'; @@ -11255,9 +11290,9 @@ int probe_kern_syscall_wrapper(int token_fd)  		return pfd >= 0 ? 
1 : 0;  	} else { /* legacy mode */ -		char probe_name[128]; +		char probe_name[MAX_EVENT_NAME_LEN]; -		gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0); +		gen_probe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0);  		if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0)  			return 0; @@ -11313,10 +11348,10 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog,  					    func_name, offset,  					    -1 /* pid */, 0 /* ref_ctr_off */);  	} else { -		char probe_name[256]; +		char probe_name[MAX_EVENT_NAME_LEN]; -		gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), -					     func_name, offset); +		gen_probe_legacy_event_name(probe_name, sizeof(probe_name), +					    func_name, offset);  		legacy_probe = strdup(probe_name);  		if (!legacy_probe) @@ -11860,20 +11895,6 @@ static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, stru  	return ret;  } -static void gen_uprobe_legacy_event_name(char *buf, size_t buf_sz, -					 const char *binary_path, uint64_t offset) -{ -	int i; - -	snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), binary_path, (size_t)offset); - -	/* sanitize binary_path in the probe name */ -	for (i = 0; buf[i]; i++) { -		if (!isalnum(buf[i])) -			buf[i] = '_'; -	} -} -  static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe,  					  const char *binary_path, size_t offset)  { @@ -12297,13 +12318,14 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,  		pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path,  					    func_offset, pid, ref_ctr_off);  	} else { -		char probe_name[PATH_MAX + 64]; +		char probe_name[MAX_EVENT_NAME_LEN];  		if (ref_ctr_off)  			return libbpf_err_ptr(-EINVAL); -		gen_uprobe_legacy_event_name(probe_name, sizeof(probe_name), -					     binary_path, func_offset); +		gen_probe_legacy_event_name(probe_name, sizeof(probe_name), +					    strrchr(binary_path, '/') ? 
: binary_path, +					    func_offset);  		legacy_probe = strdup(probe_name);  		if (!legacy_probe) @@ -12834,6 +12856,34 @@ struct bpf_link *bpf_program__attach_xdp(const struct bpf_program *prog, int ifi  }  struct bpf_link * +bpf_program__attach_cgroup_opts(const struct bpf_program *prog, int cgroup_fd, +				const struct bpf_cgroup_opts *opts) +{ +	LIBBPF_OPTS(bpf_link_create_opts, link_create_opts); +	__u32 relative_id; +	int relative_fd; + +	if (!OPTS_VALID(opts, bpf_cgroup_opts)) +		return libbpf_err_ptr(-EINVAL); + +	relative_id = OPTS_GET(opts, relative_id, 0); +	relative_fd = OPTS_GET(opts, relative_fd, 0); + +	if (relative_fd && relative_id) { +		pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n", +			prog->name); +		return libbpf_err_ptr(-EINVAL); +	} + +	link_create_opts.cgroup.expected_revision = OPTS_GET(opts, expected_revision, 0); +	link_create_opts.cgroup.relative_fd = relative_fd; +	link_create_opts.cgroup.relative_id = relative_id; +	link_create_opts.flags = OPTS_GET(opts, flags, 0); + +	return bpf_program_attach_fd(prog, cgroup_fd, "cgroup", &link_create_opts); +} + +struct bpf_link *  bpf_program__attach_tcx(const struct bpf_program *prog, int ifindex,  			const struct bpf_tcx_opts *opts)  { @@ -13371,7 +13421,6 @@ struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,  	attr.config = PERF_COUNT_SW_BPF_OUTPUT;  	attr.type = PERF_TYPE_SOFTWARE;  	attr.sample_type = PERF_SAMPLE_RAW; -	attr.sample_period = sample_period;  	attr.wakeup_events = sample_period;  	p.attr = &attr; @@ -14099,6 +14148,12 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)  		}  		link = map_skel->link; +		if (!link) { +			pr_warn("map '%s': BPF map skeleton link is uninitialized\n", +				bpf_map__name(map)); +			continue; +		} +  		if (*link)  			continue; diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index e0605403f977..455a957cb702 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -499,9 +499,11 @@ struct bpf_perf_event_opts {  	__u64 bpf_cookie;  	/* don't use BPF link when attach BPF program */  	bool force_ioctl_attach; +	/* don't automatically enable the event */ +	bool dont_enable;  	size_t :0;  }; -#define bpf_perf_event_opts__last_field force_ioctl_attach +#define bpf_perf_event_opts__last_field dont_enable  LIBBPF_API struct bpf_link *  bpf_program__attach_perf_event(const struct bpf_program *prog, int pfd); @@ -877,6 +879,21 @@ LIBBPF_API struct bpf_link *  bpf_program__attach_netkit(const struct bpf_program *prog, int ifindex,  			   const struct bpf_netkit_opts *opts); +struct bpf_cgroup_opts { +	/* size of this struct, for forward/backward compatibility */ +	size_t sz; +	__u32 flags; +	__u32 relative_fd; +	__u32 relative_id; +	__u64 expected_revision; +	size_t :0; +}; +#define bpf_cgroup_opts__last_field expected_revision + +LIBBPF_API struct bpf_link * +bpf_program__attach_cgroup_opts(const struct bpf_program *prog, int cgroup_fd, +				const struct bpf_cgroup_opts *opts); +  struct bpf_map;  LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map); @@ -940,6 +957,12 @@ LIBBPF_API int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_le  LIBBPF_API const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size);  LIBBPF_API int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size); +LIBBPF_API struct bpf_func_info *bpf_program__func_info(const struct bpf_program *prog); +LIBBPF_API __u32 
bpf_program__func_info_cnt(const struct bpf_program *prog); + +LIBBPF_API struct bpf_line_info *bpf_program__line_info(const struct bpf_program *prog); +LIBBPF_API __u32 bpf_program__line_info_cnt(const struct bpf_program *prog); +  /**   * @brief **bpf_program__set_attach_target()** sets BTF-based attach target   * for supported BPF program types: @@ -1283,6 +1306,7 @@ enum bpf_tc_attach_point {  	BPF_TC_INGRESS = 1 << 0,  	BPF_TC_EGRESS  = 1 << 1,  	BPF_TC_CUSTOM  = 1 << 2, +	BPF_TC_QDISC   = 1 << 3,  };  #define BPF_TC_PARENT(a, b) 	\ @@ -1297,9 +1321,11 @@ struct bpf_tc_hook {  	int ifindex;  	enum bpf_tc_attach_point attach_point;  	__u32 parent; +	__u32 handle; +	const char *qdisc;  	size_t :0;  }; -#define bpf_tc_hook__last_field parent +#define bpf_tc_hook__last_field qdisc  struct bpf_tc_opts {  	size_t sz; diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index d8b71f22f197..d7bd463e7017 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -437,6 +437,15 @@ LIBBPF_1.6.0 {  		bpf_linker__add_fd;  		bpf_linker__new_fd;  		bpf_object__prepare; +		bpf_prog_stream_read; +		bpf_program__attach_cgroup_opts; +		bpf_program__func_info; +		bpf_program__func_info_cnt; +		bpf_program__line_info; +		bpf_program__line_info_cnt;  		btf__add_decl_attr;  		btf__add_type_attr;  } LIBBPF_1.5.0; + +LIBBPF_1.7.0 { +} LIBBPF_1.6.0; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 76669c73dcd1..477a3b3389a0 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -667,6 +667,15 @@ static inline int sys_dup3(int oldfd, int newfd, int flags)  	return syscall(__NR_dup3, oldfd, newfd, flags);  } +/* Some versions of Android don't provide memfd_create() in their libc + * implementation, so avoid complications and just go straight to Linux + * syscall. + */ +static inline int sys_memfd_create(const char *name, unsigned flags) +{ +	return syscall(__NR_memfd_create, name, flags); +} +  /* Point *fixed_fd* to the same file that *tmp_fd* points to.   * Regardless of success, *tmp_fd* is closed.   * Whatever *fixed_fd* pointed to is closed silently. 
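
The bpf_cgroup_opts struct and bpf_program__attach_cgroup_opts() declaration in the libbpf.h hunk above bring mprog-style attachment ordering, already available for tcx and netkit, to cgroup programs: a new program can be anchored relative to an already-attached one (by FD or ID, which the bpf.c hunk rejects if both are set), optionally guarded by an expected_revision. A minimal usage sketch, not taken from this series; prog, cgroup_fd and anchor_fd are hypothetical, and BPF_F_BEFORE comes from the kernel UAPI:

	#include <errno.h>
	#include <stdio.h>
	#include <linux/bpf.h>		/* BPF_F_BEFORE */
	#include <bpf/libbpf.h>

	/* Attach prog to cgroup_fd ahead of the already-attached program
	 * identified by anchor_fd. relative_fd and relative_id are mutually
	 * exclusive, as enforced in bpf_link_create() above.
	 */
	static struct bpf_link *attach_first(struct bpf_program *prog,
					     int cgroup_fd, int anchor_fd)
	{
		LIBBPF_OPTS(bpf_cgroup_opts, opts,
			.flags = BPF_F_BEFORE,
			.relative_fd = anchor_fd,
			.expected_revision = 0);	/* 0: don't check attach revision */
		struct bpf_link *link;

		link = bpf_program__attach_cgroup_opts(prog, cgroup_fd, &opts);
		if (!link)	/* NULL on failure, errno carries the error */
			fprintf(stderr, "attach failed: %d\n", -errno);
		return link;
	}

A non-zero expected_revision makes the attach fail if the cgroup's attachment list changed since the revision was observed, which is the same optimistic-locking pattern the tcx/netkit opts use.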
diff --git a/tools/lib/bpf/libbpf_version.h b/tools/lib/bpf/libbpf_version.h index 28c58fb17250..99331e317dee 100644 --- a/tools/lib/bpf/libbpf_version.h +++ b/tools/lib/bpf/libbpf_version.h @@ -4,6 +4,6 @@  #define __LIBBPF_VERSION_H  #define LIBBPF_MAJOR_VERSION 1 -#define LIBBPF_MINOR_VERSION 6 +#define LIBBPF_MINOR_VERSION 7  #endif /* __LIBBPF_VERSION_H */ diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index 800e0ef09c37..a469e5d4fee7 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -573,7 +573,7 @@ int bpf_linker__add_buf(struct bpf_linker *linker, void *buf, size_t buf_sz,  	snprintf(filename, sizeof(filename), "mem:%p+%zu", buf, buf_sz); -	fd = memfd_create(filename, 0); +	fd = sys_memfd_create(filename, 0);  	if (fd < 0) {  		ret = -errno;  		pr_warn("failed to create memfd '%s': %s\n", filename, errstr(ret)); @@ -1376,7 +1376,7 @@ static int linker_append_sec_data(struct bpf_linker *linker, struct src_obj *obj  		} else {  			if (!secs_match(dst_sec, src_sec)) {  				pr_warn("ELF sections %s are incompatible\n", src_sec->sec_name); -				return -1; +				return -EINVAL;  			}  			/* "license" and "version" sections are deduped */ @@ -2223,7 +2223,7 @@ static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *ob  			}  		} else if (!secs_match(dst_sec, src_sec)) {  			pr_warn("sections %s are not compatible\n", src_sec->sec_name); -			return -1; +			return -EINVAL;  		}  		/* shdr->sh_link points to SYMTAB */ diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index 68a2def17175..c997e69d507f 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -529,9 +529,9 @@ int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id)  } -typedef int (*qdisc_config_t)(struct libbpf_nla_req *req); +typedef int (*qdisc_config_t)(struct libbpf_nla_req *req, const struct bpf_tc_hook *hook); -static int clsact_config(struct libbpf_nla_req *req) +static int clsact_config(struct libbpf_nla_req *req, const struct bpf_tc_hook *hook)  {  	req->tc.tcm_parent = TC_H_CLSACT;  	req->tc.tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0); @@ -539,6 +539,16 @@ static int clsact_config(struct libbpf_nla_req *req)  	return nlattr_add(req, TCA_KIND, "clsact", sizeof("clsact"));  } +static int qdisc_config(struct libbpf_nla_req *req, const struct bpf_tc_hook *hook) +{ +	const char *qdisc = OPTS_GET(hook, qdisc, NULL); + +	req->tc.tcm_parent = OPTS_GET(hook, parent, TC_H_ROOT); +	req->tc.tcm_handle = OPTS_GET(hook, handle, 0); + +	return nlattr_add(req, TCA_KIND, qdisc, strlen(qdisc) + 1); +} +  static int attach_point_to_config(struct bpf_tc_hook *hook,  				  qdisc_config_t *config)  { @@ -552,6 +562,9 @@ static int attach_point_to_config(struct bpf_tc_hook *hook,  		return 0;  	case BPF_TC_CUSTOM:  		return -EOPNOTSUPP; +	case BPF_TC_QDISC: +		*config = &qdisc_config; +		return 0;  	default:  		return -EINVAL;  	} @@ -596,7 +609,7 @@ static int tc_qdisc_modify(struct bpf_tc_hook *hook, int cmd, int flags)  	req.tc.tcm_family  = AF_UNSPEC;  	req.tc.tcm_ifindex = OPTS_GET(hook, ifindex, 0); -	ret = config(&req); +	ret = config(&req, hook);  	if (ret < 0)  		return ret; @@ -639,6 +652,7 @@ int bpf_tc_hook_destroy(struct bpf_tc_hook *hook)  	case BPF_TC_INGRESS:  	case BPF_TC_EGRESS:  		return libbpf_err(__bpf_tc_detach(hook, NULL, true)); +	case BPF_TC_QDISC:  	case BPF_TC_INGRESS | BPF_TC_EGRESS:  		return libbpf_err(tc_qdisc_delete(hook));  	case BPF_TC_CUSTOM: diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c index 
975e265eab3b..06663f9ea581 100644 --- a/tools/lib/bpf/nlattr.c +++ b/tools/lib/bpf/nlattr.c @@ -63,16 +63,16 @@ static int validate_nla(struct nlattr *nla, int maxtype,  		minlen = nla_attr_minlen[pt->type];  	if (libbpf_nla_len(nla) < minlen) -		return -1; +		return -EINVAL;  	if (pt->maxlen && libbpf_nla_len(nla) > pt->maxlen) -		return -1; +		return -EINVAL;  	if (pt->type == LIBBPF_NLA_STRING) {  		char *data = libbpf_nla_data(nla);  		if (data[libbpf_nla_len(nla) - 1] != '\0') -			return -1; +			return -EINVAL;  	}  	return 0; @@ -118,19 +118,18 @@ int libbpf_nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head,  		if (policy) {  			err = validate_nla(nla, maxtype, policy);  			if (err < 0) -				goto errout; +				return err;  		} -		if (tb[type]) +		if (tb[type]) {  			pr_warn("Attribute of type %#x found multiple times in message, "  				"previous attribute is being ignored.\n", type); +		}  		tb[type] = nla;  	} -	err = 0; -errout: -	return err; +	return 0;  }  /** diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index 4e4a52742b01..3373b9d45ac4 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -59,7 +59,7 @@   *   * STAP_PROBE3(my_usdt_provider, my_usdt_probe_name, 123, x, &y);   * - * USDT is identified by it's <provider-name>:<probe-name> pair of names. Each + * USDT is identified by its <provider-name>:<probe-name> pair of names. Each   * individual USDT has a fixed number of arguments (3 in the above example)   * and specifies values of each argument as if it was a function call.   * @@ -81,7 +81,7 @@   * NOP instruction that kernel can replace with an interrupt instruction to   * trigger instrumentation code (BPF program for all that we care about).   * - * Semaphore above is and optional feature. It records an address of a 2-byte + * Semaphore above is an optional feature. It records an address of a 2-byte   * refcount variable (normally in '.probes' ELF section) used for signaling if   * there is anything that is attached to USDT. This is useful for user   * applications if, for example, they need to prepare some arguments that are @@ -121,7 +121,7 @@   * a uprobe BPF program (which for kernel, at least currently, is just a kprobe   * program, so BPF_PROG_TYPE_KPROBE program type). With the only difference   * that uprobe is usually attached at the function entry, while USDT will - * normally will be somewhere inside the function. But it should always be + * normally be somewhere inside the function. But it should always be   * pointing to NOP instruction, which makes such uprobes the fastest uprobe   * kind.   * @@ -151,7 +151,7 @@   * libbpf sets to spec ID during attach time, or, if kernel is too old to   * support BPF cookie, through IP-to-spec-ID map that libbpf maintains in such   * case. The latter means that some modes of operation can't be supported - * without BPF cookie. Such mode is attaching to shared library "generically", + * without BPF cookie. Such a mode is attaching to shared library "generically",   * without specifying target process. In such case, it's impossible to   * calculate absolute IP addresses for IP-to-spec-ID map, and thus such mode   * is not supported without BPF cookie support. @@ -185,7 +185,7 @@   * as even if USDT spec string is the same, USDT cookie value can be   * different. 
It was deemed excessive to try to deduplicate across independent   * USDT attachments by taking into account USDT spec string *and* USDT cookie - * value, which would complicated spec ID accounting significantly for little + * value, which would complicate spec ID accounting significantly for little   * gain.   */ diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt index 59aabdd3cabf..4072bc9b7670 100644 --- a/tools/lib/perf/Documentation/libperf.txt +++ b/tools/lib/perf/Documentation/libperf.txt @@ -210,6 +210,7 @@ SYNOPSIS    struct perf_record_time_conv;    struct perf_record_header_feature;    struct perf_record_compressed; +  struct perf_record_compressed2;  --  DESCRIPTION diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c index 4454a5987570..b20a5280f2b3 100644 --- a/tools/lib/perf/cpumap.c +++ b/tools/lib/perf/cpumap.c @@ -242,6 +242,16 @@ out:  	return cpus;  } +struct perf_cpu_map *perf_cpu_map__new_int(int cpu) +{ +	struct perf_cpu_map *cpus = perf_cpu_map__alloc(1); + +	if (cpus) +		RC_CHK_ACCESS(cpus)->map[0].cpu = cpu; + +	return cpus; +} +  static int __perf_cpu_map__nr(const struct perf_cpu_map *cpus)  {  	return RC_CHK_ACCESS(cpus)->nr; diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index b1f4c8176b32..3ed023f4b190 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -36,49 +36,88 @@ void perf_evlist__init(struct perf_evlist *evlist)  static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,  					  struct perf_evsel *evsel)  { -	if (evsel->system_wide) { -		/* System wide: set the cpu map of the evsel to all online CPUs. */ -		perf_cpu_map__put(evsel->cpus); -		evsel->cpus = perf_cpu_map__new_online_cpus(); -	} else if (evlist->has_user_cpus && evsel->is_pmu_core) { -		/* -		 * User requested CPUs on a core PMU, ensure the requested CPUs -		 * are valid by intersecting with those of the PMU. -		 */ +	if (perf_cpu_map__is_empty(evsel->cpus)) { +		if (perf_cpu_map__is_empty(evsel->pmu_cpus)) { +			/* +			 * Assume the unset PMU cpus were for a system-wide +			 * event, like a software or tracepoint. +			 */ +			evsel->pmu_cpus = perf_cpu_map__new_online_cpus(); +		} +		if (evlist->has_user_cpus && !evsel->system_wide) { +			/* +			 * Use the user CPUs unless the evsel is set to be +			 * system wide, such as the dummy event. +			 */ +			evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus); +		} else { +			/* +			 * System wide and other modes, assume the cpu map +			 * should be set to all PMU CPUs. +			 */ +			evsel->cpus = perf_cpu_map__get(evsel->pmu_cpus); +		} +	} +	/* +	 * Avoid "any CPU"(-1) for uncore and PMUs that require a CPU, even if +	 * requested. +	 */ +	if (evsel->requires_cpu && perf_cpu_map__has_any_cpu(evsel->cpus)) {  		perf_cpu_map__put(evsel->cpus); -		evsel->cpus = perf_cpu_map__intersect(evlist->user_requested_cpus, evsel->own_cpus); +		evsel->cpus = perf_cpu_map__get(evsel->pmu_cpus); +	} -		/* -		 * Empty cpu lists would eventually get opened as "any" so remove -		 * genuinely empty ones before they're opened in the wrong place. -		 */ -		if (perf_cpu_map__is_empty(evsel->cpus)) { -			struct perf_evsel *next = perf_evlist__next(evlist, evsel); - -			perf_evlist__remove(evlist, evsel); -			/* Keep idx contiguous */ -			if (next) -				list_for_each_entry_from(next, &evlist->entries, node) -					next->idx--; +	/* +	 * Globally requested CPUs replace user requested unless the evsel is +	 * set to be system wide. 
+	 */ +	if (evlist->has_user_cpus && !evsel->system_wide) { +		assert(!perf_cpu_map__has_any_cpu(evlist->user_requested_cpus)); +		if (!perf_cpu_map__equal(evsel->cpus, evlist->user_requested_cpus)) { +			perf_cpu_map__put(evsel->cpus); +			evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus);  		} -	} else if (!evsel->own_cpus || evlist->has_user_cpus || -		(!evsel->requires_cpu && perf_cpu_map__has_any_cpu(evlist->user_requested_cpus))) { -		/* -		 * The PMU didn't specify a default cpu map, this isn't a core -		 * event and the user requested CPUs or the evlist user -		 * requested CPUs have the "any CPU" (aka dummy) CPU value. In -		 * which case use the user requested CPUs rather than the PMU -		 * ones. -		 */ +	} + +	/* Ensure cpus only references valid PMU CPUs. */ +	if (!perf_cpu_map__has_any_cpu(evsel->cpus) && +	    !perf_cpu_map__is_subset(evsel->pmu_cpus, evsel->cpus)) { +		struct perf_cpu_map *tmp = perf_cpu_map__intersect(evsel->pmu_cpus, evsel->cpus); +  		perf_cpu_map__put(evsel->cpus); -		evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus); -	} else if (evsel->cpus != evsel->own_cpus) { -		/* -		 * No user requested cpu map but the PMU cpu map doesn't match -		 * the evsel's. Reset it back to the PMU cpu map. -		 */ +		evsel->cpus = tmp; +	} + +	/* +	 * Was event requested on all the PMU's CPUs but the user requested is +	 * any CPU (-1)? If so switch to using any CPU (-1) to reduce the number +	 * of events. +	 */ +	if (!evsel->system_wide && +	    !evsel->requires_cpu && +	    perf_cpu_map__equal(evsel->cpus, evsel->pmu_cpus) && +	    perf_cpu_map__has_any_cpu(evlist->user_requested_cpus)) {  		perf_cpu_map__put(evsel->cpus); -		evsel->cpus = perf_cpu_map__get(evsel->own_cpus); +		evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus); +	} + +	/* Sanity check assert before the evsel is potentially removed. */ +	assert(!evsel->requires_cpu || !perf_cpu_map__has_any_cpu(evsel->cpus)); + +	/* +	 * Empty cpu lists would eventually get opened as "any" so remove +	 * genuinely empty ones before they're opened in the wrong place. +	 */ +	if (perf_cpu_map__is_empty(evsel->cpus)) { +		struct perf_evsel *next = perf_evlist__next(evlist, evsel); + +		perf_evlist__remove(evlist, evsel); +		/* Keep idx contiguous */ +		if (next) +			list_for_each_entry_from(next, &evlist->entries, node) +				next->idx--; + +		return;  	}  	if (evsel->system_wide) { @@ -98,6 +137,10 @@ static void perf_evlist__propagate_maps(struct perf_evlist *evlist)  	evlist->needs_map_propagation = true; +	/* Clear the all_cpus set which will be merged into during propagation. */ +	perf_cpu_map__put(evlist->all_cpus); +	evlist->all_cpus = NULL; +  	list_for_each_entry_safe(evsel, n, &evlist->entries, node)  		__perf_evlist__propagate_maps(evlist, evsel);  } diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c index c475319e2e41..13a307fc75ae 100644 --- a/tools/lib/perf/evsel.c +++ b/tools/lib/perf/evsel.c @@ -40,8 +40,19 @@ struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr)  	return evsel;  } +void perf_evsel__exit(struct perf_evsel *evsel) +{ +	assert(evsel->fd == NULL);  /* If not fds were not closed. */ +	assert(evsel->mmap == NULL); /* If not munmap wasn't called. */ +	assert(evsel->sample_id == NULL); /* If not free_id wasn't called. 
*/ +	perf_cpu_map__put(evsel->cpus); +	perf_cpu_map__put(evsel->pmu_cpus); +	perf_thread_map__put(evsel->threads); +} +  void perf_evsel__delete(struct perf_evsel *evsel)  { +	perf_evsel__exit(evsel);  	free(evsel);  } diff --git a/tools/lib/perf/include/internal/evsel.h b/tools/lib/perf/include/internal/evsel.h index ea78defa77d0..fefe64ba5e26 100644 --- a/tools/lib/perf/include/internal/evsel.h +++ b/tools/lib/perf/include/internal/evsel.h @@ -99,7 +99,7 @@ struct perf_evsel {  	 * cpu map for opening the event on, for example, the first CPU on a  	 * socket for an uncore event.  	 */ -	struct perf_cpu_map	*own_cpus; +	struct perf_cpu_map	*pmu_cpus;  	struct perf_thread_map	*threads;  	struct xyarray		*fd;  	struct xyarray		*mmap; @@ -133,6 +133,7 @@ struct perf_evsel {  void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr,  		      int idx); +void perf_evsel__exit(struct perf_evsel *evsel);  int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads);  void perf_evsel__close_fd(struct perf_evsel *evsel);  void perf_evsel__free_fd(struct perf_evsel *evsel); diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h index 8c1ab0f9194e..58cc5c5fa47c 100644 --- a/tools/lib/perf/include/perf/cpumap.h +++ b/tools/lib/perf/include/perf/cpumap.h @@ -37,6 +37,8 @@ LIBPERF_API struct perf_cpu_map *perf_cpu_map__new_online_cpus(void);   *                     perf_cpu_map__new_online_cpus is returned.   */  LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list); +/** perf_cpu_map__new_int - create a map with the one given cpu. */ +LIBPERF_API struct perf_cpu_map *perf_cpu_map__new_int(int cpu);  LIBPERF_API struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map);  LIBPERF_API int perf_cpu_map__merge(struct perf_cpu_map **orig,  				    struct perf_cpu_map *other); diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h index 37bb7771d914..6608f1e3701b 100644 --- a/tools/lib/perf/include/perf/event.h +++ b/tools/lib/perf/include/perf/event.h @@ -457,6 +457,32 @@ struct perf_record_compressed {  	char			 data[];  }; +/* + * `header.size` includes the padding we are going to add while writing the record. + * `data_size` only includes the size of `data[]` itself. 
+ */ +struct perf_record_compressed2 { +	struct perf_event_header header; +	__u64			 data_size; +	char			 data[]; +}; + +#define BPF_METADATA_KEY_LEN   64 +#define BPF_METADATA_VALUE_LEN 256 +#define BPF_PROG_NAME_LEN      KSYM_NAME_LEN + +struct perf_record_bpf_metadata_entry { +	char key[BPF_METADATA_KEY_LEN]; +	char value[BPF_METADATA_VALUE_LEN]; +}; + +struct perf_record_bpf_metadata { +	struct perf_event_header	      header; +	char				      prog_name[BPF_PROG_NAME_LEN]; +	__u64				      nr_entries; +	struct perf_record_bpf_metadata_entry entries[]; +}; +  enum perf_user_event_type { /* above any possible kernel type */  	PERF_RECORD_USER_TYPE_START		= 64,  	PERF_RECORD_HEADER_ATTR			= 64, @@ -478,6 +504,8 @@ enum perf_user_event_type { /* above any possible kernel type */  	PERF_RECORD_HEADER_FEATURE		= 80,  	PERF_RECORD_COMPRESSED			= 81,  	PERF_RECORD_FINISHED_INIT		= 82, +	PERF_RECORD_COMPRESSED2			= 83, +	PERF_RECORD_BPF_METADATA		= 84,  	PERF_RECORD_HEADER_MAX  }; @@ -518,6 +546,8 @@ union perf_event {  	struct perf_record_time_conv		time_conv;  	struct perf_record_header_feature	feat;  	struct perf_record_compressed		pack; +	struct perf_record_compressed2		pack2; +	struct perf_record_bpf_metadata		bpf_metadata;  };  #endif /* __LIBPERF_EVENT_H */ diff --git a/tools/lib/perf/include/perf/threadmap.h b/tools/lib/perf/include/perf/threadmap.h index 8b40e7777cea..44deb815b817 100644 --- a/tools/lib/perf/include/perf/threadmap.h +++ b/tools/lib/perf/include/perf/threadmap.h @@ -14,6 +14,7 @@ LIBPERF_API void perf_thread_map__set_pid(struct perf_thread_map *map, int idx,  LIBPERF_API char *perf_thread_map__comm(struct perf_thread_map *map, int idx);  LIBPERF_API int perf_thread_map__nr(struct perf_thread_map *threads);  LIBPERF_API pid_t perf_thread_map__pid(struct perf_thread_map *map, int idx); +LIBPERF_API int perf_thread_map__idx(struct perf_thread_map *map, pid_t pid);  LIBPERF_API struct perf_thread_map *perf_thread_map__get(struct perf_thread_map *map);  LIBPERF_API void perf_thread_map__put(struct perf_thread_map *map); diff --git a/tools/lib/perf/threadmap.c b/tools/lib/perf/threadmap.c index 07968f3ea093..db431b036f57 100644 --- a/tools/lib/perf/threadmap.c +++ b/tools/lib/perf/threadmap.c @@ -97,5 +97,22 @@ int perf_thread_map__nr(struct perf_thread_map *threads)  pid_t perf_thread_map__pid(struct perf_thread_map *map, int idx)  { +	if (!map) { +		assert(idx == 0); +		return -1; +	} +  	return map->map[idx].pid;  } + +int perf_thread_map__idx(struct perf_thread_map *threads, pid_t pid) +{ +	if (!threads) +		return pid == -1 ? 
0 : -1; + +	for (int i = 0; i < threads->nr; ++i) { +		if (threads->map[i].pid == pid) +			return i; +	} +	return -1; +} diff --git a/tools/lib/subcmd/help.c b/tools/lib/subcmd/help.c index 8561b0f01a24..9ef569492560 100644 --- a/tools/lib/subcmd/help.c +++ b/tools/lib/subcmd/help.c @@ -9,6 +9,7 @@  #include <sys/stat.h>  #include <unistd.h>  #include <dirent.h> +#include <assert.h>  #include "subcmd-util.h"  #include "help.h"  #include "exec-cmd.h" @@ -82,10 +83,11 @@ void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes)  				ci++;  				cj++;  			} else { -				zfree(&cmds->names[cj]); -				cmds->names[cj++] = cmds->names[ci++]; +				cmds->names[cj++] = cmds->names[ci]; +				cmds->names[ci++] = NULL;  			}  		} else if (cmp == 0) { +			zfree(&cmds->names[ci]);  			ci++;  			ei++;  		} else if (cmp > 0) { @@ -94,12 +96,12 @@ void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes)  	}  	if (ci != cj) {  		while (ci < cmds->cnt) { -			zfree(&cmds->names[cj]); -			cmds->names[cj++] = cmds->names[ci++]; +			cmds->names[cj++] = cmds->names[ci]; +			cmds->names[ci++] = NULL;  		}  	}  	for (ci = cj; ci < cmds->cnt; ci++) -		zfree(&cmds->names[ci]); +		assert(cmds->names[ci] == NULL);  	cmds->cnt = cj;  } diff --git a/tools/lib/subcmd/run-command.c b/tools/lib/subcmd/run-command.c index 0a764c25c384..b7510f83209a 100644 --- a/tools/lib/subcmd/run-command.c +++ b/tools/lib/subcmd/run-command.c @@ -5,6 +5,7 @@  #include <ctype.h>  #include <fcntl.h>  #include <string.h> +#include <linux/compiler.h>  #include <linux/string.h>  #include <errno.h>  #include <sys/wait.h> @@ -216,10 +217,20 @@ static int wait_or_whine(struct child_process *cmd, bool block)  	return result;  } +/* + * Conservative estimate of number of characters needed to hold a decoded + * integer, assume each 3 bits needs a character byte plus a possible sign + * character. + */ +#ifndef is_signed_type +#define is_signed_type(type) (((type)(-1)) < (type)1) +#endif +#define MAX_STRLEN_TYPE(type) (sizeof(type) * 8 / 3 + (is_signed_type(type) ? 1 : 0)) +  int check_if_command_finished(struct child_process *cmd)  {  #ifdef __linux__ -	char filename[FILENAME_MAX + 12]; +	char filename[6 + MAX_STRLEN_TYPE(typeof(cmd->pid)) + 7 + 1];  	char status_line[256];  	FILE *status_file;  @@ -227,7 +238,7 @@ int check_if_command_finished(struct child_process *cmd)  	 * Check by reading /proc/<pid>/status as calling waitpid causes  	 * stdout/stderr to be closed and data lost.  	 */ -	sprintf(filename, "/proc/%d/status", cmd->pid); +	sprintf(filename, "/proc/%u/status", cmd->pid);  	status_file = fopen(filename, "r");  	if (status_file == NULL) {  		/* Open failed assume finish_command was called. */
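
Taken together, the bpf_stream_printk() macro added to bpf_helpers.h and the new bpf_prog_stream_read() wrapper in bpf.c give a printf-like path from a BPF program to user space. A sketch of both halves, assuming the BPF_STDOUT stream ID from the kernel side of this series; prog_fd and nr_pkts are hypothetical:

	/* BPF program side, using the bpf_helpers.h macro above: */
	bpf_stream_printk(BPF_STDOUT, "processed %d packets\n", nr_pkts);

	/* User-space side (<stdio.h>, <bpf/bpf.h>): drain that program's
	 * stdout stream after it has run.
	 */
	static void dump_stream(int prog_fd)
	{
		char buf[4096];
		int n = bpf_prog_stream_read(prog_fd, BPF_STDOUT, buf,
					     sizeof(buf), NULL);	/* opts may be NULL */

		if (n > 0)
			fwrite(buf, 1, n, stdout);	/* n bytes, not NUL-terminated */
		else if (n < 0)
			fprintf(stderr, "stream read: %d\n", n);	/* errno also set */
	}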

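The BPF_TC_QDISC attach point and the new handle/qdisc fields in bpf_tc_hook (libbpf.h hunk above, wired up in netlink.c's qdisc_config()) let bpf_tc_hook_create() install an arbitrary qdisc by name rather than only clsact. A minimal sketch; the ifindex, the 0x8000000 handle and the "fq" qdisc name are only illustrative:

	#include <linux/pkt_sched.h>	/* TC_H_ROOT */
	#include <bpf/libbpf.h>

	static int create_qdisc(int ifindex)
	{
		/* .handle and .qdisc are the new bpf_tc_hook fields. */
		LIBBPF_OPTS(bpf_tc_hook, hook,
			.ifindex = ifindex,
			.attach_point = BPF_TC_QDISC,
			.parent = TC_H_ROOT,
			.handle = 0x8000000,
			.qdisc = "fq");

		return bpf_tc_hook_create(&hook);	/* bpf_tc_hook_destroy() removes it */
	}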