From 166750bc1dd256b2184b22588fb9fe6d3fbb93ae Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 13 Dec 2019 17:47:08 -0800 Subject: libbpf: Support libbpf-provided extern variables Add support for extern variables, provided to BPF program by libbpf. Currently the following extern variables are supported: - LINUX_KERNEL_VERSION; version of a kernel in which BPF program is executing, follows KERNEL_VERSION() macro convention, can be 4- and 8-byte long; - CONFIG_xxx values; a set of values of actual kernel config. Tristate, boolean, strings, and integer values are supported. Set of possible values is determined by declared type of extern variable. Supported types of variables are: - Tristate values. Are represented as `enum libbpf_tristate`. Accepted values are **strictly** 'y', 'n', or 'm', which are represented as TRI_YES, TRI_NO, or TRI_MODULE, respectively. - Boolean values. Are represented as bool (_Bool) types. Accepted values are 'y' and 'n' only, turning into true/false values, respectively. - Single-character values. Can be used both as a substritute for bool/tristate, or as a small-range integer: - 'y'/'n'/'m' are represented as is, as characters 'y', 'n', or 'm'; - integers in a range [-128, 127] or [0, 255] (depending on signedness of char in target architecture) are recognized and represented with respective values of char type. - Strings. String values are declared as fixed-length char arrays. String of up to that length will be accepted and put in first N bytes of char array, with the rest of bytes zeroed out. If config string value is longer than space alloted, it will be truncated and warning message emitted. Char array is always zero terminated. String literals in config have to be enclosed in double quotes, just like C-style string literals. - Integers. 8-, 16-, 32-, and 64-bit integers are supported, both signed and unsigned variants. Libbpf enforces parsed config value to be in the supported range of corresponding integer type. Integers values in config can be: - decimal integers, with optional + and - signs; - hexadecimal integers, prefixed with 0x or 0X; - octal integers, starting with 0. Config file itself is searched in /boot/config-$(uname -r) location with fallback to /proc/config.gz, unless config path is specified explicitly through bpf_object_open_opts' kernel_config_path option. Both gzipped and plain text formats are supported. Libbpf adds explicit dependency on zlib because of this, but this shouldn't be a problem, given libelf already depends on zlib. All detected extern variables, are put into a separate .extern internal map. It, similarly to .rodata map, is marked as read-only from BPF program side, as well as is frozen on load. This allows BPF verifier to track extern values as constants and perform enhanced branch prediction and dead code elimination. This can be relied upon for doing kernel version/feature detection and using potentially unsupported field relocations or BPF helpers in a CO-RE-based BPF program, while still having a single version of BPF program running on old and new kernels. Selftests are validating this explicitly for unexisting BPF helper. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20191214014710.3449601-3-andriin@fb.com --- tools/include/uapi/linux/btf.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'tools/include/uapi/linux') diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h index 63ae4a39e58b..1a2898c482ee 100644 --- a/tools/include/uapi/linux/btf.h +++ b/tools/include/uapi/linux/btf.h @@ -22,9 +22,9 @@ struct btf_header { }; /* Max # of type identifier */ -#define BTF_MAX_TYPE 0x0000ffff +#define BTF_MAX_TYPE 0x000fffff /* Max offset into the string section */ -#define BTF_MAX_NAME_OFFSET 0x0000ffff +#define BTF_MAX_NAME_OFFSET 0x00ffffff /* Max # of struct/union/enum members or func args */ #define BTF_MAX_VLEN 0xffff @@ -142,7 +142,8 @@ struct btf_param { enum { BTF_VAR_STATIC = 0, - BTF_VAR_GLOBAL_ALLOCATED, + BTF_VAR_GLOBAL_ALLOCATED = 1, + BTF_VAR_GLOBAL_EXTERN = 2, }; /* BTF_KIND_VAR is followed by a single "struct btf_var" to describe -- cgit v1.2.3 From 7dd68b3279f1792103d12e69933db3128c6d416e Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Wed, 18 Dec 2019 23:44:35 -0800 Subject: bpf: Support replacing cgroup-bpf program in MULTI mode The common use-case in production is to have multiple cgroup-bpf programs per attach type that cover multiple use-cases. Such programs are attached with BPF_F_ALLOW_MULTI and can be maintained by different people. Order of programs usually matters, for example imagine two egress programs: the first one drops packets and the second one counts packets. If they're swapped the result of counting program will be different. It brings operational challenges with updating cgroup-bpf program(s) attached with BPF_F_ALLOW_MULTI since there is no way to replace a program: * One way to update is to detach all programs first and then attach the new version(s) again in the right order. This introduces an interruption in the work a program is doing and may not be acceptable (e.g. if it's egress firewall); * Another way is attach the new version of a program first and only then detach the old version. This introduces the time interval when two versions of same program are working, what may not be acceptable if a program is not idempotent. It also imposes additional burden on program developers to make sure that two versions of their program can co-exist. Solve the problem by introducing a "replace" mode in BPF_PROG_ATTACH command for cgroup-bpf programs being attached with BPF_F_ALLOW_MULTI flag. This mode is enabled by newly introduced BPF_F_REPLACE attach flag and bpf_attr.replace_bpf_fd attribute to pass fd of the old program to replace That way user can replace any program among those attached with BPF_F_ALLOW_MULTI flag without the problems described above. Details of the new API: * If BPF_F_REPLACE is set but replace_bpf_fd doesn't have valid descriptor of BPF program, BPF_PROG_ATTACH will return corresponding error (EINVAL or EBADF). * If replace_bpf_fd has valid descriptor of BPF program but such a program is not attached to specified cgroup, BPF_PROG_ATTACH will return ENOENT. BPF_F_REPLACE is introduced to make the user intent clear, since replace_bpf_fd alone can't be used for this (its default value, 0, is a valid fd). BPF_F_REPLACE also makes it possible to extend the API in the future (e.g. add BPF_F_BEFORE and BPF_F_AFTER if needed). Signed-off-by: Andrey Ignatov Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Acked-by: Andrii Narkyiko Link: https://lore.kernel.org/bpf/30cd850044a0057bdfcaaf154b7d2f39850ba813.1576741281.git.rdna@fb.com --- include/linux/bpf-cgroup.h | 4 +++- include/uapi/linux/bpf.h | 10 ++++++++++ kernel/bpf/cgroup.c | 30 ++++++++++++++++++++++++++---- kernel/bpf/syscall.c | 4 ++-- kernel/cgroup/cgroup.c | 5 +++-- tools/include/uapi/linux/bpf.h | 10 ++++++++++ 6 files changed, 54 insertions(+), 9 deletions(-) (limited to 'tools/include/uapi/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 169fd25f6bc2..18f6a6da7c3c 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -85,6 +85,7 @@ int cgroup_bpf_inherit(struct cgroup *cgrp); void cgroup_bpf_offline(struct cgroup *cgrp); int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, + struct bpf_prog *replace_prog, enum bpf_attach_type type, u32 flags); int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, enum bpf_attach_type type); @@ -93,7 +94,8 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, /* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */ int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, - enum bpf_attach_type type, u32 flags); + struct bpf_prog *replace_prog, enum bpf_attach_type type, + u32 flags); int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, enum bpf_attach_type type, u32 flags); int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index dbbcf0b02970..7df436da542d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -231,6 +231,11 @@ enum bpf_attach_type { * When children program makes decision (like picking TCP CA or sock bind) * parent program has a chance to override it. * + * With BPF_F_ALLOW_MULTI a new program is added to the end of the list of + * programs for a cgroup. Though it's possible to replace an old program at + * any position by also specifying BPF_F_REPLACE flag and position itself in + * replace_bpf_fd attribute. Old program at this position will be released. + * * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups. * A cgroup with NONE doesn't allow any programs in sub-cgroups. * Ex1: @@ -249,6 +254,7 @@ enum bpf_attach_type { */ #define BPF_F_ALLOW_OVERRIDE (1U << 0) #define BPF_F_ALLOW_MULTI (1U << 1) +#define BPF_F_REPLACE (1U << 2) /* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the * verifier will perform strict alignment checking as if the kernel @@ -442,6 +448,10 @@ union bpf_attr { __u32 attach_bpf_fd; /* eBPF program to attach */ __u32 attach_type; __u32 attach_flags; + __u32 replace_bpf_fd; /* previously attached eBPF + * program to replace if + * BPF_F_REPLACE is used + */ }; struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */ diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 283efe3ce052..45346c79613a 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -282,14 +282,17 @@ cleanup: * propagate the change to descendants * @cgrp: The cgroup which descendants to traverse * @prog: A program to attach + * @replace_prog: Previously attached program to replace if BPF_F_REPLACE is set * @type: Type of attach operation * @flags: Option flags * * Must be called with cgroup_mutex held. */ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, + struct bpf_prog *replace_prog, enum bpf_attach_type type, u32 flags) { + u32 saved_flags = (flags & (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI)); struct list_head *progs = &cgrp->bpf.progs[type]; struct bpf_prog *old_prog = NULL; struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE], @@ -298,14 +301,15 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, enum bpf_cgroup_storage_type stype; int err; - if ((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI)) + if (((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI)) || + ((flags & BPF_F_REPLACE) && !(flags & BPF_F_ALLOW_MULTI))) /* invalid combination */ return -EINVAL; if (!hierarchy_allows_attach(cgrp, type)) return -EPERM; - if (!list_empty(progs) && cgrp->bpf.flags[type] != flags) + if (!list_empty(progs) && cgrp->bpf.flags[type] != saved_flags) /* Disallow attaching non-overridable on top * of existing overridable in this cgroup. * Disallow attaching multi-prog if overridable or none @@ -320,7 +324,12 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, if (pl->prog == prog) /* disallow attaching the same prog twice */ return -EINVAL; + if (pl->prog == replace_prog) + replace_pl = pl; } + if ((flags & BPF_F_REPLACE) && !replace_pl) + /* prog to replace not found for cgroup */ + return -ENOENT; } else if (!list_empty(progs)) { replace_pl = list_first_entry(progs, typeof(*pl), node); } @@ -356,7 +365,7 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, for_each_cgroup_storage_type(stype) pl->storage[stype] = storage[stype]; - cgrp->bpf.flags[type] = flags; + cgrp->bpf.flags[type] = saved_flags; err = update_effective_progs(cgrp, type); if (err) @@ -522,6 +531,7 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, int cgroup_bpf_prog_attach(const union bpf_attr *attr, enum bpf_prog_type ptype, struct bpf_prog *prog) { + struct bpf_prog *replace_prog = NULL; struct cgroup *cgrp; int ret; @@ -529,8 +539,20 @@ int cgroup_bpf_prog_attach(const union bpf_attr *attr, if (IS_ERR(cgrp)) return PTR_ERR(cgrp); - ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type, + if ((attr->attach_flags & BPF_F_ALLOW_MULTI) && + (attr->attach_flags & BPF_F_REPLACE)) { + replace_prog = bpf_prog_get_type(attr->replace_bpf_fd, ptype); + if (IS_ERR(replace_prog)) { + cgroup_put(cgrp); + return PTR_ERR(replace_prog); + } + } + + ret = cgroup_bpf_attach(cgrp, prog, replace_prog, attr->attach_type, attr->attach_flags); + + if (replace_prog) + bpf_prog_put(replace_prog); cgroup_put(cgrp); return ret; } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index b08c362f4e02..81ee8595dfee 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2073,10 +2073,10 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, } } -#define BPF_PROG_ATTACH_LAST_FIELD attach_flags +#define BPF_PROG_ATTACH_LAST_FIELD replace_bpf_fd #define BPF_F_ATTACH_MASK \ - (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI) + (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI | BPF_F_REPLACE) static int bpf_prog_attach(const union bpf_attr *attr) { diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 735af8f15f95..725365df066d 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -6288,12 +6288,13 @@ void cgroup_sk_free(struct sock_cgroup_data *skcd) #ifdef CONFIG_CGROUP_BPF int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, - enum bpf_attach_type type, u32 flags) + struct bpf_prog *replace_prog, enum bpf_attach_type type, + u32 flags) { int ret; mutex_lock(&cgroup_mutex); - ret = __cgroup_bpf_attach(cgrp, prog, type, flags); + ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, type, flags); mutex_unlock(&cgroup_mutex); return ret; } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index dbbcf0b02970..7df436da542d 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -231,6 +231,11 @@ enum bpf_attach_type { * When children program makes decision (like picking TCP CA or sock bind) * parent program has a chance to override it. * + * With BPF_F_ALLOW_MULTI a new program is added to the end of the list of + * programs for a cgroup. Though it's possible to replace an old program at + * any position by also specifying BPF_F_REPLACE flag and position itself in + * replace_bpf_fd attribute. Old program at this position will be released. + * * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups. * A cgroup with NONE doesn't allow any programs in sub-cgroups. * Ex1: @@ -249,6 +254,7 @@ enum bpf_attach_type { */ #define BPF_F_ALLOW_OVERRIDE (1U << 0) #define BPF_F_ALLOW_MULTI (1U << 1) +#define BPF_F_REPLACE (1U << 2) /* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the * verifier will perform strict alignment checking as if the kernel @@ -442,6 +448,10 @@ union bpf_attr { __u32 attach_bpf_fd; /* eBPF program to attach */ __u32 attach_type; __u32 attach_flags; + __u32 replace_bpf_fd; /* previously attached eBPF + * program to replace if + * BPF_F_REPLACE is used + */ }; struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */ -- cgit v1.2.3