From af6eea57437a830293eab56246b6025cc7d46ee7 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sun, 29 Mar 2020 19:59:58 -0700 Subject: bpf: Implement bpf_link-based cgroup BPF program attachment Implement new sub-command to attach cgroup BPF programs and return FD-based bpf_link back on success. bpf_link, once attached to cgroup, cannot be replaced, except by owner having its FD. Cgroup bpf_link supports only BPF_F_ALLOW_MULTI semantics. Both link-based and prog-based BPF_F_ALLOW_MULTI attachments can be freely intermixed. To prevent bpf_cgroup_link from keeping cgroup alive past the point when no BPF program can be executed, implement auto-detachment of link. When cgroup_bpf_release() is called, all attached bpf_links are forced to release cgroup refcounts, but they leave bpf_link otherwise active and allocated, as well as still owning underlying bpf_prog. This is because user-space might still have FDs open and active, so bpf_link as a user-referenced object can't be freed yet. Once last active FD is closed, bpf_link will be freed and underlying bpf_prog refcount will be dropped. But cgroup refcount won't be touched, because cgroup is released already. The inherent race between bpf_cgroup_link release (from closing last FD) and cgroup_bpf_release() is resolved by both operations taking cgroup_mutex. So the only additional check required is when bpf_cgroup_link attempts to detach itself from cgroup. At that time we need to check whether there is still cgroup associated with that link. And if not, exit with success, because bpf_cgroup_link was already successfully detached. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Acked-by: Roman Gushchin Link: https://lore.kernel.org/bpf/20200330030001.2312810-2-andriin@fb.com --- include/linux/bpf-cgroup.h | 29 ++++++++++++++++++++++++----- include/linux/bpf.h | 10 +++++++++- 2 files changed, 33 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index a7cd5c7a2509..d2d969669564 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -51,9 +51,18 @@ struct bpf_cgroup_storage { struct rcu_head rcu; }; +struct bpf_cgroup_link { + struct bpf_link link; + struct cgroup *cgroup; + enum bpf_attach_type type; +}; + +extern const struct bpf_link_ops bpf_cgroup_link_lops; + struct bpf_prog_list { struct list_head node; struct bpf_prog *prog; + struct bpf_cgroup_link *link; struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]; }; @@ -84,20 +93,23 @@ struct cgroup_bpf { int cgroup_bpf_inherit(struct cgroup *cgrp); void cgroup_bpf_offline(struct cgroup *cgrp); -int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, - struct bpf_prog *replace_prog, +int __cgroup_bpf_attach(struct cgroup *cgrp, + struct bpf_prog *prog, struct bpf_prog *replace_prog, + struct bpf_cgroup_link *link, enum bpf_attach_type type, u32 flags); int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, + struct bpf_cgroup_link *link, enum bpf_attach_type type); int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, union bpf_attr __user *uattr); /* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */ -int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, - struct bpf_prog *replace_prog, enum bpf_attach_type type, +int cgroup_bpf_attach(struct cgroup *cgrp, + struct bpf_prog *prog, struct bpf_prog *replace_prog, + struct bpf_cgroup_link *link, enum bpf_attach_type type, u32 flags); int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, - enum bpf_attach_type type, u32 flags); + enum bpf_attach_type type); int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, union bpf_attr __user *uattr); @@ -332,6 +344,7 @@ int cgroup_bpf_prog_attach(const union bpf_attr *attr, enum bpf_prog_type ptype, struct bpf_prog *prog); int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); +int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); int cgroup_bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr); #else @@ -354,6 +367,12 @@ static inline int cgroup_bpf_prog_detach(const union bpf_attr *attr, return -EINVAL; } +static inline int cgroup_bpf_link_attach(const union bpf_attr *attr, + struct bpf_prog *prog) +{ + return -EINVAL; +} + static inline int cgroup_bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) { diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 3bde59a8453b..56254d880293 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1082,15 +1082,23 @@ extern int sysctl_unprivileged_bpf_disabled; int bpf_map_new_fd(struct bpf_map *map, int flags); int bpf_prog_new_fd(struct bpf_prog *prog); -struct bpf_link; +struct bpf_link { + atomic64_t refcnt; + const struct bpf_link_ops *ops; + struct bpf_prog *prog; + struct work_struct work; +}; struct bpf_link_ops { void (*release)(struct bpf_link *link); void (*dealloc)(struct bpf_link *link); + }; void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops, struct bpf_prog *prog); +void bpf_link_cleanup(struct bpf_link *link, struct file *link_file, + int link_fd); void bpf_link_inc(struct bpf_link *link); void bpf_link_put(struct bpf_link *link); int bpf_link_new_fd(struct bpf_link *link); -- cgit v1.2.3 From 0c991ebc8c69d29b7fc44db17075c5aa5253e2ab Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sun, 29 Mar 2020 19:59:59 -0700 Subject: bpf: Implement bpf_prog replacement for an active bpf_cgroup_link Add new operation (LINK_UPDATE), which allows to replace active bpf_prog from under given bpf_link. Currently this is only supported for bpf_cgroup_link, but will be extended to other kinds of bpf_links in follow-up patches. For bpf_cgroup_link, implemented functionality matches existing semantics for direct bpf_prog attachment (including BPF_F_REPLACE flag). User can either unconditionally set new bpf_prog regardless of which bpf_prog is currently active under given bpf_link, or, optionally, can specify expected active bpf_prog. If active bpf_prog doesn't match expected one, no changes are performed, old bpf_link stays intact and attached, operation returns a failure. cgroup_bpf_replace() operation is resolving race between auto-detachment and bpf_prog update in the same fashion as it's done for bpf_link detachment, except in this case update has no way of succeeding because of target cgroup marked as dying. So in this case error is returned. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200330030001.2312810-3-andriin@fb.com --- include/linux/bpf-cgroup.h | 12 +++++++ include/uapi/linux/bpf.h | 12 +++++++ kernel/bpf/cgroup.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++ kernel/bpf/syscall.c | 55 +++++++++++++++++++++++++++++++ kernel/cgroup/cgroup.c | 27 ++++++++++++++++ 5 files changed, 186 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index d2d969669564..c11b413d5b1a 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -100,6 +100,8 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, struct bpf_cgroup_link *link, enum bpf_attach_type type); +int __cgroup_bpf_replace(struct cgroup *cgrp, struct bpf_cgroup_link *link, + struct bpf_prog *new_prog); int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, union bpf_attr __user *uattr); @@ -110,6 +112,8 @@ int cgroup_bpf_attach(struct cgroup *cgrp, u32 flags); int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, enum bpf_attach_type type); +int cgroup_bpf_replace(struct bpf_link *link, struct bpf_prog *old_prog, + struct bpf_prog *new_prog); int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, union bpf_attr __user *uattr); @@ -350,6 +354,7 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr, #else struct bpf_prog; +struct bpf_link; struct cgroup_bpf {}; static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } static inline void cgroup_bpf_offline(struct cgroup *cgrp) {} @@ -373,6 +378,13 @@ static inline int cgroup_bpf_link_attach(const union bpf_attr *attr, return -EINVAL; } +static inline int cgroup_bpf_replace(struct bpf_link *link, + struct bpf_prog *old_prog, + struct bpf_prog *new_prog) +{ + return -EINVAL; +} + static inline int cgroup_bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) { diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 37dffe5089a0..2e29a671d67e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -112,6 +112,7 @@ enum bpf_cmd { BPF_MAP_UPDATE_BATCH, BPF_MAP_DELETE_BATCH, BPF_LINK_CREATE, + BPF_LINK_UPDATE, }; enum bpf_map_type { @@ -577,6 +578,17 @@ union bpf_attr { __u32 attach_type; /* attach type */ __u32 flags; /* extra flags */ } link_create; + + struct { /* struct used by BPF_LINK_UPDATE command */ + __u32 link_fd; /* link fd */ + /* new program fd to update link with */ + __u32 new_prog_fd; + __u32 flags; /* extra flags */ + /* expected link's program fd; is specified only if + * BPF_F_REPLACE flag is set in flags */ + __u32 old_prog_fd; + } link_update; + } __attribute__((aligned(8))); /* The description below is an attempt at providing documentation to eBPF diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index c24029937431..80676fc00d81 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -500,6 +500,86 @@ cleanup: return err; } +/* Swap updated BPF program for given link in effective program arrays across + * all descendant cgroups. This function is guaranteed to succeed. + */ +static void replace_effective_prog(struct cgroup *cgrp, + enum bpf_attach_type type, + struct bpf_cgroup_link *link) +{ + struct bpf_prog_array_item *item; + struct cgroup_subsys_state *css; + struct bpf_prog_array *progs; + struct bpf_prog_list *pl; + struct list_head *head; + struct cgroup *cg; + int pos; + + css_for_each_descendant_pre(css, &cgrp->self) { + struct cgroup *desc = container_of(css, struct cgroup, self); + + if (percpu_ref_is_zero(&desc->bpf.refcnt)) + continue; + + /* find position of link in effective progs array */ + for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) { + if (pos && !(cg->bpf.flags[type] & BPF_F_ALLOW_MULTI)) + continue; + + head = &cg->bpf.progs[type]; + list_for_each_entry(pl, head, node) { + if (!prog_list_prog(pl)) + continue; + if (pl->link == link) + goto found; + pos++; + } + } +found: + BUG_ON(!cg); + progs = rcu_dereference_protected( + desc->bpf.effective[type], + lockdep_is_held(&cgroup_mutex)); + item = &progs->items[pos]; + WRITE_ONCE(item->prog, link->link.prog); + } +} + +/** + * __cgroup_bpf_replace() - Replace link's program and propagate the change + * to descendants + * @cgrp: The cgroup which descendants to traverse + * @link: A link for which to replace BPF program + * @type: Type of attach operation + * + * Must be called with cgroup_mutex held. + */ +int __cgroup_bpf_replace(struct cgroup *cgrp, struct bpf_cgroup_link *link, + struct bpf_prog *new_prog) +{ + struct list_head *progs = &cgrp->bpf.progs[link->type]; + struct bpf_prog *old_prog; + struct bpf_prog_list *pl; + bool found = false; + + if (link->link.prog->type != new_prog->type) + return -EINVAL; + + list_for_each_entry(pl, progs, node) { + if (pl->link == link) { + found = true; + break; + } + } + if (!found) + return -ENOENT; + + old_prog = xchg(&link->link.prog, new_prog); + replace_effective_prog(cgrp, link->type, link); + bpf_prog_put(old_prog); + return 0; +} + static struct bpf_prog_list *find_detach_entry(struct list_head *progs, struct bpf_prog *prog, struct bpf_cgroup_link *link, diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 97d5c6fb63cd..e0a3b34d7039 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3596,6 +3596,58 @@ err_out: return ret; } +#define BPF_LINK_UPDATE_LAST_FIELD link_update.old_prog_fd + +static int link_update(union bpf_attr *attr) +{ + struct bpf_prog *old_prog = NULL, *new_prog; + struct bpf_link *link; + u32 flags; + int ret; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + if (CHECK_ATTR(BPF_LINK_UPDATE)) + return -EINVAL; + + flags = attr->link_update.flags; + if (flags & ~BPF_F_REPLACE) + return -EINVAL; + + link = bpf_link_get_from_fd(attr->link_update.link_fd); + if (IS_ERR(link)) + return PTR_ERR(link); + + new_prog = bpf_prog_get(attr->link_update.new_prog_fd); + if (IS_ERR(new_prog)) + return PTR_ERR(new_prog); + + if (flags & BPF_F_REPLACE) { + old_prog = bpf_prog_get(attr->link_update.old_prog_fd); + if (IS_ERR(old_prog)) { + ret = PTR_ERR(old_prog); + old_prog = NULL; + goto out_put_progs; + } + } + +#ifdef CONFIG_CGROUP_BPF + if (link->ops == &bpf_cgroup_link_lops) { + ret = cgroup_bpf_replace(link, old_prog, new_prog); + goto out_put_progs; + } +#endif + ret = -EINVAL; + +out_put_progs: + if (old_prog) + bpf_prog_put(old_prog); + if (ret) + bpf_prog_put(new_prog); + return ret; +} + SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) { union bpf_attr attr = {}; @@ -3709,6 +3761,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz case BPF_LINK_CREATE: err = link_create(&attr); break; + case BPF_LINK_UPDATE: + err = link_update(&attr); + break; default: err = -EINVAL; break; diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 219624fba9ba..915dda3f7f19 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -6317,6 +6317,33 @@ int cgroup_bpf_attach(struct cgroup *cgrp, return ret; } +int cgroup_bpf_replace(struct bpf_link *link, struct bpf_prog *old_prog, + struct bpf_prog *new_prog) +{ + struct bpf_cgroup_link *cg_link; + int ret; + + if (link->ops != &bpf_cgroup_link_lops) + return -EINVAL; + + cg_link = container_of(link, struct bpf_cgroup_link, link); + + mutex_lock(&cgroup_mutex); + /* link might have been auto-released by dying cgroup, so fail */ + if (!cg_link->cgroup) { + ret = -EINVAL; + goto out_unlock; + } + if (old_prog && link->prog != old_prog) { + ret = -EPERM; + goto out_unlock; + } + ret = __cgroup_bpf_replace(cg_link->cgroup, cg_link, new_prog); +out_unlock: + mutex_unlock(&cgroup_mutex); + return ret; +} + int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, enum bpf_attach_type type) { -- cgit v1.2.3