 include/linux/bpf_local_storage.h                              |  29
 kernel/bpf/bpf_cgrp_storage.c                                  |  62
 kernel/bpf/bpf_inode_storage.c                                 |   6
 kernel/bpf/bpf_local_storage.c                                 | 408
 kernel/bpf/bpf_task_storage.c                                  | 154
 kernel/bpf/helpers.c                                           |   4
 net/core/bpf_sk_storage.c                                      |  20
 tools/testing/selftests/bpf/map_tests/task_storage_map.c       | 128
 tools/testing/selftests/bpf/prog_tests/btf_dump.c              |   4
 tools/testing/selftests/bpf/prog_tests/task_local_storage.c    |  10
 tools/testing/selftests/bpf/progs/local_storage.c              |  19
 tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c |  38
 tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c   |  12
 tools/testing/selftests/bpf/progs/task_ls_recursion.c          |  14
 tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c    |   7
 15 files changed, 354 insertions(+), 561 deletions(-)
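
Editor's note on the change below: the header hunk introduces per-element state flags (SELEM_MAP_UNLINKED, SELEM_STORAGE_UNLINKED, SELEM_TOFREE) and converts the bucket and storage locks from raw_spinlock_t plus per-CPU busy counters to rqspinlock, whose lock attempts can fail with -EDEADLK or -ETIMEDOUT. When bpf_local_storage_destroy() and bpf_local_storage_map_free() race, bpf_selem_unlink_nofail() uses those flags so that exactly one path frees the element. The following is a minimal user-space model of that handshake, not the kernel code itself: only the flag values are taken from the header hunk; the struct, the helper, main() and the printf are illustrative assumptions.

    /* Standalone model of the selem free handshake added by this patch.
     * Simplified sketch only; the real logic lives in
     * bpf_selem_unlink_nofail() and additionally relies on RCU and
     * rqspinlock error handling.
     */
    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define SELEM_MAP_UNLINKED     (1 << 0)
    #define SELEM_STORAGE_UNLINKED (1 << 1)
    #define SELEM_UNLINKED         (SELEM_MAP_UNLINKED | SELEM_STORAGE_UNLINKED)
    #define SELEM_TOFREE           (1 << 2)

    struct selem {
        atomic_int state;   /* models selem->state */
    };

    /* Called once by the map_free() path and once by the destroy() path.
     * 'unlinked_both' models "unlink == 2": this caller removed the element
     * from both the map bucket and the owner's storage list itself and may
     * free immediately. Otherwise each side publishes its own flag and only
     * the side that observes both flags set moves the state to SELEM_TOFREE,
     * so the element is freed exactly once even when the two paths race.
     */
    static void unlink_nofail(struct selem *s, int my_flag, int unlinked_both)
    {
        int expected = SELEM_UNLINKED;

        if (!unlinked_both)
            atomic_fetch_or(&s->state, my_flag);

        if (unlinked_both ||
            atomic_compare_exchange_strong(&s->state, &expected, SELEM_TOFREE)) {
            printf("freeing selem, state=%#x\n", (unsigned)atomic_load(&s->state));
            free(s);
        }
    }

    int main(void)
    {
        struct selem *s = calloc(1, sizeof(*s));

        if (!s)
            return 1;
        atomic_init(&s->state, 0);

        unlink_nofail(s, SELEM_STORAGE_UNLINKED, 0);  /* destroy() side */
        unlink_nofail(s, SELEM_MAP_UNLINKED, 0);      /* map_free() side: frees */
        return 0;
    }

Only the second caller's compare-and-exchange succeeds, which is the property the kernel code depends on to avoid a double free when the owner teardown and map teardown overlap.
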
diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h index 66432248cd81..85efa9772530 100644 --- a/include/linux/bpf_local_storage.h +++ b/include/linux/bpf_local_storage.h @@ -15,12 +15,13 @@ #include <linux/types.h> #include <linux/bpf_mem_alloc.h> #include <uapi/linux/btf.h> +#include <asm/rqspinlock.h> #define BPF_LOCAL_STORAGE_CACHE_SIZE 16 struct bpf_local_storage_map_bucket { struct hlist_head list; - raw_spinlock_t lock; + rqspinlock_t lock; }; /* Thp map is not the primary owner of a bpf_local_storage_elem. @@ -67,6 +68,11 @@ struct bpf_local_storage_data { u8 data[] __aligned(8); }; +#define SELEM_MAP_UNLINKED (1 << 0) +#define SELEM_STORAGE_UNLINKED (1 << 1) +#define SELEM_UNLINKED (SELEM_MAP_UNLINKED | SELEM_STORAGE_UNLINKED) +#define SELEM_TOFREE (1 << 2) + /* Linked to bpf_local_storage and bpf_local_storage_map */ struct bpf_local_storage_elem { struct hlist_node map_node; /* Linked to bpf_local_storage_map */ @@ -79,7 +85,9 @@ struct bpf_local_storage_elem { * after raw_spin_unlock */ }; - /* 8 bytes hole */ + atomic_t state; + bool use_kmalloc_nolock; + /* 3 bytes hole */ /* The data is stored in another cacheline to minimize * the number of cachelines access during a cache hit. */ @@ -88,13 +96,14 @@ struct bpf_local_storage_elem { struct bpf_local_storage { struct bpf_local_storage_data __rcu *cache[BPF_LOCAL_STORAGE_CACHE_SIZE]; - struct bpf_local_storage_map __rcu *smap; struct hlist_head list; /* List of bpf_local_storage_elem */ void *owner; /* The object that owns the above "list" of * bpf_local_storage_elem. */ struct rcu_head rcu; - raw_spinlock_t lock; /* Protect adding/removing from the "list" */ + rqspinlock_t lock; /* Protect adding/removing from the "list" */ + u64 mem_charge; /* Copy of mem charged to owner. 
Protected by "lock" */ + refcount_t owner_refcnt;/* Used to pin owner when map_free is uncharging */ bool use_kmalloc_nolock; }; @@ -162,11 +171,10 @@ bpf_local_storage_lookup(struct bpf_local_storage *local_storage, return SDATA(selem); } -void bpf_local_storage_destroy(struct bpf_local_storage *local_storage); +u32 bpf_local_storage_destroy(struct bpf_local_storage *local_storage); void bpf_local_storage_map_free(struct bpf_map *map, - struct bpf_local_storage_cache *cache, - int __percpu *busy_counter); + struct bpf_local_storage_cache *cache); int bpf_local_storage_map_check_btf(const struct bpf_map *map, const struct btf *btf, @@ -176,10 +184,11 @@ int bpf_local_storage_map_check_btf(const struct bpf_map *map, void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage, struct bpf_local_storage_elem *selem); -void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool reuse_now); +int bpf_selem_unlink(struct bpf_local_storage_elem *selem); -void bpf_selem_link_map(struct bpf_local_storage_map *smap, - struct bpf_local_storage_elem *selem); +int bpf_selem_link_map(struct bpf_local_storage_map *smap, + struct bpf_local_storage *local_storage, + struct bpf_local_storage_elem *selem); struct bpf_local_storage_elem * bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value, diff --git a/kernel/bpf/bpf_cgrp_storage.c b/kernel/bpf/bpf_cgrp_storage.c index 0687a760974a..c2a2ead1f466 100644 --- a/kernel/bpf/bpf_cgrp_storage.c +++ b/kernel/bpf/bpf_cgrp_storage.c @@ -11,29 +11,6 @@ DEFINE_BPF_STORAGE_CACHE(cgroup_cache); -static DEFINE_PER_CPU(int, bpf_cgrp_storage_busy); - -static void bpf_cgrp_storage_lock(void) -{ - cant_migrate(); - this_cpu_inc(bpf_cgrp_storage_busy); -} - -static void bpf_cgrp_storage_unlock(void) -{ - this_cpu_dec(bpf_cgrp_storage_busy); -} - -static bool bpf_cgrp_storage_trylock(void) -{ - cant_migrate(); - if (unlikely(this_cpu_inc_return(bpf_cgrp_storage_busy) != 1)) { - this_cpu_dec(bpf_cgrp_storage_busy); - return false; - } - return true; -} - static struct bpf_local_storage __rcu **cgroup_storage_ptr(void *owner) { struct cgroup *cg = owner; @@ -45,16 +22,14 @@ void bpf_cgrp_storage_free(struct cgroup *cgroup) { struct bpf_local_storage *local_storage; - rcu_read_lock_dont_migrate(); + rcu_read_lock(); local_storage = rcu_dereference(cgroup->bpf_cgrp_storage); if (!local_storage) goto out; - bpf_cgrp_storage_lock(); bpf_local_storage_destroy(local_storage); - bpf_cgrp_storage_unlock(); out: - rcu_read_unlock_migrate(); + rcu_read_unlock(); } static struct bpf_local_storage_data * @@ -83,9 +58,7 @@ static void *bpf_cgrp_storage_lookup_elem(struct bpf_map *map, void *key) if (IS_ERR(cgroup)) return ERR_CAST(cgroup); - bpf_cgrp_storage_lock(); sdata = cgroup_storage_lookup(cgroup, map, true); - bpf_cgrp_storage_unlock(); cgroup_put(cgroup); return sdata ? 
sdata->data : NULL; } @@ -102,10 +75,8 @@ static long bpf_cgrp_storage_update_elem(struct bpf_map *map, void *key, if (IS_ERR(cgroup)) return PTR_ERR(cgroup); - bpf_cgrp_storage_lock(); sdata = bpf_local_storage_update(cgroup, (struct bpf_local_storage_map *)map, value, map_flags, false, GFP_ATOMIC); - bpf_cgrp_storage_unlock(); cgroup_put(cgroup); return PTR_ERR_OR_ZERO(sdata); } @@ -118,8 +89,7 @@ static int cgroup_storage_delete(struct cgroup *cgroup, struct bpf_map *map) if (!sdata) return -ENOENT; - bpf_selem_unlink(SELEM(sdata), false); - return 0; + return bpf_selem_unlink(SELEM(sdata)); } static long bpf_cgrp_storage_delete_elem(struct bpf_map *map, void *key) @@ -132,9 +102,7 @@ static long bpf_cgrp_storage_delete_elem(struct bpf_map *map, void *key) if (IS_ERR(cgroup)) return PTR_ERR(cgroup); - bpf_cgrp_storage_lock(); err = cgroup_storage_delete(cgroup, map); - bpf_cgrp_storage_unlock(); cgroup_put(cgroup); return err; } @@ -151,7 +119,7 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr) static void cgroup_storage_map_free(struct bpf_map *map) { - bpf_local_storage_map_free(map, &cgroup_cache, &bpf_cgrp_storage_busy); + bpf_local_storage_map_free(map, &cgroup_cache); } /* *gfp_flags* is a hidden argument provided by the verifier */ @@ -159,7 +127,6 @@ BPF_CALL_5(bpf_cgrp_storage_get, struct bpf_map *, map, struct cgroup *, cgroup, void *, value, u64, flags, gfp_t, gfp_flags) { struct bpf_local_storage_data *sdata; - bool nobusy; WARN_ON_ONCE(!bpf_rcu_lock_held()); if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE)) @@ -168,38 +135,27 @@ BPF_CALL_5(bpf_cgrp_storage_get, struct bpf_map *, map, struct cgroup *, cgroup, if (!cgroup) return (unsigned long)NULL; - nobusy = bpf_cgrp_storage_trylock(); - - sdata = cgroup_storage_lookup(cgroup, map, nobusy); + sdata = cgroup_storage_lookup(cgroup, map, true); if (sdata) - goto unlock; + goto out; /* only allocate new storage, when the cgroup is refcounted */ if (!percpu_ref_is_dying(&cgroup->self.refcnt) && - (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) && nobusy) + (flags & BPF_LOCAL_STORAGE_GET_F_CREATE)) sdata = bpf_local_storage_update(cgroup, (struct bpf_local_storage_map *)map, value, BPF_NOEXIST, false, gfp_flags); -unlock: - if (nobusy) - bpf_cgrp_storage_unlock(); +out: return IS_ERR_OR_NULL(sdata) ? 
(unsigned long)NULL : (unsigned long)sdata->data; } BPF_CALL_2(bpf_cgrp_storage_delete, struct bpf_map *, map, struct cgroup *, cgroup) { - int ret; - WARN_ON_ONCE(!bpf_rcu_lock_held()); if (!cgroup) return -EINVAL; - if (!bpf_cgrp_storage_trylock()) - return -EBUSY; - - ret = cgroup_storage_delete(cgroup, map); - bpf_cgrp_storage_unlock(); - return ret; + return cgroup_storage_delete(cgroup, map); } const struct bpf_map_ops cgrp_storage_map_ops = { diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c index e54cce2b9175..e86734609f3d 100644 --- a/kernel/bpf/bpf_inode_storage.c +++ b/kernel/bpf/bpf_inode_storage.c @@ -110,9 +110,7 @@ static int inode_storage_delete(struct inode *inode, struct bpf_map *map) if (!sdata) return -ENOENT; - bpf_selem_unlink(SELEM(sdata), false); - - return 0; + return bpf_selem_unlink(SELEM(sdata)); } static long bpf_fd_inode_storage_delete_elem(struct bpf_map *map, void *key) @@ -186,7 +184,7 @@ static struct bpf_map *inode_storage_map_alloc(union bpf_attr *attr) static void inode_storage_map_free(struct bpf_map *map) { - bpf_local_storage_map_free(map, &inode_cache, NULL); + bpf_local_storage_map_free(map, &inode_cache); } const struct bpf_map_ops inode_storage_map_ops = { diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index e2fe6c32822b..b28f07d3a0db 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -19,9 +19,9 @@ static struct bpf_local_storage_map_bucket * select_bucket(struct bpf_local_storage_map *smap, - struct bpf_local_storage_elem *selem) + struct bpf_local_storage *local_storage) { - return &smap->buckets[hash_ptr(selem, smap->bucket_log)]; + return &smap->buckets[hash_ptr(local_storage, smap->bucket_log)]; } static int mem_charge(struct bpf_local_storage_map *smap, void *owner, u32 size) @@ -61,11 +61,6 @@ static bool selem_linked_to_storage(const struct bpf_local_storage_elem *selem) return !hlist_unhashed(&selem->snode); } -static bool selem_linked_to_map_lockless(const struct bpf_local_storage_elem *selem) -{ - return !hlist_unhashed_lockless(&selem->map_node); -} - static bool selem_linked_to_map(const struct bpf_local_storage_elem *selem) { return !hlist_unhashed(&selem->map_node); @@ -90,6 +85,8 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, if (selem) { RCU_INIT_POINTER(SDATA(selem)->smap, smap); + atomic_set(&selem->state, 0); + selem->use_kmalloc_nolock = smap->use_kmalloc_nolock; if (value) { /* No need to call check_and_init_map_value as memory is zero init */ @@ -198,9 +195,11 @@ static void bpf_selem_free_rcu(struct rcu_head *rcu) /* The bpf_local_storage_map_free will wait for rcu_barrier */ smap = rcu_dereference_check(SDATA(selem)->smap, 1); - migrate_disable(); - bpf_obj_free_fields(smap->map.record, SDATA(selem)->data); - migrate_enable(); + if (smap) { + migrate_disable(); + bpf_obj_free_fields(smap->map.record, SDATA(selem)->data); + migrate_enable(); + } kfree_nolock(selem); } @@ -219,13 +218,14 @@ void bpf_selem_free(struct bpf_local_storage_elem *selem, smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held()); - if (!smap->use_kmalloc_nolock) { + if (!selem->use_kmalloc_nolock) { /* * No uptr will be unpin even when reuse_now == false since uptr * is only supported in task local storage, where * smap->use_kmalloc_nolock == true. 
*/ - bpf_obj_free_fields(smap->map.record, SDATA(selem)->data); + if (smap) + bpf_obj_free_fields(smap->map.record, SDATA(selem)->data); __bpf_selem_free(selem, reuse_now); return; } @@ -256,6 +256,36 @@ static void bpf_selem_free_list(struct hlist_head *list, bool reuse_now) bpf_selem_free(selem, reuse_now); } +static void bpf_selem_unlink_storage_nolock_misc(struct bpf_local_storage_elem *selem, + struct bpf_local_storage_map *smap, + struct bpf_local_storage *local_storage, + bool free_local_storage, bool pin_owner) +{ + void *owner = local_storage->owner; + u32 uncharge = smap->elem_size; + + if (rcu_access_pointer(local_storage->cache[smap->cache_idx]) == + SDATA(selem)) + RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL); + + if (pin_owner && !refcount_inc_not_zero(&local_storage->owner_refcnt)) + return; + + uncharge += free_local_storage ? sizeof(*local_storage) : 0; + mem_uncharge(smap, local_storage->owner, uncharge); + local_storage->mem_charge -= uncharge; + + if (free_local_storage) { + local_storage->owner = NULL; + + /* After this RCU_INIT, owner may be freed and cannot be used */ + RCU_INIT_POINTER(*owner_storage(smap, owner), NULL); + } + + if (pin_owner) + refcount_dec(&local_storage->owner_refcnt); +} + /* local_storage->lock must be held and selem->local_storage == local_storage. * The caller must ensure selem->smap is still valid to be * dereferenced for its smap->elem_size and smap->cache_idx. @@ -266,124 +296,219 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor { struct bpf_local_storage_map *smap; bool free_local_storage; - void *owner; smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held()); - owner = local_storage->owner; - - /* All uncharging on the owner must be done first. - * The owner may be freed once the last selem is unlinked - * from local_storage. - */ - mem_uncharge(smap, owner, smap->elem_size); free_local_storage = hlist_is_singular_node(&selem->snode, &local_storage->list); - if (free_local_storage) { - mem_uncharge(smap, owner, sizeof(struct bpf_local_storage)); - local_storage->owner = NULL; - /* After this RCU_INIT, owner may be freed and cannot be used */ - RCU_INIT_POINTER(*owner_storage(smap, owner), NULL); + bpf_selem_unlink_storage_nolock_misc(selem, smap, local_storage, + free_local_storage, false); - /* local_storage is not freed now. local_storage->lock is - * still held and raw_spin_unlock_bh(&local_storage->lock) - * will be done by the caller. - * - * Although the unlock will be done under - * rcu_read_lock(), it is more intuitive to - * read if the freeing of the storage is done - * after the raw_spin_unlock_bh(&local_storage->lock). - * - * Hence, a "bool free_local_storage" is returned - * to the caller which then calls then frees the storage after - * all the RCU grace periods have expired. 
- */ - } hlist_del_init_rcu(&selem->snode); - if (rcu_access_pointer(local_storage->cache[smap->cache_idx]) == - SDATA(selem)) - RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL); hlist_add_head(&selem->free_node, free_selem_list); - if (rcu_access_pointer(local_storage->smap) == smap) - RCU_INIT_POINTER(local_storage->smap, NULL); - return free_local_storage; } -static void bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem, - bool reuse_now) +void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage, + struct bpf_local_storage_elem *selem) +{ + struct bpf_local_storage_map *smap; + + smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held()); + local_storage->mem_charge += smap->elem_size; + + RCU_INIT_POINTER(selem->local_storage, local_storage); + hlist_add_head_rcu(&selem->snode, &local_storage->list); +} + +static int bpf_selem_unlink_map(struct bpf_local_storage_elem *selem) +{ + struct bpf_local_storage *local_storage; + struct bpf_local_storage_map *smap; + struct bpf_local_storage_map_bucket *b; + unsigned long flags; + int err; + + local_storage = rcu_dereference_check(selem->local_storage, + bpf_rcu_lock_held()); + smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held()); + b = select_bucket(smap, local_storage); + err = raw_res_spin_lock_irqsave(&b->lock, flags); + if (err) + return err; + + hlist_del_init_rcu(&selem->map_node); + raw_res_spin_unlock_irqrestore(&b->lock, flags); + + return 0; +} + +static void bpf_selem_unlink_map_nolock(struct bpf_local_storage_elem *selem) +{ + hlist_del_init_rcu(&selem->map_node); +} + +int bpf_selem_link_map(struct bpf_local_storage_map *smap, + struct bpf_local_storage *local_storage, + struct bpf_local_storage_elem *selem) +{ + struct bpf_local_storage_map_bucket *b; + unsigned long flags; + int err; + + b = select_bucket(smap, local_storage); + + err = raw_res_spin_lock_irqsave(&b->lock, flags); + if (err) + return err; + + hlist_add_head_rcu(&selem->map_node, &b->list); + raw_res_spin_unlock_irqrestore(&b->lock, flags); + + return 0; +} + +static void bpf_selem_link_map_nolock(struct bpf_local_storage_map_bucket *b, + struct bpf_local_storage_elem *selem) +{ + hlist_add_head_rcu(&selem->map_node, &b->list); +} + +/* + * Unlink an selem from map and local storage with lock held. + * This is the common path used by local storages to delete an selem. + */ +int bpf_selem_unlink(struct bpf_local_storage_elem *selem) { struct bpf_local_storage *local_storage; bool free_local_storage = false; HLIST_HEAD(selem_free_list); unsigned long flags; + int err; if (unlikely(!selem_linked_to_storage_lockless(selem))) /* selem has already been unlinked from sk */ - return; + return 0; local_storage = rcu_dereference_check(selem->local_storage, bpf_rcu_lock_held()); - raw_spin_lock_irqsave(&local_storage->lock, flags); - if (likely(selem_linked_to_storage(selem))) + err = raw_res_spin_lock_irqsave(&local_storage->lock, flags); + if (err) + return err; + + if (likely(selem_linked_to_storage(selem))) { + /* Always unlink from map before unlinking from local_storage + * because selem will be freed after successfully unlinked from + * the local_storage. 
+ */ + err = bpf_selem_unlink_map(selem); + if (err) + goto out; + free_local_storage = bpf_selem_unlink_storage_nolock( local_storage, selem, &selem_free_list); - raw_spin_unlock_irqrestore(&local_storage->lock, flags); + } +out: + raw_res_spin_unlock_irqrestore(&local_storage->lock, flags); - bpf_selem_free_list(&selem_free_list, reuse_now); + bpf_selem_free_list(&selem_free_list, false); if (free_local_storage) - bpf_local_storage_free(local_storage, reuse_now); -} + bpf_local_storage_free(local_storage, false); -void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage, - struct bpf_local_storage_elem *selem) -{ - RCU_INIT_POINTER(selem->local_storage, local_storage); - hlist_add_head_rcu(&selem->snode, &local_storage->list); + return err; } -static void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem) +/* + * Unlink an selem from map and local storage with lockless fallback if callers + * are racing or rqspinlock returns error. It should only be called by + * bpf_local_storage_destroy() or bpf_local_storage_map_free(). + */ +static void bpf_selem_unlink_nofail(struct bpf_local_storage_elem *selem, + struct bpf_local_storage_map_bucket *b) { + bool in_map_free = !!b, free_storage = false; + struct bpf_local_storage *local_storage; struct bpf_local_storage_map *smap; - struct bpf_local_storage_map_bucket *b; unsigned long flags; + int err, unlink = 0; - if (unlikely(!selem_linked_to_map_lockless(selem))) - /* selem has already be unlinked from smap */ - return; - + local_storage = rcu_dereference_check(selem->local_storage, bpf_rcu_lock_held()); smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held()); - b = select_bucket(smap, selem); - raw_spin_lock_irqsave(&b->lock, flags); - if (likely(selem_linked_to_map(selem))) - hlist_del_init_rcu(&selem->map_node); - raw_spin_unlock_irqrestore(&b->lock, flags); -} -void bpf_selem_link_map(struct bpf_local_storage_map *smap, - struct bpf_local_storage_elem *selem) -{ - struct bpf_local_storage_map_bucket *b = select_bucket(smap, selem); - unsigned long flags; + if (smap) { + b = b ? : select_bucket(smap, local_storage); + err = raw_res_spin_lock_irqsave(&b->lock, flags); + if (!err) { + /* + * Call bpf_obj_free_fields() under b->lock to make sure it is done + * exactly once for an selem. Safe to free special fields immediately + * as no BPF program should be referencing the selem. + */ + if (likely(selem_linked_to_map(selem))) { + hlist_del_init_rcu(&selem->map_node); + bpf_obj_free_fields(smap->map.record, SDATA(selem)->data); + unlink++; + } + raw_res_spin_unlock_irqrestore(&b->lock, flags); + } + /* + * Highly unlikely scenario: resource leak + * + * When map_free(selem1), destroy(selem1) and destroy(selem2) are racing + * and both selem belong to the same bucket, if destroy(selem2) acquired + * b->lock and block for too long, neither map_free(selem1) and + * destroy(selem1) will be able to free the special field associated + * with selem1 as raw_res_spin_lock_irqsave() returns -ETIMEDOUT. 
+ */ + WARN_ON_ONCE(err && in_map_free); + if (!err || in_map_free) + RCU_INIT_POINTER(SDATA(selem)->smap, NULL); + } - raw_spin_lock_irqsave(&b->lock, flags); - hlist_add_head_rcu(&selem->map_node, &b->list); - raw_spin_unlock_irqrestore(&b->lock, flags); -} + if (local_storage) { + err = raw_res_spin_lock_irqsave(&local_storage->lock, flags); + if (!err) { + if (likely(selem_linked_to_storage(selem))) { + free_storage = hlist_is_singular_node(&selem->snode, + &local_storage->list); + /* + * Okay to skip clearing owner_storage and storage->owner in + * destroy() since the owner is going away. No user or bpf + * programs should be able to reference it. + */ + if (smap && in_map_free) + bpf_selem_unlink_storage_nolock_misc( + selem, smap, local_storage, + free_storage, true); + hlist_del_init_rcu(&selem->snode); + unlink++; + } + raw_res_spin_unlock_irqrestore(&local_storage->lock, flags); + } + if (!err || !in_map_free) + RCU_INIT_POINTER(selem->local_storage, NULL); + } -void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool reuse_now) -{ - /* Always unlink from map before unlinking from local_storage - * because selem will be freed after successfully unlinked from - * the local_storage. + if (unlink != 2) + atomic_or(in_map_free ? SELEM_MAP_UNLINKED : SELEM_STORAGE_UNLINKED, &selem->state); + + /* + * Normally, an selem can be unlinked under local_storage->lock and b->lock, and + * then freed after an RCU grace period. However, if destroy() and map_free() are + * racing or rqspinlock returns errors in unlikely situations (unlink != 2), free + * the selem only after both map_free() and destroy() see the selem. */ - bpf_selem_unlink_map(selem); - bpf_selem_unlink_storage(selem, reuse_now); + if (unlink == 2 || + atomic_cmpxchg(&selem->state, SELEM_UNLINKED, SELEM_TOFREE) == SELEM_UNLINKED) + bpf_selem_free(selem, true); + + if (free_storage) + bpf_local_storage_free(local_storage, true); } void __bpf_local_storage_insert_cache(struct bpf_local_storage *local_storage, @@ -391,16 +516,20 @@ void __bpf_local_storage_insert_cache(struct bpf_local_storage *local_storage, struct bpf_local_storage_elem *selem) { unsigned long flags; + int err; /* spinlock is needed to avoid racing with the * parallel delete. Otherwise, publishing an already * deleted sdata to the cache will become a use-after-free * problem in the next bpf_local_storage_lookup(). 
*/ - raw_spin_lock_irqsave(&local_storage->lock, flags); + err = raw_res_spin_lock_irqsave(&local_storage->lock, flags); + if (err) + return; + if (selem_linked_to_storage(selem)) rcu_assign_pointer(local_storage->cache[smap->cache_idx], SDATA(selem)); - raw_spin_unlock_irqrestore(&local_storage->lock, flags); + raw_res_spin_unlock_irqrestore(&local_storage->lock, flags); } static int check_flags(const struct bpf_local_storage_data *old_sdata, @@ -424,6 +553,8 @@ int bpf_local_storage_alloc(void *owner, { struct bpf_local_storage *prev_storage, *storage; struct bpf_local_storage **owner_storage_ptr; + struct bpf_local_storage_map_bucket *b; + unsigned long flags; int err; err = mem_charge(smap, owner, sizeof(*storage)); @@ -441,14 +572,21 @@ int bpf_local_storage_alloc(void *owner, goto uncharge; } - RCU_INIT_POINTER(storage->smap, smap); INIT_HLIST_HEAD(&storage->list); - raw_spin_lock_init(&storage->lock); + raw_res_spin_lock_init(&storage->lock); storage->owner = owner; + storage->mem_charge = sizeof(*storage); storage->use_kmalloc_nolock = smap->use_kmalloc_nolock; + refcount_set(&storage->owner_refcnt, 1); bpf_selem_link_storage_nolock(storage, first_selem); - bpf_selem_link_map(smap, first_selem); + + b = select_bucket(smap, storage); + err = raw_res_spin_lock_irqsave(&b->lock, flags); + if (err) + goto uncharge; + + bpf_selem_link_map_nolock(b, first_selem); owner_storage_ptr = (struct bpf_local_storage **)owner_storage(smap, owner); @@ -464,10 +602,12 @@ int bpf_local_storage_alloc(void *owner, */ prev_storage = cmpxchg(owner_storage_ptr, NULL, storage); if (unlikely(prev_storage)) { - bpf_selem_unlink_map(first_selem); + bpf_selem_unlink_map_nolock(first_selem); + raw_res_spin_unlock_irqrestore(&b->lock, flags); err = -EAGAIN; goto uncharge; } + raw_res_spin_unlock_irqrestore(&b->lock, flags); return 0; @@ -489,8 +629,9 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, struct bpf_local_storage_data *old_sdata = NULL; struct bpf_local_storage_elem *alloc_selem, *selem = NULL; struct bpf_local_storage *local_storage; + struct bpf_local_storage_map_bucket *b; HLIST_HEAD(old_selem_free_list); - unsigned long flags; + unsigned long flags, b_flags; int err; /* BPF_EXIST and BPF_NOEXIST cannot be both set */ @@ -549,7 +690,9 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, if (!alloc_selem) return ERR_PTR(-ENOMEM); - raw_spin_lock_irqsave(&local_storage->lock, flags); + err = raw_res_spin_lock_irqsave(&local_storage->lock, flags); + if (err) + goto free_selem; /* Recheck local_storage->list under local_storage->lock */ if (unlikely(hlist_empty(&local_storage->list))) { @@ -574,22 +717,30 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, goto unlock; } + b = select_bucket(smap, local_storage); + + err = raw_res_spin_lock_irqsave(&b->lock, b_flags); + if (err) + goto unlock; + alloc_selem = NULL; /* First, link the new selem to the map */ - bpf_selem_link_map(smap, selem); + bpf_selem_link_map_nolock(b, selem); /* Second, link (and publish) the new selem to local_storage */ bpf_selem_link_storage_nolock(local_storage, selem); /* Third, remove old selem, SELEM(old_sdata) */ if (old_sdata) { - bpf_selem_unlink_map(SELEM(old_sdata)); + bpf_selem_unlink_map_nolock(SELEM(old_sdata)); bpf_selem_unlink_storage_nolock(local_storage, SELEM(old_sdata), &old_selem_free_list); } + raw_res_spin_unlock_irqrestore(&b->lock, b_flags); unlock: - raw_spin_unlock_irqrestore(&local_storage->lock, flags); + 
raw_res_spin_unlock_irqrestore(&local_storage->lock, flags); +free_selem: bpf_selem_free_list(&old_selem_free_list, false); if (alloc_selem) { mem_uncharge(smap, owner, smap->elem_size); @@ -657,13 +808,13 @@ int bpf_local_storage_map_check_btf(const struct bpf_map *map, return 0; } -void bpf_local_storage_destroy(struct bpf_local_storage *local_storage) +/* + * Destroy local storage when the owner is going away. Caller must uncharge memory + * if memory charging is used. + */ +u32 bpf_local_storage_destroy(struct bpf_local_storage *local_storage) { struct bpf_local_storage_elem *selem; - bool free_storage = false; - HLIST_HEAD(free_selem_list); - struct hlist_node *n; - unsigned long flags; /* Neither the bpf_prog nor the bpf_map's syscall * could be modifying the local_storage->list now. @@ -674,27 +825,20 @@ void bpf_local_storage_destroy(struct bpf_local_storage *local_storage) * when unlinking elem from the local_storage->list and * the map's bucket->list. */ - raw_spin_lock_irqsave(&local_storage->lock, flags); - hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) { - /* Always unlink from map before unlinking from - * local_storage. - */ - bpf_selem_unlink_map(selem); - /* If local_storage list has only one element, the - * bpf_selem_unlink_storage_nolock() will return true. - * Otherwise, it will return false. The current loop iteration - * intends to remove all local storage. So the last iteration - * of the loop will set the free_cgroup_storage to true. + hlist_for_each_entry_rcu(selem, &local_storage->list, snode) + bpf_selem_unlink_nofail(selem, NULL); + + if (!refcount_dec_and_test(&local_storage->owner_refcnt)) { + while (refcount_read(&local_storage->owner_refcnt)) + cpu_relax(); + /* + * Paired with refcount_dec() in bpf_selem_unlink_nofail() + * to make sure destroy() sees the correct local_storage->mem_charge. 
*/ - free_storage = bpf_selem_unlink_storage_nolock( - local_storage, selem, &free_selem_list); + smp_mb(); } - raw_spin_unlock_irqrestore(&local_storage->lock, flags); - - bpf_selem_free_list(&free_selem_list, true); - if (free_storage) - bpf_local_storage_free(local_storage, true); + return local_storage->mem_charge; } u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map) @@ -736,7 +880,7 @@ bpf_local_storage_map_alloc(union bpf_attr *attr, for (i = 0; i < nbuckets; i++) { INIT_HLIST_HEAD(&smap->buckets[i].list); - raw_spin_lock_init(&smap->buckets[i].lock); + raw_res_spin_lock_init(&smap->buckets[i].lock); } smap->elem_size = offsetof(struct bpf_local_storage_elem, @@ -758,8 +902,7 @@ free_smap: } void bpf_local_storage_map_free(struct bpf_map *map, - struct bpf_local_storage_cache *cache, - int __percpu *busy_counter) + struct bpf_local_storage_cache *cache) { struct bpf_local_storage_map_bucket *b; struct bpf_local_storage_elem *selem; @@ -789,15 +932,14 @@ void bpf_local_storage_map_free(struct bpf_map *map, rcu_read_lock(); /* No one is adding to b->list now */ - while ((selem = hlist_entry_safe( - rcu_dereference_raw(hlist_first_rcu(&b->list)), - struct bpf_local_storage_elem, map_node))) { - if (busy_counter) - this_cpu_inc(*busy_counter); - bpf_selem_unlink(selem, true); - if (busy_counter) - this_cpu_dec(*busy_counter); - cond_resched_rcu(); +restart: + hlist_for_each_entry_rcu(selem, &b->list, map_node) { + bpf_selem_unlink_nofail(selem, b); + + if (need_resched()) { + cond_resched_rcu(); + goto restart; + } } rcu_read_unlock(); } diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c index a1dc1bf0848a..605506792b5b 100644 --- a/kernel/bpf/bpf_task_storage.c +++ b/kernel/bpf/bpf_task_storage.c @@ -20,29 +20,6 @@ DEFINE_BPF_STORAGE_CACHE(task_cache); -static DEFINE_PER_CPU(int, bpf_task_storage_busy); - -static void bpf_task_storage_lock(void) -{ - cant_migrate(); - this_cpu_inc(bpf_task_storage_busy); -} - -static void bpf_task_storage_unlock(void) -{ - this_cpu_dec(bpf_task_storage_busy); -} - -static bool bpf_task_storage_trylock(void) -{ - cant_migrate(); - if (unlikely(this_cpu_inc_return(bpf_task_storage_busy) != 1)) { - this_cpu_dec(bpf_task_storage_busy); - return false; - } - return true; -} - static struct bpf_local_storage __rcu **task_storage_ptr(void *owner) { struct task_struct *task = owner; @@ -70,17 +47,15 @@ void bpf_task_storage_free(struct task_struct *task) { struct bpf_local_storage *local_storage; - rcu_read_lock_dont_migrate(); + rcu_read_lock(); local_storage = rcu_dereference(task->bpf_storage); if (!local_storage) goto out; - bpf_task_storage_lock(); bpf_local_storage_destroy(local_storage); - bpf_task_storage_unlock(); out: - rcu_read_unlock_migrate(); + rcu_read_unlock(); } static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key) @@ -106,9 +81,7 @@ static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key) goto out; } - bpf_task_storage_lock(); sdata = task_storage_lookup(task, map, true); - bpf_task_storage_unlock(); put_pid(pid); return sdata ? 
sdata->data : NULL; out: @@ -143,11 +116,9 @@ static long bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key, goto out; } - bpf_task_storage_lock(); sdata = bpf_local_storage_update( task, (struct bpf_local_storage_map *)map, value, map_flags, true, GFP_ATOMIC); - bpf_task_storage_unlock(); err = PTR_ERR_OR_ZERO(sdata); out: @@ -155,8 +126,7 @@ out: return err; } -static int task_storage_delete(struct task_struct *task, struct bpf_map *map, - bool nobusy) +static int task_storage_delete(struct task_struct *task, struct bpf_map *map) { struct bpf_local_storage_data *sdata; @@ -164,12 +134,7 @@ static int task_storage_delete(struct task_struct *task, struct bpf_map *map, if (!sdata) return -ENOENT; - if (!nobusy) - return -EBUSY; - - bpf_selem_unlink(SELEM(sdata), false); - - return 0; + return bpf_selem_unlink(SELEM(sdata)); } static long bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key) @@ -194,111 +159,50 @@ static long bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key) goto out; } - bpf_task_storage_lock(); - err = task_storage_delete(task, map, true); - bpf_task_storage_unlock(); + err = task_storage_delete(task, map); out: put_pid(pid); return err; } -/* Called by bpf_task_storage_get*() helpers */ -static void *__bpf_task_storage_get(struct bpf_map *map, - struct task_struct *task, void *value, - u64 flags, gfp_t gfp_flags, bool nobusy) +/* *gfp_flags* is a hidden argument provided by the verifier */ +BPF_CALL_5(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *, + task, void *, value, u64, flags, gfp_t, gfp_flags) { struct bpf_local_storage_data *sdata; - sdata = task_storage_lookup(task, map, nobusy); + WARN_ON_ONCE(!bpf_rcu_lock_held()); + if (flags & ~BPF_LOCAL_STORAGE_GET_F_CREATE || !task) + return (unsigned long)NULL; + + sdata = task_storage_lookup(task, map, true); if (sdata) - return sdata->data; + return (unsigned long)sdata->data; /* only allocate new storage, when the task is refcounted */ if (refcount_read(&task->usage) && - (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) && nobusy) { + (flags & BPF_LOCAL_STORAGE_GET_F_CREATE)) { sdata = bpf_local_storage_update( task, (struct bpf_local_storage_map *)map, value, BPF_NOEXIST, false, gfp_flags); - return IS_ERR(sdata) ? NULL : sdata->data; + return IS_ERR(sdata) ? 
(unsigned long)NULL : (unsigned long)sdata->data; } - return NULL; -} - -/* *gfp_flags* is a hidden argument provided by the verifier */ -BPF_CALL_5(bpf_task_storage_get_recur, struct bpf_map *, map, struct task_struct *, - task, void *, value, u64, flags, gfp_t, gfp_flags) -{ - bool nobusy; - void *data; - - WARN_ON_ONCE(!bpf_rcu_lock_held()); - if (flags & ~BPF_LOCAL_STORAGE_GET_F_CREATE || !task) - return (unsigned long)NULL; - - nobusy = bpf_task_storage_trylock(); - data = __bpf_task_storage_get(map, task, value, flags, - gfp_flags, nobusy); - if (nobusy) - bpf_task_storage_unlock(); - return (unsigned long)data; -} - -/* *gfp_flags* is a hidden argument provided by the verifier */ -BPF_CALL_5(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *, - task, void *, value, u64, flags, gfp_t, gfp_flags) -{ - void *data; - - WARN_ON_ONCE(!bpf_rcu_lock_held()); - if (flags & ~BPF_LOCAL_STORAGE_GET_F_CREATE || !task) - return (unsigned long)NULL; - - bpf_task_storage_lock(); - data = __bpf_task_storage_get(map, task, value, flags, - gfp_flags, true); - bpf_task_storage_unlock(); - return (unsigned long)data; -} - -BPF_CALL_2(bpf_task_storage_delete_recur, struct bpf_map *, map, struct task_struct *, - task) -{ - bool nobusy; - int ret; - - WARN_ON_ONCE(!bpf_rcu_lock_held()); - if (!task) - return -EINVAL; - - nobusy = bpf_task_storage_trylock(); - /* This helper must only be called from places where the lifetime of the task - * is guaranteed. Either by being refcounted or by being protected - * by an RCU read-side critical section. - */ - ret = task_storage_delete(task, map, nobusy); - if (nobusy) - bpf_task_storage_unlock(); - return ret; + return (unsigned long)NULL; } BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *, task) { - int ret; - WARN_ON_ONCE(!bpf_rcu_lock_held()); if (!task) return -EINVAL; - bpf_task_storage_lock(); /* This helper must only be called from places where the lifetime of the task * is guaranteed. Either by being refcounted or by being protected * by an RCU read-side critical section. 
*/ - ret = task_storage_delete(task, map, true); - bpf_task_storage_unlock(); - return ret; + return task_storage_delete(task, map); } static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key) @@ -313,7 +217,7 @@ static struct bpf_map *task_storage_map_alloc(union bpf_attr *attr) static void task_storage_map_free(struct bpf_map *map) { - bpf_local_storage_map_free(map, &task_cache, &bpf_task_storage_busy); + bpf_local_storage_map_free(map, &task_cache); } BTF_ID_LIST_GLOBAL_SINGLE(bpf_local_storage_map_btf_id, struct, bpf_local_storage_map) @@ -332,17 +236,6 @@ const struct bpf_map_ops task_storage_map_ops = { .map_owner_storage_ptr = task_storage_ptr, }; -const struct bpf_func_proto bpf_task_storage_get_recur_proto = { - .func = bpf_task_storage_get_recur, - .gpl_only = false, - .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, - .arg1_type = ARG_CONST_MAP_PTR, - .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL, - .arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], - .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, - .arg4_type = ARG_ANYTHING, -}; - const struct bpf_func_proto bpf_task_storage_get_proto = { .func = bpf_task_storage_get, .gpl_only = false, @@ -354,15 +247,6 @@ const struct bpf_func_proto bpf_task_storage_get_proto = { .arg4_type = ARG_ANYTHING, }; -const struct bpf_func_proto bpf_task_storage_delete_recur_proto = { - .func = bpf_task_storage_delete_recur, - .gpl_only = false, - .ret_type = RET_INTEGER, - .arg1_type = ARG_CONST_MAP_PTR, - .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL, - .arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK], -}; - const struct bpf_func_proto bpf_task_storage_delete_proto = { .func = bpf_task_storage_delete, .gpl_only = false, diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 0458597134da..7ac32798eb04 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2167,12 +2167,8 @@ bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_cgroup_classid_curr_proto; #endif case BPF_FUNC_task_storage_get: - if (bpf_prog_check_recur(prog)) - return &bpf_task_storage_get_recur_proto; return &bpf_task_storage_get_proto; case BPF_FUNC_task_storage_delete: - if (bpf_prog_check_recur(prog)) - return &bpf_task_storage_delete_recur_proto; return &bpf_task_storage_delete_proto; default: break; diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index de111818f3a0..1eb3e060994e 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -40,29 +40,30 @@ static int bpf_sk_storage_del(struct sock *sk, struct bpf_map *map) if (!sdata) return -ENOENT; - bpf_selem_unlink(SELEM(sdata), false); - - return 0; + return bpf_selem_unlink(SELEM(sdata)); } /* Called by __sk_destruct() & bpf_sk_storage_clone() */ void bpf_sk_storage_free(struct sock *sk) { struct bpf_local_storage *sk_storage; + u32 uncharge; rcu_read_lock_dont_migrate(); sk_storage = rcu_dereference(sk->sk_bpf_storage); if (!sk_storage) goto out; - bpf_local_storage_destroy(sk_storage); + uncharge = bpf_local_storage_destroy(sk_storage); + if (uncharge) + atomic_sub(uncharge, &sk->sk_omem_alloc); out: rcu_read_unlock_migrate(); } static void bpf_sk_storage_map_free(struct bpf_map *map) { - bpf_local_storage_map_free(map, &sk_cache, NULL); + bpf_local_storage_map_free(map, &sk_cache); } static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr) @@ -191,7 +192,14 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk) } if (new_sk_storage) { - bpf_selem_link_map(smap, copy_selem); + ret = 
bpf_selem_link_map(smap, new_sk_storage, copy_selem); + if (ret) { + bpf_selem_free(copy_selem, true); + atomic_sub(smap->elem_size, + &newsk->sk_omem_alloc); + bpf_map_put(map); + goto out; + } bpf_selem_link_storage_nolock(new_sk_storage, copy_selem); } else { ret = bpf_local_storage_alloc(newsk, smap, copy_selem, GFP_ATOMIC); diff --git a/tools/testing/selftests/bpf/map_tests/task_storage_map.c b/tools/testing/selftests/bpf/map_tests/task_storage_map.c deleted file mode 100644 index a4121d2248ac..000000000000 --- a/tools/testing/selftests/bpf/map_tests/task_storage_map.c +++ /dev/null @@ -1,128 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2022. Huawei Technologies Co., Ltd */ -#define _GNU_SOURCE -#include <sched.h> -#include <unistd.h> -#include <stdlib.h> -#include <stdbool.h> -#include <errno.h> -#include <string.h> -#include <pthread.h> - -#include <bpf/bpf.h> -#include <bpf/libbpf.h> - -#include "bpf_util.h" -#include "test_maps.h" -#include "task_local_storage_helpers.h" -#include "read_bpf_task_storage_busy.skel.h" - -struct lookup_ctx { - bool start; - bool stop; - int pid_fd; - int map_fd; - int loop; -}; - -static void *lookup_fn(void *arg) -{ - struct lookup_ctx *ctx = arg; - long value; - int i = 0; - - while (!ctx->start) - usleep(1); - - while (!ctx->stop && i++ < ctx->loop) - bpf_map_lookup_elem(ctx->map_fd, &ctx->pid_fd, &value); - return NULL; -} - -static void abort_lookup(struct lookup_ctx *ctx, pthread_t *tids, unsigned int nr) -{ - unsigned int i; - - ctx->stop = true; - ctx->start = true; - for (i = 0; i < nr; i++) - pthread_join(tids[i], NULL); -} - -void test_task_storage_map_stress_lookup(void) -{ -#define MAX_NR_THREAD 4096 - unsigned int i, nr = 256, loop = 8192, cpu = 0; - struct read_bpf_task_storage_busy *skel; - pthread_t tids[MAX_NR_THREAD]; - struct lookup_ctx ctx; - cpu_set_t old, new; - const char *cfg; - int err; - - cfg = getenv("TASK_STORAGE_MAP_NR_THREAD"); - if (cfg) { - nr = atoi(cfg); - if (nr > MAX_NR_THREAD) - nr = MAX_NR_THREAD; - } - cfg = getenv("TASK_STORAGE_MAP_NR_LOOP"); - if (cfg) - loop = atoi(cfg); - cfg = getenv("TASK_STORAGE_MAP_PIN_CPU"); - if (cfg) - cpu = atoi(cfg); - - skel = read_bpf_task_storage_busy__open_and_load(); - err = libbpf_get_error(skel); - CHECK(err, "open_and_load", "error %d\n", err); - - /* Only for a fully preemptible kernel */ - if (!skel->kconfig->CONFIG_PREEMPTION) { - printf("%s SKIP (no CONFIG_PREEMPTION)\n", __func__); - read_bpf_task_storage_busy__destroy(skel); - skips++; - return; - } - - /* Save the old affinity setting */ - sched_getaffinity(getpid(), sizeof(old), &old); - - /* Pinned on a specific CPU */ - CPU_ZERO(&new); - CPU_SET(cpu, &new); - sched_setaffinity(getpid(), sizeof(new), &new); - - ctx.start = false; - ctx.stop = false; - ctx.pid_fd = sys_pidfd_open(getpid(), 0); - ctx.map_fd = bpf_map__fd(skel->maps.task); - ctx.loop = loop; - for (i = 0; i < nr; i++) { - err = pthread_create(&tids[i], NULL, lookup_fn, &ctx); - if (err) { - abort_lookup(&ctx, tids, i); - CHECK(err, "pthread_create", "error %d\n", err); - goto out; - } - } - - ctx.start = true; - for (i = 0; i < nr; i++) - pthread_join(tids[i], NULL); - - skel->bss->pid = getpid(); - err = read_bpf_task_storage_busy__attach(skel); - CHECK(err, "attach", "error %d\n", err); - - /* Trigger program */ - sys_gettid(); - skel->bss->pid = 0; - - CHECK(skel->bss->busy != 0, "bad bpf_task_storage_busy", "got %d\n", skel->bss->busy); -out: - read_bpf_task_storage_busy__destroy(skel); - /* Restore affinity setting */ - 
sched_setaffinity(getpid(), sizeof(old), &old); - printf("%s:PASS\n", __func__); -} diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c index 10cba526d3e6..f1642794f70e 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c @@ -875,8 +875,8 @@ static void test_btf_dump_var_data(struct btf *btf, struct btf_dump *d, TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_number", int, BTF_F_COMPACT, "int cpu_number = (int)100", 100); #endif - TEST_BTF_DUMP_VAR(btf, d, NULL, str, "bpf_cgrp_storage_busy", int, BTF_F_COMPACT, - "static int bpf_cgrp_storage_busy = (int)2", 2); + TEST_BTF_DUMP_VAR(btf, d, NULL, str, "bpf_bprintf_nest_level", int, BTF_F_COMPACT, + "static int bpf_bprintf_nest_level = (int)2", 2); } struct btf_dump_string_ctx { diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c index 42e822ea352f..7bee33797c71 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c @@ -112,24 +112,24 @@ static void test_recursion(void) task_ls_recursion__detach(skel); /* Refer to the comment in BPF_PROG(on_update) for - * the explanation on the value 201 and 100. + * the explanation on the value 200 and 1. */ map_fd = bpf_map__fd(skel->maps.map_a); err = bpf_map_lookup_elem(map_fd, &task_fd, &value); ASSERT_OK(err, "lookup map_a"); - ASSERT_EQ(value, 201, "map_a value"); - ASSERT_EQ(skel->bss->nr_del_errs, 1, "bpf_task_storage_delete busy"); + ASSERT_EQ(value, 200, "map_a value"); + ASSERT_EQ(skel->bss->nr_del_errs, 0, "bpf_task_storage_delete busy"); map_fd = bpf_map__fd(skel->maps.map_b); err = bpf_map_lookup_elem(map_fd, &task_fd, &value); ASSERT_OK(err, "lookup map_b"); - ASSERT_EQ(value, 100, "map_b value"); + ASSERT_EQ(value, 1, "map_b value"); prog_fd = bpf_program__fd(skel->progs.on_update); memset(&info, 0, sizeof(info)); err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len); ASSERT_OK(err, "get prog info"); - ASSERT_EQ(info.recursion_misses, 0, "on_update prog recursion"); + ASSERT_EQ(info.recursion_misses, 2, "on_update prog recursion"); prog_fd = bpf_program__fd(skel->progs.on_enter); memset(&info, 0, sizeof(info)); diff --git a/tools/testing/selftests/bpf/progs/local_storage.c b/tools/testing/selftests/bpf/progs/local_storage.c index 637e75df2e14..d0be77011a84 100644 --- a/tools/testing/selftests/bpf/progs/local_storage.c +++ b/tools/testing/selftests/bpf/progs/local_storage.c @@ -62,7 +62,6 @@ SEC("lsm/inode_unlink") int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim) { __u32 pid = bpf_get_current_pid_tgid() >> 32; - struct bpf_local_storage *local_storage; struct local_storage *storage; struct task_struct *task; bool is_self_unlink; @@ -88,15 +87,10 @@ int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim) if (!storage || storage->value) return 0; - if (bpf_task_storage_delete(&task_storage_map, task)) + if (bpf_task_storage_delete(&task_storage_map2, task)) return 0; - /* Ensure that the task_storage_map is disconnected from the storage. - * The storage memory should not be freed back to the - * bpf_mem_alloc. 
- */ - local_storage = task->bpf_storage; - if (!local_storage || local_storage->smap) + if (bpf_task_storage_delete(&task_storage_map, task)) return 0; task_storage_result = 0; @@ -164,18 +158,9 @@ int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address, if (bpf_sk_storage_delete(&sk_storage_map2, sk)) return 0; - storage = bpf_sk_storage_get(&sk_storage_map2, sk, 0, - BPF_LOCAL_STORAGE_GET_F_CREATE); - if (!storage) - return 0; - if (bpf_sk_storage_delete(&sk_storage_map, sk)) return 0; - /* Ensure that the sk_storage_map is disconnected from the storage. */ - if (!sk->sk_bpf_storage || sk->sk_bpf_storage->smap) - return 0; - sk_storage_result = 0; return 0; } diff --git a/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c b/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c deleted file mode 100644 index 69da05bb6c63..000000000000 --- a/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c +++ /dev/null @@ -1,38 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2022. Huawei Technologies Co., Ltd */ -#include "vmlinux.h" -#include <bpf/bpf_helpers.h> -#include <bpf/bpf_tracing.h> - -extern bool CONFIG_PREEMPTION __kconfig __weak; -extern const int bpf_task_storage_busy __ksym; - -char _license[] SEC("license") = "GPL"; - -int pid = 0; -int busy = 0; - -struct { - __uint(type, BPF_MAP_TYPE_TASK_STORAGE); - __uint(map_flags, BPF_F_NO_PREALLOC); - __type(key, int); - __type(value, long); -} task SEC(".maps"); - -SEC("raw_tp/sys_enter") -int BPF_PROG(read_bpf_task_storage_busy) -{ - int *value; - - if (!CONFIG_PREEMPTION) - return 0; - - if (bpf_get_current_pid_tgid() >> 32 != pid) - return 0; - - value = bpf_this_cpu_ptr(&bpf_task_storage_busy); - if (value) - busy = *value; - - return 0; -} diff --git a/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c b/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c index 46d6eb2a3b17..c8f4815c8dfb 100644 --- a/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c +++ b/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c @@ -6,7 +6,6 @@ #include <bpf/bpf_tracing.h> #include <bpf/bpf_core_read.h> -void *local_storage_ptr = NULL; void *sk_ptr = NULL; int cookie_found = 0; __u64 cookie = 0; @@ -19,21 +18,17 @@ struct { __type(value, int); } sk_storage SEC(".maps"); -SEC("fexit/bpf_local_storage_destroy") -int BPF_PROG(bpf_local_storage_destroy, struct bpf_local_storage *local_storage) +SEC("fexit/bpf_sk_storage_free") +int BPF_PROG(bpf_sk_storage_free, struct sock *sk) { - struct sock *sk; - - if (local_storage_ptr != local_storage) + if (sk_ptr != sk) return 0; - sk = bpf_core_cast(sk_ptr, struct sock); if (sk->sk_cookie.counter != cookie) return 0; cookie_found++; omem = sk->sk_omem_alloc.counter; - local_storage_ptr = NULL; return 0; } @@ -50,7 +45,6 @@ int BPF_PROG(inet6_sock_destruct, struct sock *sk) if (value && *value == 0xdeadbeef) { cookie_found++; sk_ptr = sk; - local_storage_ptr = sk->sk_bpf_storage; } return 0; diff --git a/tools/testing/selftests/bpf/progs/task_ls_recursion.c b/tools/testing/selftests/bpf/progs/task_ls_recursion.c index f1853c38aada..b37359432692 100644 --- a/tools/testing/selftests/bpf/progs/task_ls_recursion.c +++ b/tools/testing/selftests/bpf/progs/task_ls_recursion.c @@ -36,14 +36,9 @@ int BPF_PROG(on_update) if (!test_pid || task->pid != test_pid) return 0; + /* This will succeed as there is no real deadlock */ ptr = bpf_task_storage_get(&map_a, task, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); - /* ptr will not be NULL when it 
is called from - * the bpf_task_storage_get(&map_b,...F_CREATE) in - * the BPF_PROG(on_enter) below. It is because - * the value can be found in map_a and the kernel - * does not need to acquire any spin_lock. - */ if (ptr) { int err; @@ -53,12 +48,7 @@ int BPF_PROG(on_update) nr_del_errs++; } - /* This will still fail because map_b is empty and - * this BPF_PROG(on_update) has failed to acquire - * the percpu busy lock => meaning potential - * deadlock is detected and it will fail to create - * new storage. - */ + /* This will succeed as there is no real deadlock */ ptr = bpf_task_storage_get(&map_b, task, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); if (ptr) diff --git a/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c b/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c index 986829aaf73a..6ce98fe9f387 100644 --- a/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c +++ b/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c @@ -1,15 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 #include "vmlinux.h" +#include <errno.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; -#ifndef EBUSY -#define EBUSY 16 -#endif - extern bool CONFIG_PREEMPTION __kconfig __weak; int nr_get_errs = 0; int nr_del_errs = 0; @@ -40,7 +37,7 @@ int BPF_PROG(socket_post_create, struct socket *sock, int family, int type, ret = bpf_task_storage_delete(&task_storage, bpf_get_current_task_btf()); - if (ret == -EBUSY) + if (ret == -EDEADLK || ret == -ETIMEDOUT) __sync_fetch_and_add(&nr_del_errs, 1); return 0; |
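
A user-visible consequence of the conversion, reflected in the task_storage_nodeadlock.c hunk above: with the per-CPU busy counter gone, the bpf_task_storage_delete() helper no longer reports contention as -EBUSY but as -EDEADLK or -ETIMEDOUT from rqspinlock, and on the syscall side bpf_selem_unlink() now propagates the same error codes up through map_delete_elem(). Below is a rough user-space sketch of how a caller might classify the new codes; the helper name, the report-and-return policy, and the map/pidfd setup are assumptions of this sketch, not part of the patch.

    /* Classifying task-storage delete errors after this change.
     * Assumes libbpf 1.x (bpf_map_delete_elem() returns -errno), a
     * BPF_MAP_TYPE_TASK_STORAGE map_fd and a pidfd key set up elsewhere.
     */
    #include <errno.h>
    #include <stdio.h>
    #include <bpf/bpf.h>

    static int delete_task_storage(int map_fd, int pid_fd)
    {
        int err = bpf_map_delete_elem(map_fd, &pid_fd);

        if (!err || err == -ENOENT)
            return 0;   /* deleted, or nothing stored for this task */
        if (err == -EDEADLK || err == -ETIMEDOUT)
            /* rqspinlock could not be taken; bpf_selem_unlink() now
             * propagates this instead of the old busy-counter scheme,
             * which surfaced contention as -EBUSY from the BPF helpers. */
            fprintf(stderr, "task storage lock contended: %d\n", err);
        return err;
    }
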
