 include/linux/bpf_local_storage.h                              |  29
 kernel/bpf/bpf_cgrp_storage.c                                   |  62
 kernel/bpf/bpf_inode_storage.c                                  |   6
 kernel/bpf/bpf_local_storage.c                                  | 408
 kernel/bpf/bpf_task_storage.c                                   | 154
 kernel/bpf/helpers.c                                            |   4
 net/core/bpf_sk_storage.c                                       |  20
 tools/testing/selftests/bpf/map_tests/task_storage_map.c        | 128
 tools/testing/selftests/bpf/prog_tests/btf_dump.c               |   4
 tools/testing/selftests/bpf/prog_tests/task_local_storage.c     |  10
 tools/testing/selftests/bpf/progs/local_storage.c               |  19
 tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c  |  38
 tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c    |  12
 tools/testing/selftests/bpf/progs/task_ls_recursion.c           |  14
 tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c     |   7
 15 files changed, 354 insertions(+), 561 deletions(-)
diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
index 66432248cd81..85efa9772530 100644
--- a/include/linux/bpf_local_storage.h
+++ b/include/linux/bpf_local_storage.h
@@ -15,12 +15,13 @@
#include <linux/types.h>
#include <linux/bpf_mem_alloc.h>
#include <uapi/linux/btf.h>
+#include <asm/rqspinlock.h>
#define BPF_LOCAL_STORAGE_CACHE_SIZE 16
struct bpf_local_storage_map_bucket {
struct hlist_head list;
- raw_spinlock_t lock;
+ rqspinlock_t lock;
};
/* Thp map is not the primary owner of a bpf_local_storage_elem.
@@ -67,6 +68,11 @@ struct bpf_local_storage_data {
u8 data[] __aligned(8);
};
+#define SELEM_MAP_UNLINKED (1 << 0)
+#define SELEM_STORAGE_UNLINKED (1 << 1)
+#define SELEM_UNLINKED (SELEM_MAP_UNLINKED | SELEM_STORAGE_UNLINKED)
+#define SELEM_TOFREE (1 << 2)
+
/* Linked to bpf_local_storage and bpf_local_storage_map */
struct bpf_local_storage_elem {
struct hlist_node map_node; /* Linked to bpf_local_storage_map */
@@ -79,7 +85,9 @@ struct bpf_local_storage_elem {
* after raw_spin_unlock
*/
};
- /* 8 bytes hole */
+ atomic_t state;
+ bool use_kmalloc_nolock;
+ /* 3 bytes hole */
/* The data is stored in another cacheline to minimize
* the number of cachelines access during a cache hit.
*/
@@ -88,13 +96,14 @@ struct bpf_local_storage_elem {
struct bpf_local_storage {
struct bpf_local_storage_data __rcu *cache[BPF_LOCAL_STORAGE_CACHE_SIZE];
- struct bpf_local_storage_map __rcu *smap;
struct hlist_head list; /* List of bpf_local_storage_elem */
void *owner; /* The object that owns the above "list" of
* bpf_local_storage_elem.
*/
struct rcu_head rcu;
- raw_spinlock_t lock; /* Protect adding/removing from the "list" */
+ rqspinlock_t lock; /* Protect adding/removing from the "list" */
+ u64 mem_charge; /* Copy of mem charged to owner. Protected by "lock" */
+ refcount_t owner_refcnt;/* Used to pin owner when map_free is uncharging */
bool use_kmalloc_nolock;
};
@@ -162,11 +171,10 @@ bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
return SDATA(selem);
}
-void bpf_local_storage_destroy(struct bpf_local_storage *local_storage);
+u32 bpf_local_storage_destroy(struct bpf_local_storage *local_storage);
void bpf_local_storage_map_free(struct bpf_map *map,
- struct bpf_local_storage_cache *cache,
- int __percpu *busy_counter);
+ struct bpf_local_storage_cache *cache);
int bpf_local_storage_map_check_btf(const struct bpf_map *map,
const struct btf *btf,
@@ -176,10 +184,11 @@ int bpf_local_storage_map_check_btf(const struct bpf_map *map,
void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
struct bpf_local_storage_elem *selem);
-void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool reuse_now);
+int bpf_selem_unlink(struct bpf_local_storage_elem *selem);
-void bpf_selem_link_map(struct bpf_local_storage_map *smap,
- struct bpf_local_storage_elem *selem);
+int bpf_selem_link_map(struct bpf_local_storage_map *smap,
+ struct bpf_local_storage *local_storage,
+ struct bpf_local_storage_elem *selem);
struct bpf_local_storage_elem *
bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value,
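
The new atomic selem->state and the SELEM_* flags above drive the teardown protocol implemented later in bpf_local_storage.c: when map_free() and the owner's destroy() path race, or a rqspinlock acquisition fails, each path only marks the side it managed to unlink, and the element is freed by whichever path wins the cmpxchg from SELEM_UNLINKED to SELEM_TOFREE. Below is a minimal user-space sketch of that free-exactly-once idea using C11 atomics; the flag names mirror the header, but struct elem and unlink_one_side() are illustrative stand-ins, not kernel code.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#define SELEM_MAP_UNLINKED	(1 << 0)
#define SELEM_STORAGE_UNLINKED	(1 << 1)
#define SELEM_UNLINKED		(SELEM_MAP_UNLINKED | SELEM_STORAGE_UNLINKED)
#define SELEM_TOFREE		(1 << 2)

struct elem {
	atomic_int state;
};

static void unlink_one_side(struct elem *e, bool from_map_free)
{
	int expected = SELEM_UNLINKED;

	/* Record which side has finished unlinking this element. */
	atomic_fetch_or(&e->state,
			from_map_free ? SELEM_MAP_UNLINKED : SELEM_STORAGE_UNLINKED);

	/* Only the side that observes both bits set wins the transition
	 * to SELEM_TOFREE, so the element is freed exactly once.
	 */
	if (atomic_compare_exchange_strong(&e->state, &expected, SELEM_TOFREE)) {
		printf("%s frees the element\n",
		       from_map_free ? "map_free" : "destroy");
		free(e);
	}
}

int main(void)
{
	struct elem *e = calloc(1, sizeof(*e));

	unlink_one_side(e, true);	/* map_free() path: only sets its bit */
	unlink_one_side(e, false);	/* destroy() path: sees both bits, frees */
	return 0;
}
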
diff --git a/kernel/bpf/bpf_cgrp_storage.c b/kernel/bpf/bpf_cgrp_storage.c
index 0687a760974a..c2a2ead1f466 100644
--- a/kernel/bpf/bpf_cgrp_storage.c
+++ b/kernel/bpf/bpf_cgrp_storage.c
@@ -11,29 +11,6 @@
DEFINE_BPF_STORAGE_CACHE(cgroup_cache);
-static DEFINE_PER_CPU(int, bpf_cgrp_storage_busy);
-
-static void bpf_cgrp_storage_lock(void)
-{
- cant_migrate();
- this_cpu_inc(bpf_cgrp_storage_busy);
-}
-
-static void bpf_cgrp_storage_unlock(void)
-{
- this_cpu_dec(bpf_cgrp_storage_busy);
-}
-
-static bool bpf_cgrp_storage_trylock(void)
-{
- cant_migrate();
- if (unlikely(this_cpu_inc_return(bpf_cgrp_storage_busy) != 1)) {
- this_cpu_dec(bpf_cgrp_storage_busy);
- return false;
- }
- return true;
-}
-
static struct bpf_local_storage __rcu **cgroup_storage_ptr(void *owner)
{
struct cgroup *cg = owner;
@@ -45,16 +22,14 @@ void bpf_cgrp_storage_free(struct cgroup *cgroup)
{
struct bpf_local_storage *local_storage;
- rcu_read_lock_dont_migrate();
+ rcu_read_lock();
local_storage = rcu_dereference(cgroup->bpf_cgrp_storage);
if (!local_storage)
goto out;
- bpf_cgrp_storage_lock();
bpf_local_storage_destroy(local_storage);
- bpf_cgrp_storage_unlock();
out:
- rcu_read_unlock_migrate();
+ rcu_read_unlock();
}
static struct bpf_local_storage_data *
@@ -83,9 +58,7 @@ static void *bpf_cgrp_storage_lookup_elem(struct bpf_map *map, void *key)
if (IS_ERR(cgroup))
return ERR_CAST(cgroup);
- bpf_cgrp_storage_lock();
sdata = cgroup_storage_lookup(cgroup, map, true);
- bpf_cgrp_storage_unlock();
cgroup_put(cgroup);
return sdata ? sdata->data : NULL;
}
@@ -102,10 +75,8 @@ static long bpf_cgrp_storage_update_elem(struct bpf_map *map, void *key,
if (IS_ERR(cgroup))
return PTR_ERR(cgroup);
- bpf_cgrp_storage_lock();
sdata = bpf_local_storage_update(cgroup, (struct bpf_local_storage_map *)map,
value, map_flags, false, GFP_ATOMIC);
- bpf_cgrp_storage_unlock();
cgroup_put(cgroup);
return PTR_ERR_OR_ZERO(sdata);
}
@@ -118,8 +89,7 @@ static int cgroup_storage_delete(struct cgroup *cgroup, struct bpf_map *map)
if (!sdata)
return -ENOENT;
- bpf_selem_unlink(SELEM(sdata), false);
- return 0;
+ return bpf_selem_unlink(SELEM(sdata));
}
static long bpf_cgrp_storage_delete_elem(struct bpf_map *map, void *key)
@@ -132,9 +102,7 @@ static long bpf_cgrp_storage_delete_elem(struct bpf_map *map, void *key)
if (IS_ERR(cgroup))
return PTR_ERR(cgroup);
- bpf_cgrp_storage_lock();
err = cgroup_storage_delete(cgroup, map);
- bpf_cgrp_storage_unlock();
cgroup_put(cgroup);
return err;
}
@@ -151,7 +119,7 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
static void cgroup_storage_map_free(struct bpf_map *map)
{
- bpf_local_storage_map_free(map, &cgroup_cache, &bpf_cgrp_storage_busy);
+ bpf_local_storage_map_free(map, &cgroup_cache);
}
/* *gfp_flags* is a hidden argument provided by the verifier */
@@ -159,7 +127,6 @@ BPF_CALL_5(bpf_cgrp_storage_get, struct bpf_map *, map, struct cgroup *, cgroup,
void *, value, u64, flags, gfp_t, gfp_flags)
{
struct bpf_local_storage_data *sdata;
- bool nobusy;
WARN_ON_ONCE(!bpf_rcu_lock_held());
if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE))
@@ -168,38 +135,27 @@ BPF_CALL_5(bpf_cgrp_storage_get, struct bpf_map *, map, struct cgroup *, cgroup,
if (!cgroup)
return (unsigned long)NULL;
- nobusy = bpf_cgrp_storage_trylock();
-
- sdata = cgroup_storage_lookup(cgroup, map, nobusy);
+ sdata = cgroup_storage_lookup(cgroup, map, true);
if (sdata)
- goto unlock;
+ goto out;
/* only allocate new storage, when the cgroup is refcounted */
if (!percpu_ref_is_dying(&cgroup->self.refcnt) &&
- (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) && nobusy)
+ (flags & BPF_LOCAL_STORAGE_GET_F_CREATE))
sdata = bpf_local_storage_update(cgroup, (struct bpf_local_storage_map *)map,
value, BPF_NOEXIST, false, gfp_flags);
-unlock:
- if (nobusy)
- bpf_cgrp_storage_unlock();
+out:
return IS_ERR_OR_NULL(sdata) ? (unsigned long)NULL : (unsigned long)sdata->data;
}
BPF_CALL_2(bpf_cgrp_storage_delete, struct bpf_map *, map, struct cgroup *, cgroup)
{
- int ret;
-
WARN_ON_ONCE(!bpf_rcu_lock_held());
if (!cgroup)
return -EINVAL;
- if (!bpf_cgrp_storage_trylock())
- return -EBUSY;
-
- ret = cgroup_storage_delete(cgroup, map);
- bpf_cgrp_storage_unlock();
- return ret;
+ return cgroup_storage_delete(cgroup, map);
}
const struct bpf_map_ops cgrp_storage_map_ops = {
diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c
index e54cce2b9175..e86734609f3d 100644
--- a/kernel/bpf/bpf_inode_storage.c
+++ b/kernel/bpf/bpf_inode_storage.c
@@ -110,9 +110,7 @@ static int inode_storage_delete(struct inode *inode, struct bpf_map *map)
if (!sdata)
return -ENOENT;
- bpf_selem_unlink(SELEM(sdata), false);
-
- return 0;
+ return bpf_selem_unlink(SELEM(sdata));
}
static long bpf_fd_inode_storage_delete_elem(struct bpf_map *map, void *key)
@@ -186,7 +184,7 @@ static struct bpf_map *inode_storage_map_alloc(union bpf_attr *attr)
static void inode_storage_map_free(struct bpf_map *map)
{
- bpf_local_storage_map_free(map, &inode_cache, NULL);
+ bpf_local_storage_map_free(map, &inode_cache);
}
const struct bpf_map_ops inode_storage_map_ops = {
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index e2fe6c32822b..b28f07d3a0db 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -19,9 +19,9 @@
static struct bpf_local_storage_map_bucket *
select_bucket(struct bpf_local_storage_map *smap,
- struct bpf_local_storage_elem *selem)
+ struct bpf_local_storage *local_storage)
{
- return &smap->buckets[hash_ptr(selem, smap->bucket_log)];
+ return &smap->buckets[hash_ptr(local_storage, smap->bucket_log)];
}
static int mem_charge(struct bpf_local_storage_map *smap, void *owner, u32 size)
@@ -61,11 +61,6 @@ static bool selem_linked_to_storage(const struct bpf_local_storage_elem *selem)
return !hlist_unhashed(&selem->snode);
}
-static bool selem_linked_to_map_lockless(const struct bpf_local_storage_elem *selem)
-{
- return !hlist_unhashed_lockless(&selem->map_node);
-}
-
static bool selem_linked_to_map(const struct bpf_local_storage_elem *selem)
{
return !hlist_unhashed(&selem->map_node);
@@ -90,6 +85,8 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
if (selem) {
RCU_INIT_POINTER(SDATA(selem)->smap, smap);
+ atomic_set(&selem->state, 0);
+ selem->use_kmalloc_nolock = smap->use_kmalloc_nolock;
if (value) {
/* No need to call check_and_init_map_value as memory is zero init */
@@ -198,9 +195,11 @@ static void bpf_selem_free_rcu(struct rcu_head *rcu)
/* The bpf_local_storage_map_free will wait for rcu_barrier */
smap = rcu_dereference_check(SDATA(selem)->smap, 1);
- migrate_disable();
- bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
- migrate_enable();
+ if (smap) {
+ migrate_disable();
+ bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
+ migrate_enable();
+ }
kfree_nolock(selem);
}
@@ -219,13 +218,14 @@ void bpf_selem_free(struct bpf_local_storage_elem *selem,
smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
- if (!smap->use_kmalloc_nolock) {
+ if (!selem->use_kmalloc_nolock) {
/*
* No uptr will be unpin even when reuse_now == false since uptr
* is only supported in task local storage, where
* smap->use_kmalloc_nolock == true.
*/
- bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
+ if (smap)
+ bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
__bpf_selem_free(selem, reuse_now);
return;
}
@@ -256,6 +256,36 @@ static void bpf_selem_free_list(struct hlist_head *list, bool reuse_now)
bpf_selem_free(selem, reuse_now);
}
+static void bpf_selem_unlink_storage_nolock_misc(struct bpf_local_storage_elem *selem,
+ struct bpf_local_storage_map *smap,
+ struct bpf_local_storage *local_storage,
+ bool free_local_storage, bool pin_owner)
+{
+ void *owner = local_storage->owner;
+ u32 uncharge = smap->elem_size;
+
+ if (rcu_access_pointer(local_storage->cache[smap->cache_idx]) ==
+ SDATA(selem))
+ RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL);
+
+ if (pin_owner && !refcount_inc_not_zero(&local_storage->owner_refcnt))
+ return;
+
+ uncharge += free_local_storage ? sizeof(*local_storage) : 0;
+ mem_uncharge(smap, local_storage->owner, uncharge);
+ local_storage->mem_charge -= uncharge;
+
+ if (free_local_storage) {
+ local_storage->owner = NULL;
+
+ /* After this RCU_INIT, owner may be freed and cannot be used */
+ RCU_INIT_POINTER(*owner_storage(smap, owner), NULL);
+ }
+
+ if (pin_owner)
+ refcount_dec(&local_storage->owner_refcnt);
+}
+
/* local_storage->lock must be held and selem->local_storage == local_storage.
* The caller must ensure selem->smap is still valid to be
* dereferenced for its smap->elem_size and smap->cache_idx.
@@ -266,124 +296,219 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor
{
struct bpf_local_storage_map *smap;
bool free_local_storage;
- void *owner;
smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
- owner = local_storage->owner;
-
- /* All uncharging on the owner must be done first.
- * The owner may be freed once the last selem is unlinked
- * from local_storage.
- */
- mem_uncharge(smap, owner, smap->elem_size);
free_local_storage = hlist_is_singular_node(&selem->snode,
&local_storage->list);
- if (free_local_storage) {
- mem_uncharge(smap, owner, sizeof(struct bpf_local_storage));
- local_storage->owner = NULL;
- /* After this RCU_INIT, owner may be freed and cannot be used */
- RCU_INIT_POINTER(*owner_storage(smap, owner), NULL);
+ bpf_selem_unlink_storage_nolock_misc(selem, smap, local_storage,
+ free_local_storage, false);
- /* local_storage is not freed now. local_storage->lock is
- * still held and raw_spin_unlock_bh(&local_storage->lock)
- * will be done by the caller.
- *
- * Although the unlock will be done under
- * rcu_read_lock(), it is more intuitive to
- * read if the freeing of the storage is done
- * after the raw_spin_unlock_bh(&local_storage->lock).
- *
- * Hence, a "bool free_local_storage" is returned
- * to the caller which then calls then frees the storage after
- * all the RCU grace periods have expired.
- */
- }
hlist_del_init_rcu(&selem->snode);
- if (rcu_access_pointer(local_storage->cache[smap->cache_idx]) ==
- SDATA(selem))
- RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL);
hlist_add_head(&selem->free_node, free_selem_list);
- if (rcu_access_pointer(local_storage->smap) == smap)
- RCU_INIT_POINTER(local_storage->smap, NULL);
-
return free_local_storage;
}
-static void bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem,
- bool reuse_now)
+void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
+ struct bpf_local_storage_elem *selem)
+{
+ struct bpf_local_storage_map *smap;
+
+ smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
+ local_storage->mem_charge += smap->elem_size;
+
+ RCU_INIT_POINTER(selem->local_storage, local_storage);
+ hlist_add_head_rcu(&selem->snode, &local_storage->list);
+}
+
+static int bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
+{
+ struct bpf_local_storage *local_storage;
+ struct bpf_local_storage_map *smap;
+ struct bpf_local_storage_map_bucket *b;
+ unsigned long flags;
+ int err;
+
+ local_storage = rcu_dereference_check(selem->local_storage,
+ bpf_rcu_lock_held());
+ smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
+ b = select_bucket(smap, local_storage);
+ err = raw_res_spin_lock_irqsave(&b->lock, flags);
+ if (err)
+ return err;
+
+ hlist_del_init_rcu(&selem->map_node);
+ raw_res_spin_unlock_irqrestore(&b->lock, flags);
+
+ return 0;
+}
+
+static void bpf_selem_unlink_map_nolock(struct bpf_local_storage_elem *selem)
+{
+ hlist_del_init_rcu(&selem->map_node);
+}
+
+int bpf_selem_link_map(struct bpf_local_storage_map *smap,
+ struct bpf_local_storage *local_storage,
+ struct bpf_local_storage_elem *selem)
+{
+ struct bpf_local_storage_map_bucket *b;
+ unsigned long flags;
+ int err;
+
+ b = select_bucket(smap, local_storage);
+
+ err = raw_res_spin_lock_irqsave(&b->lock, flags);
+ if (err)
+ return err;
+
+ hlist_add_head_rcu(&selem->map_node, &b->list);
+ raw_res_spin_unlock_irqrestore(&b->lock, flags);
+
+ return 0;
+}
+
+static void bpf_selem_link_map_nolock(struct bpf_local_storage_map_bucket *b,
+ struct bpf_local_storage_elem *selem)
+{
+ hlist_add_head_rcu(&selem->map_node, &b->list);
+}
+
+/*
+ * Unlink an selem from map and local storage with lock held.
+ * This is the common path used by local storages to delete an selem.
+ */
+int bpf_selem_unlink(struct bpf_local_storage_elem *selem)
{
struct bpf_local_storage *local_storage;
bool free_local_storage = false;
HLIST_HEAD(selem_free_list);
unsigned long flags;
+ int err;
if (unlikely(!selem_linked_to_storage_lockless(selem)))
/* selem has already been unlinked from sk */
- return;
+ return 0;
local_storage = rcu_dereference_check(selem->local_storage,
bpf_rcu_lock_held());
- raw_spin_lock_irqsave(&local_storage->lock, flags);
- if (likely(selem_linked_to_storage(selem)))
+ err = raw_res_spin_lock_irqsave(&local_storage->lock, flags);
+ if (err)
+ return err;
+
+ if (likely(selem_linked_to_storage(selem))) {
+ /* Always unlink from map before unlinking from local_storage
+ * because selem will be freed after successfully unlinked from
+ * the local_storage.
+ */
+ err = bpf_selem_unlink_map(selem);
+ if (err)
+ goto out;
+
free_local_storage = bpf_selem_unlink_storage_nolock(
local_storage, selem, &selem_free_list);
- raw_spin_unlock_irqrestore(&local_storage->lock, flags);
+ }
+out:
+ raw_res_spin_unlock_irqrestore(&local_storage->lock, flags);
- bpf_selem_free_list(&selem_free_list, reuse_now);
+ bpf_selem_free_list(&selem_free_list, false);
if (free_local_storage)
- bpf_local_storage_free(local_storage, reuse_now);
-}
+ bpf_local_storage_free(local_storage, false);
-void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
- struct bpf_local_storage_elem *selem)
-{
- RCU_INIT_POINTER(selem->local_storage, local_storage);
- hlist_add_head_rcu(&selem->snode, &local_storage->list);
+ return err;
}
-static void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
+/*
+ * Unlink an selem from map and local storage with a lockless fallback if callers
+ * are racing or rqspinlock returns an error. It should only be called by
+ * bpf_local_storage_destroy() or bpf_local_storage_map_free().
+ */
+static void bpf_selem_unlink_nofail(struct bpf_local_storage_elem *selem,
+ struct bpf_local_storage_map_bucket *b)
{
+ bool in_map_free = !!b, free_storage = false;
+ struct bpf_local_storage *local_storage;
struct bpf_local_storage_map *smap;
- struct bpf_local_storage_map_bucket *b;
unsigned long flags;
+ int err, unlink = 0;
- if (unlikely(!selem_linked_to_map_lockless(selem)))
- /* selem has already be unlinked from smap */
- return;
-
+ local_storage = rcu_dereference_check(selem->local_storage, bpf_rcu_lock_held());
smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
- b = select_bucket(smap, selem);
- raw_spin_lock_irqsave(&b->lock, flags);
- if (likely(selem_linked_to_map(selem)))
- hlist_del_init_rcu(&selem->map_node);
- raw_spin_unlock_irqrestore(&b->lock, flags);
-}
-void bpf_selem_link_map(struct bpf_local_storage_map *smap,
- struct bpf_local_storage_elem *selem)
-{
- struct bpf_local_storage_map_bucket *b = select_bucket(smap, selem);
- unsigned long flags;
+ if (smap) {
+ b = b ? : select_bucket(smap, local_storage);
+ err = raw_res_spin_lock_irqsave(&b->lock, flags);
+ if (!err) {
+ /*
+ * Call bpf_obj_free_fields() under b->lock to make sure it is done
+ * exactly once for an selem. Safe to free special fields immediately
+ * as no BPF program should be referencing the selem.
+ */
+ if (likely(selem_linked_to_map(selem))) {
+ hlist_del_init_rcu(&selem->map_node);
+ bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
+ unlink++;
+ }
+ raw_res_spin_unlock_irqrestore(&b->lock, flags);
+ }
+ /*
+ * Highly unlikely scenario: resource leak
+ *
+ * When map_free(selem1), destroy(selem1) and destroy(selem2) are racing
+ * and both selems belong to the same bucket, if destroy(selem2) acquires
+ * b->lock and blocks for too long, neither map_free(selem1) nor
+ * destroy(selem1) will be able to free the special field associated
+ * with selem1 as raw_res_spin_lock_irqsave() returns -ETIMEDOUT.
+ */
+ WARN_ON_ONCE(err && in_map_free);
+ if (!err || in_map_free)
+ RCU_INIT_POINTER(SDATA(selem)->smap, NULL);
+ }
- raw_spin_lock_irqsave(&b->lock, flags);
- hlist_add_head_rcu(&selem->map_node, &b->list);
- raw_spin_unlock_irqrestore(&b->lock, flags);
-}
+ if (local_storage) {
+ err = raw_res_spin_lock_irqsave(&local_storage->lock, flags);
+ if (!err) {
+ if (likely(selem_linked_to_storage(selem))) {
+ free_storage = hlist_is_singular_node(&selem->snode,
+ &local_storage->list);
+ /*
+ * Okay to skip clearing owner_storage and storage->owner in
+ * destroy() since the owner is going away. No user or bpf
+ * programs should be able to reference it.
+ */
+ if (smap && in_map_free)
+ bpf_selem_unlink_storage_nolock_misc(
+ selem, smap, local_storage,
+ free_storage, true);
+ hlist_del_init_rcu(&selem->snode);
+ unlink++;
+ }
+ raw_res_spin_unlock_irqrestore(&local_storage->lock, flags);
+ }
+ if (!err || !in_map_free)
+ RCU_INIT_POINTER(selem->local_storage, NULL);
+ }
-void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool reuse_now)
-{
- /* Always unlink from map before unlinking from local_storage
- * because selem will be freed after successfully unlinked from
- * the local_storage.
+ if (unlink != 2)
+ atomic_or(in_map_free ? SELEM_MAP_UNLINKED : SELEM_STORAGE_UNLINKED, &selem->state);
+
+ /*
+ * Normally, an selem can be unlinked under local_storage->lock and b->lock, and
+ * then freed after an RCU grace period. However, if destroy() and map_free() are
+ * racing or rqspinlock returns errors in unlikely situations (unlink != 2), free
+ * the selem only after both map_free() and destroy() see the selem.
*/
- bpf_selem_unlink_map(selem);
- bpf_selem_unlink_storage(selem, reuse_now);
+ if (unlink == 2 ||
+ atomic_cmpxchg(&selem->state, SELEM_UNLINKED, SELEM_TOFREE) == SELEM_UNLINKED)
+ bpf_selem_free(selem, true);
+
+ if (free_storage)
+ bpf_local_storage_free(local_storage, true);
}
void __bpf_local_storage_insert_cache(struct bpf_local_storage *local_storage,
@@ -391,16 +516,20 @@ void __bpf_local_storage_insert_cache(struct bpf_local_storage *local_storage,
struct bpf_local_storage_elem *selem)
{
unsigned long flags;
+ int err;
/* spinlock is needed to avoid racing with the
* parallel delete. Otherwise, publishing an already
* deleted sdata to the cache will become a use-after-free
* problem in the next bpf_local_storage_lookup().
*/
- raw_spin_lock_irqsave(&local_storage->lock, flags);
+ err = raw_res_spin_lock_irqsave(&local_storage->lock, flags);
+ if (err)
+ return;
+
if (selem_linked_to_storage(selem))
rcu_assign_pointer(local_storage->cache[smap->cache_idx], SDATA(selem));
- raw_spin_unlock_irqrestore(&local_storage->lock, flags);
+ raw_res_spin_unlock_irqrestore(&local_storage->lock, flags);
}
static int check_flags(const struct bpf_local_storage_data *old_sdata,
@@ -424,6 +553,8 @@ int bpf_local_storage_alloc(void *owner,
{
struct bpf_local_storage *prev_storage, *storage;
struct bpf_local_storage **owner_storage_ptr;
+ struct bpf_local_storage_map_bucket *b;
+ unsigned long flags;
int err;
err = mem_charge(smap, owner, sizeof(*storage));
@@ -441,14 +572,21 @@ int bpf_local_storage_alloc(void *owner,
goto uncharge;
}
- RCU_INIT_POINTER(storage->smap, smap);
INIT_HLIST_HEAD(&storage->list);
- raw_spin_lock_init(&storage->lock);
+ raw_res_spin_lock_init(&storage->lock);
storage->owner = owner;
+ storage->mem_charge = sizeof(*storage);
storage->use_kmalloc_nolock = smap->use_kmalloc_nolock;
+ refcount_set(&storage->owner_refcnt, 1);
bpf_selem_link_storage_nolock(storage, first_selem);
- bpf_selem_link_map(smap, first_selem);
+
+ b = select_bucket(smap, storage);
+ err = raw_res_spin_lock_irqsave(&b->lock, flags);
+ if (err)
+ goto uncharge;
+
+ bpf_selem_link_map_nolock(b, first_selem);
owner_storage_ptr =
(struct bpf_local_storage **)owner_storage(smap, owner);
@@ -464,10 +602,12 @@ int bpf_local_storage_alloc(void *owner,
*/
prev_storage = cmpxchg(owner_storage_ptr, NULL, storage);
if (unlikely(prev_storage)) {
- bpf_selem_unlink_map(first_selem);
+ bpf_selem_unlink_map_nolock(first_selem);
+ raw_res_spin_unlock_irqrestore(&b->lock, flags);
err = -EAGAIN;
goto uncharge;
}
+ raw_res_spin_unlock_irqrestore(&b->lock, flags);
return 0;
@@ -489,8 +629,9 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
struct bpf_local_storage_data *old_sdata = NULL;
struct bpf_local_storage_elem *alloc_selem, *selem = NULL;
struct bpf_local_storage *local_storage;
+ struct bpf_local_storage_map_bucket *b;
HLIST_HEAD(old_selem_free_list);
- unsigned long flags;
+ unsigned long flags, b_flags;
int err;
/* BPF_EXIST and BPF_NOEXIST cannot be both set */
@@ -549,7 +690,9 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
if (!alloc_selem)
return ERR_PTR(-ENOMEM);
- raw_spin_lock_irqsave(&local_storage->lock, flags);
+ err = raw_res_spin_lock_irqsave(&local_storage->lock, flags);
+ if (err)
+ goto free_selem;
/* Recheck local_storage->list under local_storage->lock */
if (unlikely(hlist_empty(&local_storage->list))) {
@@ -574,22 +717,30 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
goto unlock;
}
+ b = select_bucket(smap, local_storage);
+
+ err = raw_res_spin_lock_irqsave(&b->lock, b_flags);
+ if (err)
+ goto unlock;
+
alloc_selem = NULL;
/* First, link the new selem to the map */
- bpf_selem_link_map(smap, selem);
+ bpf_selem_link_map_nolock(b, selem);
/* Second, link (and publish) the new selem to local_storage */
bpf_selem_link_storage_nolock(local_storage, selem);
/* Third, remove old selem, SELEM(old_sdata) */
if (old_sdata) {
- bpf_selem_unlink_map(SELEM(old_sdata));
+ bpf_selem_unlink_map_nolock(SELEM(old_sdata));
bpf_selem_unlink_storage_nolock(local_storage, SELEM(old_sdata),
&old_selem_free_list);
}
+ raw_res_spin_unlock_irqrestore(&b->lock, b_flags);
unlock:
- raw_spin_unlock_irqrestore(&local_storage->lock, flags);
+ raw_res_spin_unlock_irqrestore(&local_storage->lock, flags);
+free_selem:
bpf_selem_free_list(&old_selem_free_list, false);
if (alloc_selem) {
mem_uncharge(smap, owner, smap->elem_size);
@@ -657,13 +808,13 @@ int bpf_local_storage_map_check_btf(const struct bpf_map *map,
return 0;
}
-void bpf_local_storage_destroy(struct bpf_local_storage *local_storage)
+/*
+ * Destroy local storage when the owner is going away. Caller must uncharge memory
+ * if memory charging is used.
+ */
+u32 bpf_local_storage_destroy(struct bpf_local_storage *local_storage)
{
struct bpf_local_storage_elem *selem;
- bool free_storage = false;
- HLIST_HEAD(free_selem_list);
- struct hlist_node *n;
- unsigned long flags;
/* Neither the bpf_prog nor the bpf_map's syscall
* could be modifying the local_storage->list now.
@@ -674,27 +825,20 @@ void bpf_local_storage_destroy(struct bpf_local_storage *local_storage)
* when unlinking elem from the local_storage->list and
* the map's bucket->list.
*/
- raw_spin_lock_irqsave(&local_storage->lock, flags);
- hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
- /* Always unlink from map before unlinking from
- * local_storage.
- */
- bpf_selem_unlink_map(selem);
- /* If local_storage list has only one element, the
- * bpf_selem_unlink_storage_nolock() will return true.
- * Otherwise, it will return false. The current loop iteration
- * intends to remove all local storage. So the last iteration
- * of the loop will set the free_cgroup_storage to true.
+ hlist_for_each_entry_rcu(selem, &local_storage->list, snode)
+ bpf_selem_unlink_nofail(selem, NULL);
+
+ if (!refcount_dec_and_test(&local_storage->owner_refcnt)) {
+ while (refcount_read(&local_storage->owner_refcnt))
+ cpu_relax();
+ /*
+ * Paired with refcount_dec() in bpf_selem_unlink_nofail()
+ * to make sure destroy() sees the correct local_storage->mem_charge.
*/
- free_storage = bpf_selem_unlink_storage_nolock(
- local_storage, selem, &free_selem_list);
+ smp_mb();
}
- raw_spin_unlock_irqrestore(&local_storage->lock, flags);
-
- bpf_selem_free_list(&free_selem_list, true);
- if (free_storage)
- bpf_local_storage_free(local_storage, true);
+ return local_storage->mem_charge;
}
u64 bpf_local_storage_map_mem_usage(const struct bpf_map *map)
@@ -736,7 +880,7 @@ bpf_local_storage_map_alloc(union bpf_attr *attr,
for (i = 0; i < nbuckets; i++) {
INIT_HLIST_HEAD(&smap->buckets[i].list);
- raw_spin_lock_init(&smap->buckets[i].lock);
+ raw_res_spin_lock_init(&smap->buckets[i].lock);
}
smap->elem_size = offsetof(struct bpf_local_storage_elem,
@@ -758,8 +902,7 @@ free_smap:
}
void bpf_local_storage_map_free(struct bpf_map *map,
- struct bpf_local_storage_cache *cache,
- int __percpu *busy_counter)
+ struct bpf_local_storage_cache *cache)
{
struct bpf_local_storage_map_bucket *b;
struct bpf_local_storage_elem *selem;
@@ -789,15 +932,14 @@ void bpf_local_storage_map_free(struct bpf_map *map,
rcu_read_lock();
/* No one is adding to b->list now */
- while ((selem = hlist_entry_safe(
- rcu_dereference_raw(hlist_first_rcu(&b->list)),
- struct bpf_local_storage_elem, map_node))) {
- if (busy_counter)
- this_cpu_inc(*busy_counter);
- bpf_selem_unlink(selem, true);
- if (busy_counter)
- this_cpu_dec(*busy_counter);
- cond_resched_rcu();
+restart:
+ hlist_for_each_entry_rcu(selem, &b->list, map_node) {
+ bpf_selem_unlink_nofail(selem, b);
+
+ if (need_resched()) {
+ cond_resched_rcu();
+ goto restart;
+ }
}
rcu_read_unlock();
}
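
The mechanical theme of this file is that the bucket and storage locks become rqspinlocks: raw_res_spin_lock_irqsave() can fail (the selftests below expect -EDEADLK or -ETIMEDOUT), so the link/unlink helpers now return int and every caller propagates the error instead of relying on the removed per-cpu busy counters. Here is a rough user-space analogy of that calling convention, sketched with pthread_mutex_timedlock(); the function names are illustrative and nothing below is a kernel API.

#define _POSIX_C_SOURCE 200809L
#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <time.h>

static pthread_mutex_t bucket_lock = PTHREAD_MUTEX_INITIALIZER;

/* Acquire the lock but give up instead of waiting forever; returns 0
 * or a positive errno such as ETIMEDOUT.
 */
static int lock_with_timeout(pthread_mutex_t *lock)
{
	struct timespec deadline;

	clock_gettime(CLOCK_REALTIME, &deadline);
	deadline.tv_sec += 1;
	return pthread_mutex_timedlock(lock, &deadline);
}

/* Mirrors the shape of bpf_selem_link_map() above: do the list update
 * only if the lock was taken, otherwise hand the error to the caller.
 */
static int link_elem(void)
{
	int err = lock_with_timeout(&bucket_lock);

	if (err)
		return -err;	/* e.g. -ETIMEDOUT, as in the patch */
	/* ... the hlist_add_head_rcu() equivalent would go here ... */
	pthread_mutex_unlock(&bucket_lock);
	return 0;
}

int main(void)
{
	printf("link_elem() = %d\n", link_elem());
	return 0;
}
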
diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c
index a1dc1bf0848a..605506792b5b 100644
--- a/kernel/bpf/bpf_task_storage.c
+++ b/kernel/bpf/bpf_task_storage.c
@@ -20,29 +20,6 @@
DEFINE_BPF_STORAGE_CACHE(task_cache);
-static DEFINE_PER_CPU(int, bpf_task_storage_busy);
-
-static void bpf_task_storage_lock(void)
-{
- cant_migrate();
- this_cpu_inc(bpf_task_storage_busy);
-}
-
-static void bpf_task_storage_unlock(void)
-{
- this_cpu_dec(bpf_task_storage_busy);
-}
-
-static bool bpf_task_storage_trylock(void)
-{
- cant_migrate();
- if (unlikely(this_cpu_inc_return(bpf_task_storage_busy) != 1)) {
- this_cpu_dec(bpf_task_storage_busy);
- return false;
- }
- return true;
-}
-
static struct bpf_local_storage __rcu **task_storage_ptr(void *owner)
{
struct task_struct *task = owner;
@@ -70,17 +47,15 @@ void bpf_task_storage_free(struct task_struct *task)
{
struct bpf_local_storage *local_storage;
- rcu_read_lock_dont_migrate();
+ rcu_read_lock();
local_storage = rcu_dereference(task->bpf_storage);
if (!local_storage)
goto out;
- bpf_task_storage_lock();
bpf_local_storage_destroy(local_storage);
- bpf_task_storage_unlock();
out:
- rcu_read_unlock_migrate();
+ rcu_read_unlock();
}
static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key)
@@ -106,9 +81,7 @@ static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key)
goto out;
}
- bpf_task_storage_lock();
sdata = task_storage_lookup(task, map, true);
- bpf_task_storage_unlock();
put_pid(pid);
return sdata ? sdata->data : NULL;
out:
@@ -143,11 +116,9 @@ static long bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key,
goto out;
}
- bpf_task_storage_lock();
sdata = bpf_local_storage_update(
task, (struct bpf_local_storage_map *)map, value, map_flags,
true, GFP_ATOMIC);
- bpf_task_storage_unlock();
err = PTR_ERR_OR_ZERO(sdata);
out:
@@ -155,8 +126,7 @@ out:
return err;
}
-static int task_storage_delete(struct task_struct *task, struct bpf_map *map,
- bool nobusy)
+static int task_storage_delete(struct task_struct *task, struct bpf_map *map)
{
struct bpf_local_storage_data *sdata;
@@ -164,12 +134,7 @@ static int task_storage_delete(struct task_struct *task, struct bpf_map *map,
if (!sdata)
return -ENOENT;
- if (!nobusy)
- return -EBUSY;
-
- bpf_selem_unlink(SELEM(sdata), false);
-
- return 0;
+ return bpf_selem_unlink(SELEM(sdata));
}
static long bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key)
@@ -194,111 +159,50 @@ static long bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key)
goto out;
}
- bpf_task_storage_lock();
- err = task_storage_delete(task, map, true);
- bpf_task_storage_unlock();
+ err = task_storage_delete(task, map);
out:
put_pid(pid);
return err;
}
-/* Called by bpf_task_storage_get*() helpers */
-static void *__bpf_task_storage_get(struct bpf_map *map,
- struct task_struct *task, void *value,
- u64 flags, gfp_t gfp_flags, bool nobusy)
+/* *gfp_flags* is a hidden argument provided by the verifier */
+BPF_CALL_5(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
+ task, void *, value, u64, flags, gfp_t, gfp_flags)
{
struct bpf_local_storage_data *sdata;
- sdata = task_storage_lookup(task, map, nobusy);
+ WARN_ON_ONCE(!bpf_rcu_lock_held());
+ if (flags & ~BPF_LOCAL_STORAGE_GET_F_CREATE || !task)
+ return (unsigned long)NULL;
+
+ sdata = task_storage_lookup(task, map, true);
if (sdata)
- return sdata->data;
+ return (unsigned long)sdata->data;
/* only allocate new storage, when the task is refcounted */
if (refcount_read(&task->usage) &&
- (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) && nobusy) {
+ (flags & BPF_LOCAL_STORAGE_GET_F_CREATE)) {
sdata = bpf_local_storage_update(
task, (struct bpf_local_storage_map *)map, value,
BPF_NOEXIST, false, gfp_flags);
- return IS_ERR(sdata) ? NULL : sdata->data;
+ return IS_ERR(sdata) ? (unsigned long)NULL : (unsigned long)sdata->data;
}
- return NULL;
-}
-
-/* *gfp_flags* is a hidden argument provided by the verifier */
-BPF_CALL_5(bpf_task_storage_get_recur, struct bpf_map *, map, struct task_struct *,
- task, void *, value, u64, flags, gfp_t, gfp_flags)
-{
- bool nobusy;
- void *data;
-
- WARN_ON_ONCE(!bpf_rcu_lock_held());
- if (flags & ~BPF_LOCAL_STORAGE_GET_F_CREATE || !task)
- return (unsigned long)NULL;
-
- nobusy = bpf_task_storage_trylock();
- data = __bpf_task_storage_get(map, task, value, flags,
- gfp_flags, nobusy);
- if (nobusy)
- bpf_task_storage_unlock();
- return (unsigned long)data;
-}
-
-/* *gfp_flags* is a hidden argument provided by the verifier */
-BPF_CALL_5(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
- task, void *, value, u64, flags, gfp_t, gfp_flags)
-{
- void *data;
-
- WARN_ON_ONCE(!bpf_rcu_lock_held());
- if (flags & ~BPF_LOCAL_STORAGE_GET_F_CREATE || !task)
- return (unsigned long)NULL;
-
- bpf_task_storage_lock();
- data = __bpf_task_storage_get(map, task, value, flags,
- gfp_flags, true);
- bpf_task_storage_unlock();
- return (unsigned long)data;
-}
-
-BPF_CALL_2(bpf_task_storage_delete_recur, struct bpf_map *, map, struct task_struct *,
- task)
-{
- bool nobusy;
- int ret;
-
- WARN_ON_ONCE(!bpf_rcu_lock_held());
- if (!task)
- return -EINVAL;
-
- nobusy = bpf_task_storage_trylock();
- /* This helper must only be called from places where the lifetime of the task
- * is guaranteed. Either by being refcounted or by being protected
- * by an RCU read-side critical section.
- */
- ret = task_storage_delete(task, map, nobusy);
- if (nobusy)
- bpf_task_storage_unlock();
- return ret;
+ return (unsigned long)NULL;
}
BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *,
task)
{
- int ret;
-
WARN_ON_ONCE(!bpf_rcu_lock_held());
if (!task)
return -EINVAL;
- bpf_task_storage_lock();
/* This helper must only be called from places where the lifetime of the task
* is guaranteed. Either by being refcounted or by being protected
* by an RCU read-side critical section.
*/
- ret = task_storage_delete(task, map, true);
- bpf_task_storage_unlock();
- return ret;
+ return task_storage_delete(task, map);
}
static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key)
@@ -313,7 +217,7 @@ static struct bpf_map *task_storage_map_alloc(union bpf_attr *attr)
static void task_storage_map_free(struct bpf_map *map)
{
- bpf_local_storage_map_free(map, &task_cache, &bpf_task_storage_busy);
+ bpf_local_storage_map_free(map, &task_cache);
}
BTF_ID_LIST_GLOBAL_SINGLE(bpf_local_storage_map_btf_id, struct, bpf_local_storage_map)
@@ -332,17 +236,6 @@ const struct bpf_map_ops task_storage_map_ops = {
.map_owner_storage_ptr = task_storage_ptr,
};
-const struct bpf_func_proto bpf_task_storage_get_recur_proto = {
- .func = bpf_task_storage_get_recur,
- .gpl_only = false,
- .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
- .arg1_type = ARG_CONST_MAP_PTR,
- .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL,
- .arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
- .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
- .arg4_type = ARG_ANYTHING,
-};
-
const struct bpf_func_proto bpf_task_storage_get_proto = {
.func = bpf_task_storage_get,
.gpl_only = false,
@@ -354,15 +247,6 @@ const struct bpf_func_proto bpf_task_storage_get_proto = {
.arg4_type = ARG_ANYTHING,
};
-const struct bpf_func_proto bpf_task_storage_delete_recur_proto = {
- .func = bpf_task_storage_delete_recur,
- .gpl_only = false,
- .ret_type = RET_INTEGER,
- .arg1_type = ARG_CONST_MAP_PTR,
- .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL,
- .arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
-};
-
const struct bpf_func_proto bpf_task_storage_delete_proto = {
.func = bpf_task_storage_delete,
.gpl_only = false,
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 0458597134da..7ac32798eb04 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2167,12 +2167,8 @@ bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_cgroup_classid_curr_proto;
#endif
case BPF_FUNC_task_storage_get:
- if (bpf_prog_check_recur(prog))
- return &bpf_task_storage_get_recur_proto;
return &bpf_task_storage_get_proto;
case BPF_FUNC_task_storage_delete:
- if (bpf_prog_check_recur(prog))
- return &bpf_task_storage_delete_recur_proto;
return &bpf_task_storage_delete_proto;
default:
break;
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index de111818f3a0..1eb3e060994e 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -40,29 +40,30 @@ static int bpf_sk_storage_del(struct sock *sk, struct bpf_map *map)
if (!sdata)
return -ENOENT;
- bpf_selem_unlink(SELEM(sdata), false);
-
- return 0;
+ return bpf_selem_unlink(SELEM(sdata));
}
/* Called by __sk_destruct() & bpf_sk_storage_clone() */
void bpf_sk_storage_free(struct sock *sk)
{
struct bpf_local_storage *sk_storage;
+ u32 uncharge;
rcu_read_lock_dont_migrate();
sk_storage = rcu_dereference(sk->sk_bpf_storage);
if (!sk_storage)
goto out;
- bpf_local_storage_destroy(sk_storage);
+ uncharge = bpf_local_storage_destroy(sk_storage);
+ if (uncharge)
+ atomic_sub(uncharge, &sk->sk_omem_alloc);
out:
rcu_read_unlock_migrate();
}
static void bpf_sk_storage_map_free(struct bpf_map *map)
{
- bpf_local_storage_map_free(map, &sk_cache, NULL);
+ bpf_local_storage_map_free(map, &sk_cache);
}
static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
@@ -191,7 +192,14 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
}
if (new_sk_storage) {
- bpf_selem_link_map(smap, copy_selem);
+ ret = bpf_selem_link_map(smap, new_sk_storage, copy_selem);
+ if (ret) {
+ bpf_selem_free(copy_selem, true);
+ atomic_sub(smap->elem_size,
+ &newsk->sk_omem_alloc);
+ bpf_map_put(map);
+ goto out;
+ }
bpf_selem_link_storage_nolock(new_sk_storage, copy_selem);
} else {
ret = bpf_local_storage_alloc(newsk, smap, copy_selem, GFP_ATOMIC);
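
With bpf_local_storage_destroy() now returning the accumulated local_storage->mem_charge instead of uncharging per element, bpf_sk_storage_free() performs a single uncharge against sk->sk_omem_alloc. A toy sketch of that contract follows, with deliberately hypothetical toy_* names standing in for the kernel structures.

#include <stdint.h>
#include <stdio.h>

struct toy_storage {
	uint64_t mem_charge;	/* bytes charged to the owner; updated under the storage lock */
};

struct toy_sock {
	int64_t omem_alloc;	/* stand-in for sk->sk_omem_alloc */
	struct toy_storage *storage;
};

/* Stand-in for bpf_local_storage_destroy(): tear everything down and
 * report how many bytes were charged so the caller can uncharge once.
 */
static uint64_t toy_storage_destroy(struct toy_storage *storage)
{
	/* ... unlink all elements; each link added elem_size to mem_charge,
	 * so the running total is already available here ...
	 */
	return storage->mem_charge;
}

/* Stand-in for bpf_sk_storage_free(): the caller, not destroy(), does
 * the single uncharge against the owner's accounting.
 */
static void toy_sk_storage_free(struct toy_sock *sk)
{
	uint64_t uncharge;

	if (!sk->storage)
		return;
	uncharge = toy_storage_destroy(sk->storage);
	if (uncharge)
		sk->omem_alloc -= (int64_t)uncharge;
}

int main(void)
{
	struct toy_storage st = { .mem_charge = 128 + 64 };
	struct toy_sock sk = { .omem_alloc = 512, .storage = &st };

	toy_sk_storage_free(&sk);
	printf("omem_alloc after free: %lld\n", (long long)sk.omem_alloc);
	return 0;
}
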
diff --git a/tools/testing/selftests/bpf/map_tests/task_storage_map.c b/tools/testing/selftests/bpf/map_tests/task_storage_map.c
deleted file mode 100644
index a4121d2248ac..000000000000
--- a/tools/testing/selftests/bpf/map_tests/task_storage_map.c
+++ /dev/null
@@ -1,128 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2022. Huawei Technologies Co., Ltd */
-#define _GNU_SOURCE
-#include <sched.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <stdbool.h>
-#include <errno.h>
-#include <string.h>
-#include <pthread.h>
-
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-#include "bpf_util.h"
-#include "test_maps.h"
-#include "task_local_storage_helpers.h"
-#include "read_bpf_task_storage_busy.skel.h"
-
-struct lookup_ctx {
- bool start;
- bool stop;
- int pid_fd;
- int map_fd;
- int loop;
-};
-
-static void *lookup_fn(void *arg)
-{
- struct lookup_ctx *ctx = arg;
- long value;
- int i = 0;
-
- while (!ctx->start)
- usleep(1);
-
- while (!ctx->stop && i++ < ctx->loop)
- bpf_map_lookup_elem(ctx->map_fd, &ctx->pid_fd, &value);
- return NULL;
-}
-
-static void abort_lookup(struct lookup_ctx *ctx, pthread_t *tids, unsigned int nr)
-{
- unsigned int i;
-
- ctx->stop = true;
- ctx->start = true;
- for (i = 0; i < nr; i++)
- pthread_join(tids[i], NULL);
-}
-
-void test_task_storage_map_stress_lookup(void)
-{
-#define MAX_NR_THREAD 4096
- unsigned int i, nr = 256, loop = 8192, cpu = 0;
- struct read_bpf_task_storage_busy *skel;
- pthread_t tids[MAX_NR_THREAD];
- struct lookup_ctx ctx;
- cpu_set_t old, new;
- const char *cfg;
- int err;
-
- cfg = getenv("TASK_STORAGE_MAP_NR_THREAD");
- if (cfg) {
- nr = atoi(cfg);
- if (nr > MAX_NR_THREAD)
- nr = MAX_NR_THREAD;
- }
- cfg = getenv("TASK_STORAGE_MAP_NR_LOOP");
- if (cfg)
- loop = atoi(cfg);
- cfg = getenv("TASK_STORAGE_MAP_PIN_CPU");
- if (cfg)
- cpu = atoi(cfg);
-
- skel = read_bpf_task_storage_busy__open_and_load();
- err = libbpf_get_error(skel);
- CHECK(err, "open_and_load", "error %d\n", err);
-
- /* Only for a fully preemptible kernel */
- if (!skel->kconfig->CONFIG_PREEMPTION) {
- printf("%s SKIP (no CONFIG_PREEMPTION)\n", __func__);
- read_bpf_task_storage_busy__destroy(skel);
- skips++;
- return;
- }
-
- /* Save the old affinity setting */
- sched_getaffinity(getpid(), sizeof(old), &old);
-
- /* Pinned on a specific CPU */
- CPU_ZERO(&new);
- CPU_SET(cpu, &new);
- sched_setaffinity(getpid(), sizeof(new), &new);
-
- ctx.start = false;
- ctx.stop = false;
- ctx.pid_fd = sys_pidfd_open(getpid(), 0);
- ctx.map_fd = bpf_map__fd(skel->maps.task);
- ctx.loop = loop;
- for (i = 0; i < nr; i++) {
- err = pthread_create(&tids[i], NULL, lookup_fn, &ctx);
- if (err) {
- abort_lookup(&ctx, tids, i);
- CHECK(err, "pthread_create", "error %d\n", err);
- goto out;
- }
- }
-
- ctx.start = true;
- for (i = 0; i < nr; i++)
- pthread_join(tids[i], NULL);
-
- skel->bss->pid = getpid();
- err = read_bpf_task_storage_busy__attach(skel);
- CHECK(err, "attach", "error %d\n", err);
-
- /* Trigger program */
- sys_gettid();
- skel->bss->pid = 0;
-
- CHECK(skel->bss->busy != 0, "bad bpf_task_storage_busy", "got %d\n", skel->bss->busy);
-out:
- read_bpf_task_storage_busy__destroy(skel);
- /* Restore affinity setting */
- sched_setaffinity(getpid(), sizeof(old), &old);
- printf("%s:PASS\n", __func__);
-}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
index 10cba526d3e6..f1642794f70e 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -875,8 +875,8 @@ static void test_btf_dump_var_data(struct btf *btf, struct btf_dump *d,
TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_number", int, BTF_F_COMPACT,
"int cpu_number = (int)100", 100);
#endif
- TEST_BTF_DUMP_VAR(btf, d, NULL, str, "bpf_cgrp_storage_busy", int, BTF_F_COMPACT,
- "static int bpf_cgrp_storage_busy = (int)2", 2);
+ TEST_BTF_DUMP_VAR(btf, d, NULL, str, "bpf_bprintf_nest_level", int, BTF_F_COMPACT,
+ "static int bpf_bprintf_nest_level = (int)2", 2);
}
struct btf_dump_string_ctx {
diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
index 42e822ea352f..7bee33797c71 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
@@ -112,24 +112,24 @@ static void test_recursion(void)
task_ls_recursion__detach(skel);
/* Refer to the comment in BPF_PROG(on_update) for
- * the explanation on the value 201 and 100.
+ * the explanation on the value 200 and 1.
*/
map_fd = bpf_map__fd(skel->maps.map_a);
err = bpf_map_lookup_elem(map_fd, &task_fd, &value);
ASSERT_OK(err, "lookup map_a");
- ASSERT_EQ(value, 201, "map_a value");
- ASSERT_EQ(skel->bss->nr_del_errs, 1, "bpf_task_storage_delete busy");
+ ASSERT_EQ(value, 200, "map_a value");
+ ASSERT_EQ(skel->bss->nr_del_errs, 0, "bpf_task_storage_delete busy");
map_fd = bpf_map__fd(skel->maps.map_b);
err = bpf_map_lookup_elem(map_fd, &task_fd, &value);
ASSERT_OK(err, "lookup map_b");
- ASSERT_EQ(value, 100, "map_b value");
+ ASSERT_EQ(value, 1, "map_b value");
prog_fd = bpf_program__fd(skel->progs.on_update);
memset(&info, 0, sizeof(info));
err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
ASSERT_OK(err, "get prog info");
- ASSERT_EQ(info.recursion_misses, 0, "on_update prog recursion");
+ ASSERT_EQ(info.recursion_misses, 2, "on_update prog recursion");
prog_fd = bpf_program__fd(skel->progs.on_enter);
memset(&info, 0, sizeof(info));
diff --git a/tools/testing/selftests/bpf/progs/local_storage.c b/tools/testing/selftests/bpf/progs/local_storage.c
index 637e75df2e14..d0be77011a84 100644
--- a/tools/testing/selftests/bpf/progs/local_storage.c
+++ b/tools/testing/selftests/bpf/progs/local_storage.c
@@ -62,7 +62,6 @@ SEC("lsm/inode_unlink")
int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim)
{
__u32 pid = bpf_get_current_pid_tgid() >> 32;
- struct bpf_local_storage *local_storage;
struct local_storage *storage;
struct task_struct *task;
bool is_self_unlink;
@@ -88,15 +87,10 @@ int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim)
if (!storage || storage->value)
return 0;
- if (bpf_task_storage_delete(&task_storage_map, task))
+ if (bpf_task_storage_delete(&task_storage_map2, task))
return 0;
- /* Ensure that the task_storage_map is disconnected from the storage.
- * The storage memory should not be freed back to the
- * bpf_mem_alloc.
- */
- local_storage = task->bpf_storage;
- if (!local_storage || local_storage->smap)
+ if (bpf_task_storage_delete(&task_storage_map, task))
return 0;
task_storage_result = 0;
@@ -164,18 +158,9 @@ int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address,
if (bpf_sk_storage_delete(&sk_storage_map2, sk))
return 0;
- storage = bpf_sk_storage_get(&sk_storage_map2, sk, 0,
- BPF_LOCAL_STORAGE_GET_F_CREATE);
- if (!storage)
- return 0;
-
if (bpf_sk_storage_delete(&sk_storage_map, sk))
return 0;
- /* Ensure that the sk_storage_map is disconnected from the storage. */
- if (!sk->sk_bpf_storage || sk->sk_bpf_storage->smap)
- return 0;
-
sk_storage_result = 0;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c b/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c
deleted file mode 100644
index 69da05bb6c63..000000000000
--- a/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c
+++ /dev/null
@@ -1,38 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2022. Huawei Technologies Co., Ltd */
-#include "vmlinux.h"
-#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
-
-extern bool CONFIG_PREEMPTION __kconfig __weak;
-extern const int bpf_task_storage_busy __ksym;
-
-char _license[] SEC("license") = "GPL";
-
-int pid = 0;
-int busy = 0;
-
-struct {
- __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
- __uint(map_flags, BPF_F_NO_PREALLOC);
- __type(key, int);
- __type(value, long);
-} task SEC(".maps");
-
-SEC("raw_tp/sys_enter")
-int BPF_PROG(read_bpf_task_storage_busy)
-{
- int *value;
-
- if (!CONFIG_PREEMPTION)
- return 0;
-
- if (bpf_get_current_pid_tgid() >> 32 != pid)
- return 0;
-
- value = bpf_this_cpu_ptr(&bpf_task_storage_busy);
- if (value)
- busy = *value;
-
- return 0;
-}
diff --git a/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c b/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c
index 46d6eb2a3b17..c8f4815c8dfb 100644
--- a/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c
+++ b/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c
@@ -6,7 +6,6 @@
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
-void *local_storage_ptr = NULL;
void *sk_ptr = NULL;
int cookie_found = 0;
__u64 cookie = 0;
@@ -19,21 +18,17 @@ struct {
__type(value, int);
} sk_storage SEC(".maps");
-SEC("fexit/bpf_local_storage_destroy")
-int BPF_PROG(bpf_local_storage_destroy, struct bpf_local_storage *local_storage)
+SEC("fexit/bpf_sk_storage_free")
+int BPF_PROG(bpf_sk_storage_free, struct sock *sk)
{
- struct sock *sk;
-
- if (local_storage_ptr != local_storage)
+ if (sk_ptr != sk)
return 0;
- sk = bpf_core_cast(sk_ptr, struct sock);
if (sk->sk_cookie.counter != cookie)
return 0;
cookie_found++;
omem = sk->sk_omem_alloc.counter;
- local_storage_ptr = NULL;
return 0;
}
@@ -50,7 +45,6 @@ int BPF_PROG(inet6_sock_destruct, struct sock *sk)
if (value && *value == 0xdeadbeef) {
cookie_found++;
sk_ptr = sk;
- local_storage_ptr = sk->sk_bpf_storage;
}
return 0;
diff --git a/tools/testing/selftests/bpf/progs/task_ls_recursion.c b/tools/testing/selftests/bpf/progs/task_ls_recursion.c
index f1853c38aada..b37359432692 100644
--- a/tools/testing/selftests/bpf/progs/task_ls_recursion.c
+++ b/tools/testing/selftests/bpf/progs/task_ls_recursion.c
@@ -36,14 +36,9 @@ int BPF_PROG(on_update)
if (!test_pid || task->pid != test_pid)
return 0;
+ /* This will succeed as there is no real deadlock */
ptr = bpf_task_storage_get(&map_a, task, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
- /* ptr will not be NULL when it is called from
- * the bpf_task_storage_get(&map_b,...F_CREATE) in
- * the BPF_PROG(on_enter) below. It is because
- * the value can be found in map_a and the kernel
- * does not need to acquire any spin_lock.
- */
if (ptr) {
int err;
@@ -53,12 +48,7 @@ int BPF_PROG(on_update)
nr_del_errs++;
}
- /* This will still fail because map_b is empty and
- * this BPF_PROG(on_update) has failed to acquire
- * the percpu busy lock => meaning potential
- * deadlock is detected and it will fail to create
- * new storage.
- */
+ /* This will succeed as there is no real deadlock */
ptr = bpf_task_storage_get(&map_b, task, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
if (ptr)
diff --git a/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c b/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c
index 986829aaf73a..6ce98fe9f387 100644
--- a/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c
+++ b/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c
@@ -1,15 +1,12 @@
// SPDX-License-Identifier: GPL-2.0
#include "vmlinux.h"
+#include <errno.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
-#ifndef EBUSY
-#define EBUSY 16
-#endif
-
extern bool CONFIG_PREEMPTION __kconfig __weak;
int nr_get_errs = 0;
int nr_del_errs = 0;
@@ -40,7 +37,7 @@ int BPF_PROG(socket_post_create, struct socket *sock, int family, int type,
ret = bpf_task_storage_delete(&task_storage,
bpf_get_current_task_btf());
- if (ret == -EBUSY)
+ if (ret == -EDEADLK || ret == -ETIMEDOUT)
__sync_fetch_and_add(&nr_del_errs, 1);
return 0;