diff options
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/bpf/arena.c | 2 | ||||
| -rw-r--r-- | kernel/bpf/arraymap.c | 2 | ||||
| -rw-r--r-- | kernel/bpf/bloom_filter.c | 2 | ||||
| -rw-r--r-- | kernel/bpf/bpf_insn_array.c | 2 | ||||
| -rw-r--r-- | kernel/bpf/bpf_local_storage.c | 75 | ||||
| -rw-r--r-- | kernel/bpf/cpumap.c | 17 | ||||
| -rw-r--r-- | kernel/bpf/devmap.c | 47 | ||||
| -rw-r--r-- | kernel/bpf/hashtab.c | 86 | ||||
| -rw-r--r-- | kernel/bpf/local_storage.c | 2 | ||||
| -rw-r--r-- | kernel/bpf/lpm_trie.c | 2 | ||||
| -rw-r--r-- | kernel/bpf/memalloc.c | 58 | ||||
| -rw-r--r-- | kernel/bpf/syscall.c | 2 | ||||
| -rw-r--r-- | kernel/bpf/tnum.c | 56 | ||||
| -rw-r--r-- | kernel/bpf/verifier.c | 30 | ||||
| -rw-r--r-- | kernel/trace/bpf_trace.c | 4 |
15 files changed, 321 insertions, 66 deletions
diff --git a/kernel/bpf/arena.c b/kernel/bpf/arena.c index 144f30e740e8..f355cf1c1a16 100644 --- a/kernel/bpf/arena.c +++ b/kernel/bpf/arena.c @@ -303,7 +303,7 @@ static long arena_map_update_elem(struct bpf_map *map, void *key, return -EOPNOTSUPP; } -static int arena_map_check_btf(const struct bpf_map *map, const struct btf *btf, +static int arena_map_check_btf(struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type) { return 0; diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 26763df6134a..33de68c95ad8 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -548,7 +548,7 @@ static void percpu_array_map_seq_show_elem(struct bpf_map *map, void *key, rcu_read_unlock(); } -static int array_map_check_btf(const struct bpf_map *map, +static int array_map_check_btf(struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type) diff --git a/kernel/bpf/bloom_filter.c b/kernel/bpf/bloom_filter.c index 35e1ddca74d2..b73336c976b7 100644 --- a/kernel/bpf/bloom_filter.c +++ b/kernel/bpf/bloom_filter.c @@ -180,7 +180,7 @@ static long bloom_map_update_elem(struct bpf_map *map, void *key, return -EINVAL; } -static int bloom_map_check_btf(const struct bpf_map *map, +static int bloom_map_check_btf(struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type) diff --git a/kernel/bpf/bpf_insn_array.c b/kernel/bpf/bpf_insn_array.c index c0286f25ca3c..a2f84afe6f7c 100644 --- a/kernel/bpf/bpf_insn_array.c +++ b/kernel/bpf/bpf_insn_array.c @@ -98,7 +98,7 @@ static long insn_array_delete_elem(struct bpf_map *map, void *key) return -EINVAL; } -static int insn_array_check_btf(const struct bpf_map *map, +static int insn_array_check_btf(struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type) diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c index b28f07d3a0db..9c96a4477f81 100644 --- a/kernel/bpf/bpf_local_storage.c +++ b/kernel/bpf/bpf_local_storage.c @@ -107,14 +107,12 @@ static void __bpf_local_storage_free_trace_rcu(struct rcu_head *rcu) { struct bpf_local_storage *local_storage; - /* If RCU Tasks Trace grace period implies RCU grace period, do - * kfree(), else do kfree_rcu(). + /* + * RCU Tasks Trace grace period implies RCU grace period, do + * kfree() directly. */ local_storage = container_of(rcu, struct bpf_local_storage, rcu); - if (rcu_trace_implies_rcu_gp()) - kfree(local_storage); - else - kfree_rcu(local_storage, rcu); + kfree(local_storage); } /* Handle use_kmalloc_nolock == false */ @@ -138,10 +136,11 @@ static void bpf_local_storage_free_rcu(struct rcu_head *rcu) static void bpf_local_storage_free_trace_rcu(struct rcu_head *rcu) { - if (rcu_trace_implies_rcu_gp()) - bpf_local_storage_free_rcu(rcu); - else - call_rcu(rcu, bpf_local_storage_free_rcu); + /* + * RCU Tasks Trace grace period implies RCU grace period, do + * kfree() directly. + */ + bpf_local_storage_free_rcu(rcu); } static void bpf_local_storage_free(struct bpf_local_storage *local_storage, @@ -164,16 +163,29 @@ static void bpf_local_storage_free(struct bpf_local_storage *local_storage, bpf_local_storage_free_trace_rcu); } -/* rcu tasks trace callback for use_kmalloc_nolock == false */ -static void __bpf_selem_free_trace_rcu(struct rcu_head *rcu) +/* rcu callback for use_kmalloc_nolock == false */ +static void __bpf_selem_free_rcu(struct rcu_head *rcu) { struct bpf_local_storage_elem *selem; + struct bpf_local_storage_map *smap; selem = container_of(rcu, struct bpf_local_storage_elem, rcu); - if (rcu_trace_implies_rcu_gp()) - kfree(selem); - else - kfree_rcu(selem, rcu); + /* bpf_selem_unlink_nofail may have already cleared smap and freed fields. */ + smap = rcu_dereference_check(SDATA(selem)->smap, 1); + + if (smap) + bpf_obj_free_fields(smap->map.record, SDATA(selem)->data); + kfree(selem); +} + +/* rcu tasks trace callback for use_kmalloc_nolock == false */ +static void __bpf_selem_free_trace_rcu(struct rcu_head *rcu) +{ + /* + * RCU Tasks Trace grace period implies RCU grace period, do + * kfree() directly. + */ + __bpf_selem_free_rcu(rcu); } /* Handle use_kmalloc_nolock == false */ @@ -181,7 +193,7 @@ static void __bpf_selem_free(struct bpf_local_storage_elem *selem, bool vanilla_rcu) { if (vanilla_rcu) - kfree_rcu(selem, rcu); + call_rcu(&selem->rcu, __bpf_selem_free_rcu); else call_rcu_tasks_trace(&selem->rcu, __bpf_selem_free_trace_rcu); } @@ -195,37 +207,29 @@ static void bpf_selem_free_rcu(struct rcu_head *rcu) /* The bpf_local_storage_map_free will wait for rcu_barrier */ smap = rcu_dereference_check(SDATA(selem)->smap, 1); - if (smap) { - migrate_disable(); + if (smap) bpf_obj_free_fields(smap->map.record, SDATA(selem)->data); - migrate_enable(); - } kfree_nolock(selem); } static void bpf_selem_free_trace_rcu(struct rcu_head *rcu) { - if (rcu_trace_implies_rcu_gp()) - bpf_selem_free_rcu(rcu); - else - call_rcu(rcu, bpf_selem_free_rcu); + /* + * RCU Tasks Trace grace period implies RCU grace period, do + * kfree() directly. + */ + bpf_selem_free_rcu(rcu); } void bpf_selem_free(struct bpf_local_storage_elem *selem, bool reuse_now) { - struct bpf_local_storage_map *smap; - - smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held()); - if (!selem->use_kmalloc_nolock) { /* * No uptr will be unpin even when reuse_now == false since uptr * is only supported in task local storage, where * smap->use_kmalloc_nolock == true. */ - if (smap) - bpf_obj_free_fields(smap->map.record, SDATA(selem)->data); __bpf_selem_free(selem, reuse_now); return; } @@ -797,7 +801,7 @@ int bpf_local_storage_map_alloc_check(union bpf_attr *attr) return 0; } -int bpf_local_storage_map_check_btf(const struct bpf_map *map, +int bpf_local_storage_map_check_btf(struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type) @@ -958,10 +962,9 @@ restart: */ synchronize_rcu(); - if (smap->use_kmalloc_nolock) { - rcu_barrier_tasks_trace(); - rcu_barrier(); - } + /* smap remains in use regardless of kmalloc_nolock, so wait unconditionally. */ + rcu_barrier_tasks_trace(); + rcu_barrier(); kvfree(smap->buckets); bpf_map_area_free(smap); } diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 04171fbc39cb..32b43cb9061b 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -29,6 +29,7 @@ #include <linux/sched.h> #include <linux/workqueue.h> #include <linux/kthread.h> +#include <linux/local_lock.h> #include <linux/completion.h> #include <trace/events/xdp.h> #include <linux/btf_ids.h> @@ -52,6 +53,7 @@ struct xdp_bulk_queue { struct list_head flush_node; struct bpf_cpu_map_entry *obj; unsigned int count; + local_lock_t bq_lock; }; /* Struct for every remote "destination" CPU in map */ @@ -451,6 +453,7 @@ __cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value, for_each_possible_cpu(i) { bq = per_cpu_ptr(rcpu->bulkq, i); bq->obj = rcpu; + local_lock_init(&bq->bq_lock); } /* Alloc queue */ @@ -722,6 +725,8 @@ static void bq_flush_to_queue(struct xdp_bulk_queue *bq) struct ptr_ring *q; int i; + lockdep_assert_held(&bq->bq_lock); + if (unlikely(!bq->count)) return; @@ -749,11 +754,15 @@ static void bq_flush_to_queue(struct xdp_bulk_queue *bq) } /* Runs under RCU-read-side, plus in softirq under NAPI protection. - * Thus, safe percpu variable access. + * Thus, safe percpu variable access. PREEMPT_RT relies on + * local_lock_nested_bh() to serialise access to the per-CPU bq. */ static void bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf) { - struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq); + struct xdp_bulk_queue *bq; + + local_lock_nested_bh(&rcpu->bulkq->bq_lock); + bq = this_cpu_ptr(rcpu->bulkq); if (unlikely(bq->count == CPU_MAP_BULK_SIZE)) bq_flush_to_queue(bq); @@ -774,6 +783,8 @@ static void bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf) list_add(&bq->flush_node, flush_list); } + + local_unlock_nested_bh(&rcpu->bulkq->bq_lock); } int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf, @@ -810,7 +821,9 @@ void __cpu_map_flush(struct list_head *flush_list) struct xdp_bulk_queue *bq, *tmp; list_for_each_entry_safe(bq, tmp, flush_list, flush_node) { + local_lock_nested_bh(&bq->obj->bulkq->bq_lock); bq_flush_to_queue(bq); + local_unlock_nested_bh(&bq->obj->bulkq->bq_lock); /* If already running, costs spin_lock_irqsave + smb_mb */ wake_up_process(bq->obj->kthread); diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 2625601de76e..3d619d01088e 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -45,6 +45,7 @@ * types of devmap; only the lookup and insertion is different. */ #include <linux/bpf.h> +#include <linux/local_lock.h> #include <net/xdp.h> #include <linux/filter.h> #include <trace/events/xdp.h> @@ -60,6 +61,7 @@ struct xdp_dev_bulk_queue { struct net_device *dev_rx; struct bpf_prog *xdp_prog; unsigned int count; + local_lock_t bq_lock; }; struct bpf_dtab_netdev { @@ -381,6 +383,8 @@ static void bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags) int to_send = cnt; int i; + lockdep_assert_held(&bq->bq_lock); + if (unlikely(!cnt)) return; @@ -425,10 +429,12 @@ void __dev_flush(struct list_head *flush_list) struct xdp_dev_bulk_queue *bq, *tmp; list_for_each_entry_safe(bq, tmp, flush_list, flush_node) { + local_lock_nested_bh(&bq->dev->xdp_bulkq->bq_lock); bq_xmit_all(bq, XDP_XMIT_FLUSH); bq->dev_rx = NULL; bq->xdp_prog = NULL; __list_del_clearprev(&bq->flush_node); + local_unlock_nested_bh(&bq->dev->xdp_bulkq->bq_lock); } } @@ -451,12 +457,16 @@ static void *__dev_map_lookup_elem(struct bpf_map *map, u32 key) /* Runs in NAPI, i.e., softirq under local_bh_disable(). Thus, safe percpu * variable access, and map elements stick around. See comment above - * xdp_do_flush() in filter.c. + * xdp_do_flush() in filter.c. PREEMPT_RT relies on local_lock_nested_bh() + * to serialise access to the per-CPU bq. */ static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf, struct net_device *dev_rx, struct bpf_prog *xdp_prog) { - struct xdp_dev_bulk_queue *bq = this_cpu_ptr(dev->xdp_bulkq); + struct xdp_dev_bulk_queue *bq; + + local_lock_nested_bh(&dev->xdp_bulkq->bq_lock); + bq = this_cpu_ptr(dev->xdp_bulkq); if (unlikely(bq->count == DEV_MAP_BULK_SIZE)) bq_xmit_all(bq, 0); @@ -477,6 +487,8 @@ static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf, } bq->q[bq->count++] = xdpf; + + local_unlock_nested_bh(&dev->xdp_bulkq->bq_lock); } static inline int __xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf, @@ -588,18 +600,22 @@ static inline bool is_ifindex_excluded(int *excluded, int num_excluded, int ifin } /* Get ifindex of each upper device. 'indexes' must be able to hold at - * least MAX_NEST_DEV elements. - * Returns the number of ifindexes added. + * least 'max' elements. + * Returns the number of ifindexes added, or -EOVERFLOW if there are too + * many upper devices. */ -static int get_upper_ifindexes(struct net_device *dev, int *indexes) +static int get_upper_ifindexes(struct net_device *dev, int *indexes, int max) { struct net_device *upper; struct list_head *iter; int n = 0; netdev_for_each_upper_dev_rcu(dev, upper, iter) { + if (n >= max) + return -EOVERFLOW; indexes[n++] = upper->ifindex; } + return n; } @@ -615,7 +631,11 @@ int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx, int err; if (exclude_ingress) { - num_excluded = get_upper_ifindexes(dev_rx, excluded_devices); + num_excluded = get_upper_ifindexes(dev_rx, excluded_devices, + ARRAY_SIZE(excluded_devices) - 1); + if (num_excluded < 0) + return num_excluded; + excluded_devices[num_excluded++] = dev_rx->ifindex; } @@ -733,7 +753,11 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb, int err; if (exclude_ingress) { - num_excluded = get_upper_ifindexes(dev, excluded_devices); + num_excluded = get_upper_ifindexes(dev, excluded_devices, + ARRAY_SIZE(excluded_devices) - 1); + if (num_excluded < 0) + return num_excluded; + excluded_devices[num_excluded++] = dev->ifindex; } @@ -1115,8 +1139,13 @@ static int dev_map_notification(struct notifier_block *notifier, if (!netdev->xdp_bulkq) return NOTIFY_BAD; - for_each_possible_cpu(cpu) - per_cpu_ptr(netdev->xdp_bulkq, cpu)->dev = netdev; + for_each_possible_cpu(cpu) { + struct xdp_dev_bulk_queue *bq; + + bq = per_cpu_ptr(netdev->xdp_bulkq, cpu); + bq->dev = netdev; + local_lock_init(&bq->bq_lock); + } break; case NETDEV_UNREGISTER: /* This rcu_read_lock/unlock pair is needed because diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 3b9d297a53be..bc6bc8bb871d 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -125,6 +125,11 @@ struct htab_elem { char key[] __aligned(8); }; +struct htab_btf_record { + struct btf_record *record; + u32 key_size; +}; + static inline bool htab_is_prealloc(const struct bpf_htab *htab) { return !(htab->map.map_flags & BPF_F_NO_PREALLOC); @@ -457,6 +462,83 @@ static int htab_map_alloc_check(union bpf_attr *attr) return 0; } +static void htab_mem_dtor(void *obj, void *ctx) +{ + struct htab_btf_record *hrec = ctx; + struct htab_elem *elem = obj; + void *map_value; + + if (IS_ERR_OR_NULL(hrec->record)) + return; + + map_value = htab_elem_value(elem, hrec->key_size); + bpf_obj_free_fields(hrec->record, map_value); +} + +static void htab_pcpu_mem_dtor(void *obj, void *ctx) +{ + void __percpu *pptr = *(void __percpu **)obj; + struct htab_btf_record *hrec = ctx; + int cpu; + + if (IS_ERR_OR_NULL(hrec->record)) + return; + + for_each_possible_cpu(cpu) + bpf_obj_free_fields(hrec->record, per_cpu_ptr(pptr, cpu)); +} + +static void htab_dtor_ctx_free(void *ctx) +{ + struct htab_btf_record *hrec = ctx; + + btf_record_free(hrec->record); + kfree(ctx); +} + +static int htab_set_dtor(struct bpf_htab *htab, void (*dtor)(void *, void *)) +{ + u32 key_size = htab->map.key_size; + struct bpf_mem_alloc *ma; + struct htab_btf_record *hrec; + int err; + + /* No need for dtors. */ + if (IS_ERR_OR_NULL(htab->map.record)) + return 0; + + hrec = kzalloc(sizeof(*hrec), GFP_KERNEL); + if (!hrec) + return -ENOMEM; + hrec->key_size = key_size; + hrec->record = btf_record_dup(htab->map.record); + if (IS_ERR(hrec->record)) { + err = PTR_ERR(hrec->record); + kfree(hrec); + return err; + } + ma = htab_is_percpu(htab) ? &htab->pcpu_ma : &htab->ma; + bpf_mem_alloc_set_dtor(ma, dtor, htab_dtor_ctx_free, hrec); + return 0; +} + +static int htab_map_check_btf(struct bpf_map *map, const struct btf *btf, + const struct btf_type *key_type, const struct btf_type *value_type) +{ + struct bpf_htab *htab = container_of(map, struct bpf_htab, map); + + if (htab_is_prealloc(htab)) + return 0; + /* + * We must set the dtor using this callback, as map's BTF record is not + * populated in htab_map_alloc(), so it will always appear as NULL. + */ + if (htab_is_percpu(htab)) + return htab_set_dtor(htab, htab_pcpu_mem_dtor); + else + return htab_set_dtor(htab, htab_mem_dtor); +} + static struct bpf_map *htab_map_alloc(union bpf_attr *attr) { bool percpu = (attr->map_type == BPF_MAP_TYPE_PERCPU_HASH || @@ -2281,6 +2363,7 @@ const struct bpf_map_ops htab_map_ops = { .map_seq_show_elem = htab_map_seq_show_elem, .map_set_for_each_callback_args = map_set_for_each_callback_args, .map_for_each_callback = bpf_for_each_hash_elem, + .map_check_btf = htab_map_check_btf, .map_mem_usage = htab_map_mem_usage, BATCH_OPS(htab), .map_btf_id = &htab_map_btf_ids[0], @@ -2303,6 +2386,7 @@ const struct bpf_map_ops htab_lru_map_ops = { .map_seq_show_elem = htab_map_seq_show_elem, .map_set_for_each_callback_args = map_set_for_each_callback_args, .map_for_each_callback = bpf_for_each_hash_elem, + .map_check_btf = htab_map_check_btf, .map_mem_usage = htab_map_mem_usage, BATCH_OPS(htab_lru), .map_btf_id = &htab_map_btf_ids[0], @@ -2482,6 +2566,7 @@ const struct bpf_map_ops htab_percpu_map_ops = { .map_seq_show_elem = htab_percpu_map_seq_show_elem, .map_set_for_each_callback_args = map_set_for_each_callback_args, .map_for_each_callback = bpf_for_each_hash_elem, + .map_check_btf = htab_map_check_btf, .map_mem_usage = htab_map_mem_usage, BATCH_OPS(htab_percpu), .map_btf_id = &htab_map_btf_ids[0], @@ -2502,6 +2587,7 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = { .map_seq_show_elem = htab_percpu_map_seq_show_elem, .map_set_for_each_callback_args = map_set_for_each_callback_args, .map_for_each_callback = bpf_for_each_hash_elem, + .map_check_btf = htab_map_check_btf, .map_mem_usage = htab_map_mem_usage, BATCH_OPS(htab_lru_percpu), .map_btf_id = &htab_map_btf_ids[0], diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c index 1ccbf28b2ad9..8fca0c64f7b1 100644 --- a/kernel/bpf/local_storage.c +++ b/kernel/bpf/local_storage.c @@ -364,7 +364,7 @@ static long cgroup_storage_delete_elem(struct bpf_map *map, void *key) return -EINVAL; } -static int cgroup_storage_check_btf(const struct bpf_map *map, +static int cgroup_storage_check_btf(struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type) diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index 1adeb4d3b8cf..0f57608b385d 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -751,7 +751,7 @@ free_stack: return err; } -static int trie_check_btf(const struct bpf_map *map, +static int trie_check_btf(struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type) diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c index bd45dda9dc35..682a9f34214b 100644 --- a/kernel/bpf/memalloc.c +++ b/kernel/bpf/memalloc.c @@ -102,6 +102,8 @@ struct bpf_mem_cache { int percpu_size; bool draining; struct bpf_mem_cache *tgt; + void (*dtor)(void *obj, void *ctx); + void *dtor_ctx; /* list of objects to be freed after RCU GP */ struct llist_head free_by_rcu; @@ -260,12 +262,14 @@ static void free_one(void *obj, bool percpu) kfree(obj); } -static int free_all(struct llist_node *llnode, bool percpu) +static int free_all(struct bpf_mem_cache *c, struct llist_node *llnode, bool percpu) { struct llist_node *pos, *t; int cnt = 0; llist_for_each_safe(pos, t, llnode) { + if (c->dtor) + c->dtor((void *)pos + LLIST_NODE_SZ, c->dtor_ctx); free_one(pos, percpu); cnt++; } @@ -276,7 +280,7 @@ static void __free_rcu(struct rcu_head *head) { struct bpf_mem_cache *c = container_of(head, struct bpf_mem_cache, rcu_ttrace); - free_all(llist_del_all(&c->waiting_for_gp_ttrace), !!c->percpu_size); + free_all(c, llist_del_all(&c->waiting_for_gp_ttrace), !!c->percpu_size); atomic_set(&c->call_rcu_ttrace_in_progress, 0); } @@ -308,7 +312,7 @@ static void do_call_rcu_ttrace(struct bpf_mem_cache *c) if (atomic_xchg(&c->call_rcu_ttrace_in_progress, 1)) { if (unlikely(READ_ONCE(c->draining))) { llnode = llist_del_all(&c->free_by_rcu_ttrace); - free_all(llnode, !!c->percpu_size); + free_all(c, llnode, !!c->percpu_size); } return; } @@ -417,7 +421,7 @@ static void check_free_by_rcu(struct bpf_mem_cache *c) dec_active(c, &flags); if (unlikely(READ_ONCE(c->draining))) { - free_all(llist_del_all(&c->waiting_for_gp), !!c->percpu_size); + free_all(c, llist_del_all(&c->waiting_for_gp), !!c->percpu_size); atomic_set(&c->call_rcu_in_progress, 0); } else { call_rcu_hurry(&c->rcu, __free_by_rcu); @@ -635,13 +639,13 @@ static void drain_mem_cache(struct bpf_mem_cache *c) * Except for waiting_for_gp_ttrace list, there are no concurrent operations * on these lists, so it is safe to use __llist_del_all(). */ - free_all(llist_del_all(&c->free_by_rcu_ttrace), percpu); - free_all(llist_del_all(&c->waiting_for_gp_ttrace), percpu); - free_all(__llist_del_all(&c->free_llist), percpu); - free_all(__llist_del_all(&c->free_llist_extra), percpu); - free_all(__llist_del_all(&c->free_by_rcu), percpu); - free_all(__llist_del_all(&c->free_llist_extra_rcu), percpu); - free_all(llist_del_all(&c->waiting_for_gp), percpu); + free_all(c, llist_del_all(&c->free_by_rcu_ttrace), percpu); + free_all(c, llist_del_all(&c->waiting_for_gp_ttrace), percpu); + free_all(c, __llist_del_all(&c->free_llist), percpu); + free_all(c, __llist_del_all(&c->free_llist_extra), percpu); + free_all(c, __llist_del_all(&c->free_by_rcu), percpu); + free_all(c, __llist_del_all(&c->free_llist_extra_rcu), percpu); + free_all(c, llist_del_all(&c->waiting_for_gp), percpu); } static void check_mem_cache(struct bpf_mem_cache *c) @@ -680,6 +684,9 @@ static void check_leaked_objs(struct bpf_mem_alloc *ma) static void free_mem_alloc_no_barrier(struct bpf_mem_alloc *ma) { + /* We can free dtor ctx only once all callbacks are done using it. */ + if (ma->dtor_ctx_free) + ma->dtor_ctx_free(ma->dtor_ctx); check_leaked_objs(ma); free_percpu(ma->cache); free_percpu(ma->caches); @@ -1014,3 +1021,32 @@ int bpf_mem_alloc_check_size(bool percpu, size_t size) return 0; } + +void bpf_mem_alloc_set_dtor(struct bpf_mem_alloc *ma, void (*dtor)(void *obj, void *ctx), + void (*dtor_ctx_free)(void *ctx), void *ctx) +{ + struct bpf_mem_caches *cc; + struct bpf_mem_cache *c; + int cpu, i; + + ma->dtor_ctx_free = dtor_ctx_free; + ma->dtor_ctx = ctx; + + if (ma->cache) { + for_each_possible_cpu(cpu) { + c = per_cpu_ptr(ma->cache, cpu); + c->dtor = dtor; + c->dtor_ctx = ctx; + } + } + if (ma->caches) { + for_each_possible_cpu(cpu) { + cc = per_cpu_ptr(ma->caches, cpu); + for (i = 0; i < NUM_CACHES; i++) { + c = &cc->cache[i]; + c->dtor = dtor; + c->dtor_ctx = ctx; + } + } + } +} diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 0378e83b4099..274039e36465 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1234,7 +1234,7 @@ int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size) } EXPORT_SYMBOL_GPL(bpf_obj_name_cpy); -int map_check_no_btf(const struct bpf_map *map, +int map_check_no_btf(struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type) diff --git a/kernel/bpf/tnum.c b/kernel/bpf/tnum.c index 26fbfbb01700..4abc359b3db0 100644 --- a/kernel/bpf/tnum.c +++ b/kernel/bpf/tnum.c @@ -269,3 +269,59 @@ struct tnum tnum_bswap64(struct tnum a) { return TNUM(swab64(a.value), swab64(a.mask)); } + +/* Given tnum t, and a number z such that tmin <= z < tmax, where tmin + * is the smallest member of the t (= t.value) and tmax is the largest + * member of t (= t.value | t.mask), returns the smallest member of t + * larger than z. + * + * For example, + * t = x11100x0 + * z = 11110001 (241) + * result = 11110010 (242) + * + * Note: if this function is called with z >= tmax, it just returns + * early with tmax; if this function is called with z < tmin, the + * algorithm already returns tmin. + */ +u64 tnum_step(struct tnum t, u64 z) +{ + u64 tmax, j, p, q, r, s, v, u, w, res; + u8 k; + + tmax = t.value | t.mask; + + /* if z >= largest member of t, return largest member of t */ + if (z >= tmax) + return tmax; + + /* if z < smallest member of t, return smallest member of t */ + if (z < t.value) + return t.value; + + /* keep t's known bits, and match all unknown bits to z */ + j = t.value | (z & t.mask); + + if (j > z) { + p = ~z & t.value & ~t.mask; + k = fls64(p); /* k is the most-significant 0-to-1 flip */ + q = U64_MAX << k; + r = q & z; /* positions > k matched to z */ + s = ~q & t.value; /* positions <= k matched to t.value */ + v = r | s; + res = v; + } else { + p = z & ~t.value & ~t.mask; + k = fls64(p); /* k is the most-significant 1-to-0 flip */ + q = U64_MAX << k; + r = q & t.mask & z; /* unknown positions > k, matched to z */ + s = q & ~t.mask; /* known positions > k, set to 1 */ + v = r | s; + /* add 1 to unknown positions > k to make value greater than z */ + u = v + (1ULL << k); + /* extract bits in unknown positions > k from u, rest from t.value */ + w = (u & t.mask) | t.value; + res = w; + } + return res; +} diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index bb12ba020649..401d6c4960ec 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2379,6 +2379,9 @@ static void __update_reg32_bounds(struct bpf_reg_state *reg) static void __update_reg64_bounds(struct bpf_reg_state *reg) { + u64 tnum_next, tmax; + bool umin_in_tnum; + /* min signed is max(sign bit) | min(other bits) */ reg->smin_value = max_t(s64, reg->smin_value, reg->var_off.value | (reg->var_off.mask & S64_MIN)); @@ -2388,6 +2391,33 @@ static void __update_reg64_bounds(struct bpf_reg_state *reg) reg->umin_value = max(reg->umin_value, reg->var_off.value); reg->umax_value = min(reg->umax_value, reg->var_off.value | reg->var_off.mask); + + /* Check if u64 and tnum overlap in a single value */ + tnum_next = tnum_step(reg->var_off, reg->umin_value); + umin_in_tnum = (reg->umin_value & ~reg->var_off.mask) == reg->var_off.value; + tmax = reg->var_off.value | reg->var_off.mask; + if (umin_in_tnum && tnum_next > reg->umax_value) { + /* The u64 range and the tnum only overlap in umin. + * u64: ---[xxxxxx]----- + * tnum: --xx----------x- + */ + ___mark_reg_known(reg, reg->umin_value); + } else if (!umin_in_tnum && tnum_next == tmax) { + /* The u64 range and the tnum only overlap in the maximum value + * represented by the tnum, called tmax. + * u64: ---[xxxxxx]----- + * tnum: xx-----x-------- + */ + ___mark_reg_known(reg, tmax); + } else if (!umin_in_tnum && tnum_next <= reg->umax_value && + tnum_step(reg->var_off, tnum_next) > reg->umax_value) { + /* The u64 range and the tnum only overlap in between umin + * (excluded) and umax. + * u64: ---[xxxxxx]----- + * tnum: xx----x-------x- + */ + ___mark_reg_known(reg, tnum_next); + } } static void __update_reg_bounds(struct bpf_reg_state *reg) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 9bc0dfd235af..0b040a417442 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -2454,8 +2454,10 @@ static void bpf_kprobe_multi_show_fdinfo(const struct bpf_link *link, struct seq_file *seq) { struct bpf_kprobe_multi_link *kmulti_link; + bool has_cookies; kmulti_link = container_of(link, struct bpf_kprobe_multi_link, link); + has_cookies = !!kmulti_link->cookies; seq_printf(seq, "kprobe_cnt:\t%u\n" @@ -2467,7 +2469,7 @@ static void bpf_kprobe_multi_show_fdinfo(const struct bpf_link *link, for (int i = 0; i < kmulti_link->cnt; i++) { seq_printf(seq, "%llu\t %pS\n", - kmulti_link->cookies[i], + has_cookies ? kmulti_link->cookies[i] : 0, (void *)kmulti_link->addrs[i]); } } |
