summaryrefslogtreecommitdiff
path: root/kernel/bpf/devmap.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2026-02-28 19:54:28 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2026-02-28 19:54:28 -0800
commiteb71ab2bf72260054677e348498ba995a057c463 (patch)
tree425776573c762c80fd1e173f92537741300fb8d1 /kernel/bpf/devmap.c
parent63a43faf6a68ce0045c874b32e60acac2089a41a (diff)
parentb9c0a5c48396aea4cde25fc701027ebbc5d78de1 (diff)
Merge tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
Pull bpf fixes from Alexei Starovoitov: - Fix alignment of arm64 JIT buffer to prevent atomic tearing (Fuad Tabba) - Fix invariant violation for single value tnums in the verifier (Harishankar Vishwanathan, Paul Chaignon) - Fix a bunch of issues found by ASAN in selftests/bpf (Ihor Solodrai) - Fix race in devmpa and cpumap on PREEMPT_RT (Jiayuan Chen) - Fix show_fdinfo of kprobe_multi when cookies are not present (Jiri Olsa) - Fix race in freeing special fields in BPF maps to prevent memory leaks (Kumar Kartikeya Dwivedi) - Fix OOB read in dmabuf_collector (T.J. Mercier) * tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf: (36 commits) selftests/bpf: Avoid simplification of crafted bounds test selftests/bpf: Test refinement of single-value tnum bpf: Improve bounds when tnum has a single possible value bpf: Introduce tnum_step to step through tnum's members bpf: Fix race in devmap on PREEMPT_RT bpf: Fix race in cpumap on PREEMPT_RT selftests/bpf: Add tests for special fields races bpf: Retire rcu_trace_implies_rcu_gp() from local storage bpf: Delay freeing fields in local storage bpf: Lose const-ness of map in map_check_btf() bpf: Register dtor for freeing special fields selftests/bpf: Fix OOB read in dmabuf_collector selftests/bpf: Fix a memory leak in xdp_flowtable test bpf: Fix stack-out-of-bounds write in devmap bpf: Fix kprobe_multi cookies access in show_fdinfo callback bpf, arm64: Force 8-byte alignment for JIT buffer to prevent atomic tearing selftests/bpf: Don't override SIGSEGV handler with ASAN selftests/bpf: Check BPFTOOL env var in detect_bpftool_path() selftests/bpf: Fix out-of-bounds array access bugs reported by ASAN selftests/bpf: Fix array bounds warning in jit_disasm_helpers ...
Diffstat (limited to 'kernel/bpf/devmap.c')
-rw-r--r--kernel/bpf/devmap.c47
1 files changed, 38 insertions, 9 deletions
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 2625601de76e..3d619d01088e 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -45,6 +45,7 @@
* types of devmap; only the lookup and insertion is different.
*/
#include <linux/bpf.h>
+#include <linux/local_lock.h>
#include <net/xdp.h>
#include <linux/filter.h>
#include <trace/events/xdp.h>
@@ -60,6 +61,7 @@ struct xdp_dev_bulk_queue {
struct net_device *dev_rx;
struct bpf_prog *xdp_prog;
unsigned int count;
+ local_lock_t bq_lock;
};
struct bpf_dtab_netdev {
@@ -381,6 +383,8 @@ static void bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
int to_send = cnt;
int i;
+ lockdep_assert_held(&bq->bq_lock);
+
if (unlikely(!cnt))
return;
@@ -425,10 +429,12 @@ void __dev_flush(struct list_head *flush_list)
struct xdp_dev_bulk_queue *bq, *tmp;
list_for_each_entry_safe(bq, tmp, flush_list, flush_node) {
+ local_lock_nested_bh(&bq->dev->xdp_bulkq->bq_lock);
bq_xmit_all(bq, XDP_XMIT_FLUSH);
bq->dev_rx = NULL;
bq->xdp_prog = NULL;
__list_del_clearprev(&bq->flush_node);
+ local_unlock_nested_bh(&bq->dev->xdp_bulkq->bq_lock);
}
}
@@ -451,12 +457,16 @@ static void *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
/* Runs in NAPI, i.e., softirq under local_bh_disable(). Thus, safe percpu
* variable access, and map elements stick around. See comment above
- * xdp_do_flush() in filter.c.
+ * xdp_do_flush() in filter.c. PREEMPT_RT relies on local_lock_nested_bh()
+ * to serialise access to the per-CPU bq.
*/
static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
struct net_device *dev_rx, struct bpf_prog *xdp_prog)
{
- struct xdp_dev_bulk_queue *bq = this_cpu_ptr(dev->xdp_bulkq);
+ struct xdp_dev_bulk_queue *bq;
+
+ local_lock_nested_bh(&dev->xdp_bulkq->bq_lock);
+ bq = this_cpu_ptr(dev->xdp_bulkq);
if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
bq_xmit_all(bq, 0);
@@ -477,6 +487,8 @@ static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
}
bq->q[bq->count++] = xdpf;
+
+ local_unlock_nested_bh(&dev->xdp_bulkq->bq_lock);
}
static inline int __xdp_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
@@ -588,18 +600,22 @@ static inline bool is_ifindex_excluded(int *excluded, int num_excluded, int ifin
}
/* Get ifindex of each upper device. 'indexes' must be able to hold at
- * least MAX_NEST_DEV elements.
- * Returns the number of ifindexes added.
+ * least 'max' elements.
+ * Returns the number of ifindexes added, or -EOVERFLOW if there are too
+ * many upper devices.
*/
-static int get_upper_ifindexes(struct net_device *dev, int *indexes)
+static int get_upper_ifindexes(struct net_device *dev, int *indexes, int max)
{
struct net_device *upper;
struct list_head *iter;
int n = 0;
netdev_for_each_upper_dev_rcu(dev, upper, iter) {
+ if (n >= max)
+ return -EOVERFLOW;
indexes[n++] = upper->ifindex;
}
+
return n;
}
@@ -615,7 +631,11 @@ int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx,
int err;
if (exclude_ingress) {
- num_excluded = get_upper_ifindexes(dev_rx, excluded_devices);
+ num_excluded = get_upper_ifindexes(dev_rx, excluded_devices,
+ ARRAY_SIZE(excluded_devices) - 1);
+ if (num_excluded < 0)
+ return num_excluded;
+
excluded_devices[num_excluded++] = dev_rx->ifindex;
}
@@ -733,7 +753,11 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
int err;
if (exclude_ingress) {
- num_excluded = get_upper_ifindexes(dev, excluded_devices);
+ num_excluded = get_upper_ifindexes(dev, excluded_devices,
+ ARRAY_SIZE(excluded_devices) - 1);
+ if (num_excluded < 0)
+ return num_excluded;
+
excluded_devices[num_excluded++] = dev->ifindex;
}
@@ -1115,8 +1139,13 @@ static int dev_map_notification(struct notifier_block *notifier,
if (!netdev->xdp_bulkq)
return NOTIFY_BAD;
- for_each_possible_cpu(cpu)
- per_cpu_ptr(netdev->xdp_bulkq, cpu)->dev = netdev;
+ for_each_possible_cpu(cpu) {
+ struct xdp_dev_bulk_queue *bq;
+
+ bq = per_cpu_ptr(netdev->xdp_bulkq, cpu);
+ bq->dev = netdev;
+ local_lock_init(&bq->bq_lock);
+ }
break;
case NETDEV_UNREGISTER:
/* This rcu_read_lock/unlock pair is needed because