summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrii Nakryiko <andrii@kernel.org>2026-02-04 13:12:51 -0800
committerAndrii Nakryiko <andrii@kernel.org>2026-02-04 13:12:51 -0800
commit4af526698b15ec6cb3c8ad8085335b1d55692f00 (patch)
treeaba1b31ab5eb9b3bef324d359ef8fd054bf6e2ef
parent5e6e1dc43a217624087ce45bafd20ac2cfb3c190 (diff)
parent6e65cf81accf908d2480739b85dba4731048290d (diff)
Merge branch 'bpf-fix-conditions-when-timer-wq-can-be-called'
Alexei Starovoitov says: ==================== bpf: Fix conditions when timer/wq can be called From: Alexei Starovoitov <ast@kernel.org> v2->v3: - Add missing refcount_put - Detect recursion of individual async_cb v2: https://lore.kernel.org/bpf/20260204040834.22263-4-alexei.starovoitov@gmail.com/ v1->v2: - Add a recursion check v1: https://lore.kernel.org/bpf/20260204030927.171-1-alexei.starovoitov@gmail.com/ ==================== Link: https://patch.msgid.link/20260204055147.54960-1-alexei.starovoitov@gmail.com Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
-rw-r--r--kernel/bpf/helpers.c31
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer_start_deadlock.c33
-rw-r--r--tools/testing/selftests/bpf/progs/timer_start_deadlock.c70
3 files changed, 128 insertions, 6 deletions
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index d4aedac14a60..01052f8664eb 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1427,10 +1427,22 @@ static int bpf_async_update_prog_callback(struct bpf_async_cb *cb,
return 0;
}
+static DEFINE_PER_CPU(struct bpf_async_cb *, async_cb_running);
+
static int bpf_async_schedule_op(struct bpf_async_cb *cb, enum bpf_async_op op,
u64 nsec, u32 timer_mode)
{
- WARN_ON_ONCE(!in_hardirq());
+ /*
+ * Do not schedule another operation on this cpu if it's in irq_work
+ * callback that is processing async_cmds queue. Otherwise the following
+ * loop is possible:
+ * bpf_timer_start() -> bpf_async_schedule_op() -> irq_work_queue().
+ * irqrestore -> bpf_async_irq_worker() -> tracepoint -> bpf_timer_start().
+ */
+ if (this_cpu_read(async_cb_running) == cb) {
+ bpf_async_refcount_put(cb);
+ return -EDEADLK;
+ }
struct bpf_async_cmd *cmd = kmalloc_nolock(sizeof(*cmd), 0, NUMA_NO_NODE);
@@ -1473,6 +1485,11 @@ static const struct bpf_func_proto bpf_timer_set_callback_proto = {
.arg2_type = ARG_PTR_TO_FUNC,
};
+static bool defer_timer_wq_op(void)
+{
+ return in_hardirq() || irqs_disabled();
+}
+
BPF_CALL_3(bpf_timer_start, struct bpf_async_kern *, async, u64, nsecs, u64, flags)
{
struct bpf_hrtimer *t;
@@ -1500,7 +1517,7 @@ BPF_CALL_3(bpf_timer_start, struct bpf_async_kern *, async, u64, nsecs, u64, fla
if (!refcount_inc_not_zero(&t->cb.refcnt))
return -ENOENT;
- if (!in_hardirq()) {
+ if (!defer_timer_wq_op()) {
hrtimer_start(&t->timer, ns_to_ktime(nsecs), mode);
bpf_async_refcount_put(&t->cb);
return 0;
@@ -1524,7 +1541,7 @@ BPF_CALL_1(bpf_timer_cancel, struct bpf_async_kern *, async)
bool inc = false;
int ret = 0;
- if (in_hardirq())
+ if (defer_timer_wq_op())
return -EOPNOTSUPP;
t = READ_ONCE(async->timer);
@@ -1625,6 +1642,7 @@ static void bpf_async_irq_worker(struct irq_work *work)
return;
list = llist_reverse_order(list);
+ this_cpu_write(async_cb_running, cb);
llist_for_each_safe(pos, n, list) {
struct bpf_async_cmd *cmd;
@@ -1632,6 +1650,7 @@ static void bpf_async_irq_worker(struct irq_work *work)
bpf_async_process_op(cb, cmd->op, cmd->nsec, cmd->mode);
kfree_nolock(cmd);
}
+ this_cpu_write(async_cb_running, NULL);
}
static void bpf_async_cancel_and_free(struct bpf_async_kern *async)
@@ -1650,7 +1669,7 @@ static void bpf_async_cancel_and_free(struct bpf_async_kern *async)
* refcnt. Either synchronously or asynchronously in irq_work.
*/
- if (!in_hardirq()) {
+ if (!defer_timer_wq_op()) {
bpf_async_process_op(cb, BPF_ASYNC_CANCEL, 0, 0);
} else {
(void)bpf_async_schedule_op(cb, BPF_ASYNC_CANCEL, 0, 0);
@@ -3161,7 +3180,7 @@ __bpf_kfunc int bpf_wq_start(struct bpf_wq *wq, unsigned int flags)
if (!refcount_inc_not_zero(&w->cb.refcnt))
return -ENOENT;
- if (!in_hardirq()) {
+ if (!defer_timer_wq_op()) {
schedule_work(&w->work);
bpf_async_refcount_put(&w->cb);
return 0;
@@ -4461,7 +4480,7 @@ __bpf_kfunc int bpf_timer_cancel_async(struct bpf_timer *timer)
if (!refcount_inc_not_zero(&cb->refcnt))
return -ENOENT;
- if (!in_hardirq()) {
+ if (!defer_timer_wq_op()) {
struct bpf_hrtimer *t = container_of(cb, struct bpf_hrtimer, cb);
ret = hrtimer_try_to_cancel(&t->timer);
diff --git a/tools/testing/selftests/bpf/prog_tests/timer_start_deadlock.c b/tools/testing/selftests/bpf/prog_tests/timer_start_deadlock.c
new file mode 100644
index 000000000000..9f1f9aec8888
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/timer_start_deadlock.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include "timer_start_deadlock.skel.h"
+
+void test_timer_start_deadlock(void)
+{
+ struct timer_start_deadlock *skel;
+ int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+
+ skel = timer_start_deadlock__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ err = timer_start_deadlock__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ prog_fd = bpf_program__fd(skel->progs.start_timer);
+
+ /*
+ * Run the syscall program that attempts to deadlock.
+ * If the kernel deadlocks, this call will never return.
+ */
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+ ASSERT_OK(err, "prog_test_run");
+ ASSERT_EQ(opts.retval, 0, "prog_retval");
+
+ ASSERT_EQ(skel->bss->tp_called, 1, "tp_called");
+cleanup:
+ timer_start_deadlock__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/timer_start_deadlock.c b/tools/testing/selftests/bpf/progs/timer_start_deadlock.c
new file mode 100644
index 000000000000..019518ee18cd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/timer_start_deadlock.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2026 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#define CLOCK_MONOTONIC 1
+
+char _license[] SEC("license") = "GPL";
+
+struct elem {
+ struct bpf_timer timer;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} timer_map SEC(".maps");
+
+volatile int in_timer_start;
+volatile int tp_called;
+
+static int timer_cb(void *map, int *key, struct elem *value)
+{
+ return 0;
+}
+
+SEC("tp_btf/hrtimer_cancel")
+int BPF_PROG(tp_hrtimer_cancel, struct hrtimer *hrtimer)
+{
+ struct bpf_timer *timer;
+ int key = 0;
+
+ if (!in_timer_start)
+ return 0;
+
+ tp_called = 1;
+ timer = bpf_map_lookup_elem(&timer_map, &key);
+
+ /*
+ * Call bpf_timer_start() from the tracepoint within hrtimer logic
+ * on the same timer to make sure it doesn't deadlock.
+ */
+ bpf_timer_start(timer, 1000000000, 0);
+ return 0;
+}
+
+SEC("syscall")
+int start_timer(void *ctx)
+{
+ struct bpf_timer *timer;
+ int key = 0;
+
+ timer = bpf_map_lookup_elem(&timer_map, &key);
+	/* No NULL check needed: lookup in a 1-entry array map with key 0 always succeeds. */
+ bpf_timer_init(timer, &timer_map, CLOCK_MONOTONIC);
+ bpf_timer_set_callback(timer, timer_cb);
+
+ /*
+ * call hrtimer_start() twice, so that 2nd call does
+ * remove_hrtimer() and trace_hrtimer_cancel() tracepoint.
+ */
+ in_timer_start = 1;
+ bpf_timer_start(timer, 1000000000, 0);
+ bpf_timer_start(timer, 1000000000, 0);
+ in_timer_start = 0;
+ return 0;
+}