| field | value | date |
|---|---|---|
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-02-04 15:11:24 -0800 |
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-02-04 15:11:24 -0800 |
| commit | 3c7b4d1994f63d6fa3984d7d5ad06dbaad96f167 | |
| tree | 37029e996143b6a4b0081e3f53c86ae108e21e7a /kernel | |
| parent | 27db1ae6ecdf23f4176276da6037eaafbd23bf94 | |
| parent | 0eca95cba2b7bf7b7b4f2fa90734a85fcaa72782 | |
Merge tag 'sched_ext-for-6.19-rc8-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext
Pull sched_ext fix from Tejun Heo:
- Fix race where sched_class operations (sched_setscheduler() and
friends) could be invoked on dead tasks after sched_ext_dead()
already ran, causing invalid SCX task state transitions and NULL
pointer dereferences.
This was a regression from the cgroup exit ordering fix which
moved sched_ext_free() to finish_task_switch().
* tag 'sched_ext-for-6.19-rc8-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext:
sched_ext: Short-circuit sched_class operations on dead tasks
Diffstat (limited to 'kernel')
| mode | path | changed |
|---|---|---|
| -rw-r--r-- | kernel/sched/ext.c | 48 |

1 file changed, 48 insertions, 0 deletions
```diff
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index afe28c04d5aa..0bb8fa927e9e 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -194,6 +194,7 @@ MODULE_PARM_DESC(bypass_lb_intv_us, "bypass load balance interval in microsecond
 #include <trace/events/sched_ext.h>
 
 static void process_ddsp_deferred_locals(struct rq *rq);
+static bool task_dead_and_done(struct task_struct *p);
 static u32 reenq_local(struct rq *rq);
 static void scx_kick_cpu(struct scx_sched *sch, s32 cpu, u64 flags);
 static bool scx_vexit(struct scx_sched *sch, enum scx_exit_kind kind,
@@ -2619,6 +2620,9 @@ static void set_cpus_allowed_scx(struct task_struct *p,
 
 	set_cpus_allowed_common(p, ac);
 
+	if (task_dead_and_done(p))
+		return;
+
 	/*
 	 * The effective cpumask is stored in @p->cpus_ptr which may temporarily
 	 * differ from the configured one in @p->cpus_mask. Always tell the bpf
@@ -3034,10 +3038,45 @@ void scx_cancel_fork(struct task_struct *p)
 	percpu_up_read(&scx_fork_rwsem);
 }
 
+/**
+ * task_dead_and_done - Is a task dead and done running?
+ * @p: target task
+ *
+ * Once sched_ext_dead() removes the dead task from scx_tasks and exits it, the
+ * task no longer exists from SCX's POV. However, certain sched_class ops may be
+ * invoked on these dead tasks leading to failures - e.g. sched_setscheduler()
+ * may try to switch a task which finished sched_ext_dead() back into SCX
+ * triggering invalid SCX task state transitions and worse.
+ *
+ * Once a task has finished the final switch, sched_ext_dead() is the only thing
+ * that needs to happen on the task. Use this test to short-circuit sched_class
+ * operations which may be called on dead tasks.
+ */
+static bool task_dead_and_done(struct task_struct *p)
+{
+	struct rq *rq = task_rq(p);
+
+	lockdep_assert_rq_held(rq);
+
+	/*
+	 * In do_task_dead(), a dying task sets %TASK_DEAD with preemption
+	 * disabled and __schedule(). If @p has %TASK_DEAD set and off CPU, @p
+	 * won't ever run again.
+	 */
+	return unlikely(READ_ONCE(p->__state) == TASK_DEAD) &&
+		!task_on_cpu(rq, p);
+}
+
 void sched_ext_dead(struct task_struct *p)
 {
 	unsigned long flags;
 
+	/*
+	 * By the time control reaches here, @p has %TASK_DEAD set, switched out
+	 * for the last time and then dropped the rq lock - task_dead_and_done()
+	 * should be returning %true nullifying the straggling sched_class ops.
+	 * Remove from scx_tasks and exit @p.
+	 */
 	raw_spin_lock_irqsave(&scx_tasks_lock, flags);
 	list_del_init(&p->scx.tasks_node);
 	raw_spin_unlock_irqrestore(&scx_tasks_lock, flags);
@@ -3063,6 +3102,9 @@ static void reweight_task_scx(struct rq *rq, struct task_struct *p,
 
 	lockdep_assert_rq_held(task_rq(p));
 
+	if (task_dead_and_done(p))
+		return;
+
 	p->scx.weight = sched_weight_to_cgroup(scale_load_down(lw->weight));
 	if (SCX_HAS_OP(sch, set_weight))
 		SCX_CALL_OP_TASK(sch, SCX_KF_REST, set_weight, rq,
@@ -3077,6 +3119,9 @@ static void switching_to_scx(struct rq *rq, struct task_struct *p)
 {
 	struct scx_sched *sch = scx_root;
 
+	if (task_dead_and_done(p))
+		return;
+
 	scx_enable_task(p);
 
 	/*
@@ -3090,6 +3135,9 @@ static void switched_from_scx(struct rq *rq, struct task_struct *p)
 {
+	if (task_dead_and_done(p))
+		return;
+
 	scx_disable_task(p);
 }
 
```
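The guard added above boils down to one idea: once a task is marked %TASK_DEAD and has been switched out for the last time, every remaining sched_class callback on it should become a no-op. The userspace sketch below models just that control flow; it is an illustration only. The `struct task`, `switched_from`, and the local `task_dead_and_done` here are hypothetical stand-ins, and C11 atomics replace the kernel's rq-lock plus READ_ONCE()/task_on_cpu() scheme.

```c
/* Minimal userspace model of the short-circuit pattern in the fix above.
 * Hypothetical names/types; the real kernel helper additionally requires
 * the task's rq lock to be held (lockdep_assert_rq_held()). */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

enum task_state { TASK_RUNNING, TASK_DEAD };

struct task {
	_Atomic enum task_state state;
	atomic_bool on_cpu;
};

/* Dead AND fully off CPU: no callback should touch this task anymore. */
static bool task_dead_and_done(struct task *p)
{
	return atomic_load(&p->state) == TASK_DEAD && !atomic_load(&p->on_cpu);
}

/* Stand-in for a sched_class op such as switched_from_scx(). */
static void switched_from(struct task *p)
{
	if (task_dead_and_done(p))
		return;	/* straggling op on a dead task: do nothing */
	printf("disabling task\n");
}

int main(void)
{
	struct task t;
	atomic_init(&t.state, TASK_RUNNING);
	atomic_init(&t.on_cpu, true);

	switched_from(&t);	/* live task: prints "disabling task" */

	/* Model do_task_dead(): mark dead, then the final switch-out. */
	atomic_store(&t.state, TASK_DEAD);
	atomic_store(&t.on_cpu, false);

	switched_from(&t);	/* short-circuited: prints nothing */
	return 0;
}
```

What makes the two-flag test safe in the kernel is ordering, not atomics: the dying task sets %TASK_DEAD with preemption disabled before its final __schedule(), and both flags are observed under the rq lock, so "dead and off CPU" can never flip back. The model above only illustrates the short-circuit control flow, not that guarantee.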
