From f8380f976804533df4c6c3d3a0b2cd03c2d262bc Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Mon, 15 Dec 2025 17:52:06 +0100
Subject: rseq: Provide static branch for time slice extensions

Guard the time slice extension functionality with a static key, which can
be disabled on the kernel command line.

Signed-off-by: Thomas Gleixner
Signed-off-by: Peter Zijlstra (Intel)
Link: https://patch.msgid.link/20251215155708.733429292@linutronix.de
---
 include/linux/rseq_entry.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux/rseq_entry.h')

diff --git a/include/linux/rseq_entry.h b/include/linux/rseq_entry.h
index a36b472627de..d0ec4714ffd6 100644
--- a/include/linux/rseq_entry.h
+++ b/include/linux/rseq_entry.h
@@ -75,6 +75,17 @@ DECLARE_STATIC_KEY_MAYBE(CONFIG_RSEQ_DEBUG_DEFAULT_ENABLE, rseq_debug_enabled);
 #define rseq_inline __always_inline
 #endif
 
+#ifdef CONFIG_RSEQ_SLICE_EXTENSION
+DECLARE_STATIC_KEY_TRUE(rseq_slice_extension_key);
+
+static __always_inline bool rseq_slice_extension_enabled(void)
+{
+	return static_branch_likely(&rseq_slice_extension_key);
+}
+#else /* CONFIG_RSEQ_SLICE_EXTENSION */
+static inline bool rseq_slice_extension_enabled(void) { return false; }
+#endif /* !CONFIG_RSEQ_SLICE_EXTENSION */
+
 bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long csaddr);
 bool rseq_debug_validate_ids(struct task_struct *t);
-- 
cgit v1.2.3
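A usage note, not part of the patch: the key pairs with the rseq_slice_ext=
boot parameter added later in this series (see the __setup() handler in the
enforcement timer patch below), so the functionality can be switched off
without a rebuild. Assuming that parameter and its kstrtobool-style parsing,
booting with

    rseq_slice_ext=off

disables the static key, and every rseq_slice_extension_enabled() check in
the entry code then takes the patched-out false branch at effectively zero
cost.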
From b5b8282441bc4f8f1ff505e19d566dbd7b805761 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Mon, 15 Dec 2025 17:52:09 +0100
Subject: rseq: Add statistics for time slice extensions

Extend the quick statistics with time slice specific fields.

Signed-off-by: Thomas Gleixner
Signed-off-by: Peter Zijlstra (Intel)
Link: https://patch.msgid.link/20251215155708.795202254@linutronix.de
---
 include/linux/rseq_entry.h |  5 +++++
 kernel/rseq.c              | 14 ++++++++++++++
 2 files changed, 19 insertions(+)

(limited to 'include/linux/rseq_entry.h')

diff --git a/include/linux/rseq_entry.h b/include/linux/rseq_entry.h
index d0ec4714ffd6..54d8e338b26e 100644
--- a/include/linux/rseq_entry.h
+++ b/include/linux/rseq_entry.h
@@ -15,6 +15,11 @@ struct rseq_stats {
 	unsigned long	cs;
 	unsigned long	clear;
 	unsigned long	fixup;
+	unsigned long	s_granted;
+	unsigned long	s_expired;
+	unsigned long	s_revoked;
+	unsigned long	s_yielded;
+	unsigned long	s_aborted;
 };
 
 DECLARE_PER_CPU(struct rseq_stats, rseq_stats);

diff --git a/kernel/rseq.c b/kernel/rseq.c
index bf75268580ef..415d75b6df2c 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -138,6 +138,13 @@ static int rseq_stats_show(struct seq_file *m, void *p)
 		stats.cs += data_race(per_cpu(rseq_stats.cs, cpu));
 		stats.clear += data_race(per_cpu(rseq_stats.clear, cpu));
 		stats.fixup += data_race(per_cpu(rseq_stats.fixup, cpu));
+		if (IS_ENABLED(CONFIG_RSEQ_SLICE_EXTENSION)) {
+			stats.s_granted += data_race(per_cpu(rseq_stats.s_granted, cpu));
+			stats.s_expired += data_race(per_cpu(rseq_stats.s_expired, cpu));
+			stats.s_revoked += data_race(per_cpu(rseq_stats.s_revoked, cpu));
+			stats.s_yielded += data_race(per_cpu(rseq_stats.s_yielded, cpu));
+			stats.s_aborted += data_race(per_cpu(rseq_stats.s_aborted, cpu));
+		}
 	}
 
 	seq_printf(m, "exit: %16lu\n", stats.exit);
@@ -148,6 +155,13 @@
 	seq_printf(m, "cs: %16lu\n", stats.cs);
 	seq_printf(m, "clear: %16lu\n", stats.clear);
 	seq_printf(m, "fixup: %16lu\n", stats.fixup);
+	if (IS_ENABLED(CONFIG_RSEQ_SLICE_EXTENSION)) {
+		seq_printf(m, "sgrant: %16lu\n", stats.s_granted);
+		seq_printf(m, "sexpir: %16lu\n", stats.s_expired);
+		seq_printf(m, "srevok: %16lu\n", stats.s_revoked);
+		seq_printf(m, "syield: %16lu\n", stats.s_yielded);
+		seq_printf(m, "sabort: %16lu\n", stats.s_aborted);
+	}
 
 	return 0;
 }
-- 
cgit v1.2.3
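With CONFIG_RSEQ_STATS enabled, these counters appear in the rseq stats
seq_file (exposed via debugfs in this series). A hedged sample of the
additional output lines, derived from the seq_printf() format strings above:

    sgrant:             1234
    sexpir:               17
    srevok:                3
    syield:             1201
    sabort:                0

sgrant/sexpir/srevok/syield/sabort correspond to s_granted (extensions handed
out), s_expired (enforcement timer fired), s_revoked (grant revoked when the
task was scheduled), s_yielded (task gave the CPU back via the yield syscall)
and s_aborted (grant aborted, e.g. when the task enters the kernel early).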
From 0ac3b5c3dc45085b28a10ee730fb2860841f08ef Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Mon, 15 Dec 2025 17:52:22 +0100
Subject: rseq: Implement time slice extension enforcement timer

If a time slice extension is granted and the reschedule delayed, the kernel
has to ensure that user space cannot abuse the extension and exceed the
maximum granted time.

It was suggested to implement this via the existing hrtick() timer in the
scheduler, but that turned out to be problematic for several reasons:

  1) It creates a dependency on CONFIG_SCHED_HRTICK, which can be disabled
     independently of CONFIG_HIGHRES_TIMERS.

  2) HRTICK usage in the scheduler can be runtime disabled or is only used
     for certain aspects of scheduling.

  3) The function is calling into the scheduler code and that might have
     unexpected consequences when this is invoked due to a time slice
     enforcement expiry, especially when the task managed to clear the
     grant via sched_yield().

It would be possible to address #2 and #3 by storing state in the
scheduler, but that is extra complexity and fragility for no value.

Implement a dedicated per CPU hrtimer instead, which is solely used for the
purpose of time slice enforcement.

The timer is armed when an extension was granted right before actually
returning to user mode in rseq_exit_to_user_mode_restart(). It is disarmed
when the task relinquishes the CPU. This is expensive as the timer is
probably the first expiring timer on the CPU, which means it has to
reprogram the hardware. But that's less expensive than going through a full
hrtimer interrupt cycle for nothing.

Signed-off-by: Thomas Gleixner
Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Mathieu Desnoyers
Link: https://patch.msgid.link/20251215155709.068329497@linutronix.de
---
 Documentation/admin-guide/sysctl/kernel.rst |  11 +++
 include/linux/rseq_entry.h                  |  38 +++++---
 include/linux/rseq_types.h                  |   2 +
 kernel/rseq.c                               | 132 +++++++++++++++++++++++++++-
 4 files changed, 170 insertions(+), 13 deletions(-)

(limited to 'include/linux/rseq_entry.h')

diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index 239da22c4e28..b09d18e0f75b 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -1248,6 +1248,17 @@ reboot-cmd (SPARC only)
 ROM/Flash boot loader. Maybe to tell it what to do after
 rebooting. ???
 
+rseq_slice_extension_nsec
+=========================
+
+A task can request to delay its scheduling if it is in a critical section
+via the prctl(PR_RSEQ_SLICE_EXTENSION_SET) mechanism. This sets the maximum
+allowed extension in nanoseconds before scheduling of the task is enforced.
+The default value is 10000ns (10us). The possible range is 10000ns (10us)
+to 50000ns (50us).
+
+This value has a direct correlation to the worst case scheduling latency;
+increase it at your own risk.
+
 sched_energy_aware
 ==================
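For illustration, assuming the sysctl registration added in kernel/rseq.c
further down (procname "rseq_slice_extension_nsec" in the "kernel" directory,
handled by proc_douintvec_minmax with the 10us/50us bounds), the knob can be
inspected and tuned at runtime:

    # cat /proc/sys/kernel/rseq_slice_extension_nsec
    10000
    # sysctl -w kernel.rseq_slice_extension_nsec=25000
    kernel.rseq_slice_extension_nsec = 25000

Writes outside of the 10000..50000 range are rejected by the minmax handler.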
diff --git a/include/linux/rseq_entry.h b/include/linux/rseq_entry.h
index 54d8e338b26e..8d04611056aa 100644
--- a/include/linux/rseq_entry.h
+++ b/include/linux/rseq_entry.h
@@ -87,8 +87,24 @@ static __always_inline bool rseq_slice_extension_enabled(void)
 {
 	return static_branch_likely(&rseq_slice_extension_key);
 }
+
+extern unsigned int rseq_slice_ext_nsecs;
+bool __rseq_arm_slice_extension_timer(void);
+
+static __always_inline bool rseq_arm_slice_extension_timer(void)
+{
+	if (!rseq_slice_extension_enabled())
+		return false;
+
+	if (likely(!current->rseq.slice.state.granted))
+		return false;
+
+	return __rseq_arm_slice_extension_timer();
+}
+
 #else /* CONFIG_RSEQ_SLICE_EXTENSION */
 static inline bool rseq_slice_extension_enabled(void) { return false; }
+static inline bool rseq_arm_slice_extension_timer(void) { return false; }
 #endif /* !CONFIG_RSEQ_SLICE_EXTENSION */
 
 bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long csaddr);
@@ -543,17 +559,19 @@ static __always_inline void clear_tif_rseq(void) { }
 
 static __always_inline bool rseq_exit_to_user_mode_restart(struct pt_regs *regs, unsigned long ti_work)
 {
-	if (likely(!test_tif_rseq(ti_work)))
-		return false;
-
-	if (unlikely(__rseq_exit_to_user_mode_restart(regs))) {
-		current->rseq.event.slowpath = true;
-		set_tsk_thread_flag(current, TIF_NOTIFY_RESUME);
-		return true;
+	if (unlikely(test_tif_rseq(ti_work))) {
+		if (unlikely(__rseq_exit_to_user_mode_restart(regs))) {
+			current->rseq.event.slowpath = true;
+			set_tsk_thread_flag(current, TIF_NOTIFY_RESUME);
+			return true;
+		}
+		clear_tif_rseq();
 	}
-
-	clear_tif_rseq();
-	return false;
+	/*
+	 * Arm the slice extension timer if nothing to do anymore and the
+	 * task really goes out to user space.
+	 */
+	return rseq_arm_slice_extension_timer();
 }
 
 #else /* CONFIG_GENERIC_ENTRY */

diff --git a/include/linux/rseq_types.h b/include/linux/rseq_types.h
index 8c540e775161..8a2e76c5d2a8 100644
--- a/include/linux/rseq_types.h
+++ b/include/linux/rseq_types.h
@@ -89,10 +89,12 @@ union rseq_slice_state {
 /**
  * struct rseq_slice - Status information for rseq time slice extension
  * @state:	Time slice extension state
+ * @expires:	The time when a grant expires
  * @yielded:	Indicator for rseq_slice_yield()
  */
 struct rseq_slice {
 	union rseq_slice_state	state;
+	u64			expires;
 	u8			yielded;
 };
diff --git a/kernel/rseq.c b/kernel/rseq.c
index 8aa4821e3979..275d70114107 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -71,6 +71,8 @@
 #define RSEQ_BUILD_SLOW_PATH
 
 #include <linux/debugfs.h>
+#include <linux/hrtimer.h>
+#include <linux/percpu.h>
 #include <linux/prctl.h>
 #include <linux/ratelimit.h>
 #include <linux/rseq_entry.h>
@@ -500,8 +502,91 @@ efault:
 }
 
 #ifdef CONFIG_RSEQ_SLICE_EXTENSION
+struct slice_timer {
+	struct hrtimer	timer;
+	void		*cookie;
+};
+
+unsigned int rseq_slice_ext_nsecs __read_mostly = 10 * NSEC_PER_USEC;
+static DEFINE_PER_CPU(struct slice_timer, slice_timer);
 DEFINE_STATIC_KEY_TRUE(rseq_slice_extension_key);
 
+/*
+ * When the timer expires and the task is still in user space, the return
+ * from interrupt will revoke the grant and schedule. If the task already
+ * entered the kernel via a syscall and the timer fires before the syscall
+ * work was able to cancel it, then depending on the preemption model this
+ * will either reschedule on return from interrupt or in the syscall work
+ * below.
+ */
+static enum hrtimer_restart rseq_slice_expired(struct hrtimer *tmr)
+{
+	struct slice_timer *st = container_of(tmr, struct slice_timer, timer);
+
+	/*
+	 * Validate that the task which armed the timer is still on the
+	 * CPU. It could have been scheduled out without canceling the
+	 * timer.
+	 */
+	if (st->cookie == current && current->rseq.slice.state.granted) {
+		rseq_stat_inc(rseq_stats.s_expired);
+		set_need_resched_current();
+	}
+	return HRTIMER_NORESTART;
+}
+
+bool __rseq_arm_slice_extension_timer(void)
+{
+	struct slice_timer *st = this_cpu_ptr(&slice_timer);
+	struct task_struct *curr = current;
+
+	lockdep_assert_irqs_disabled();
+
+	/*
+	 * This check prevents a task, which got a time slice extension
+	 * granted, from exceeding the maximum scheduling latency when the
+	 * grant expired before going out to user space. Don't bother to
+	 * clear the grant here, it will be cleaned up automatically before
+	 * going out to user space after being scheduled back in.
+	 */
+	if (unlikely(curr->rseq.slice.expires < ktime_get_mono_fast_ns())) {
+		set_need_resched_current();
+		return true;
+	}
+
+	/*
+	 * Store the task pointer as a cookie for comparison in the timer
+	 * function. This is safe as the timer is CPU local and cannot be
+	 * in the expiry function at this point.
+	 */
+	st->cookie = curr;
+	hrtimer_start(&st->timer, curr->rseq.slice.expires, HRTIMER_MODE_ABS_PINNED_HARD);
+	/* Arm the syscall entry work */
+	set_task_syscall_work(curr, SYSCALL_RSEQ_SLICE);
+	return false;
+}
+
+static void rseq_cancel_slice_extension_timer(void)
+{
+	struct slice_timer *st = this_cpu_ptr(&slice_timer);
+
+	/*
+	 * st->cookie can be safely read as preemption is disabled and the
+	 * timer is CPU local.
+	 *
+	 * As this is most probably the first expiring timer, the cancel is
+	 * expensive as it has to reprogram the hardware, but that's less
+	 * expensive than going through a full hrtimer_interrupt() cycle
+	 * for nothing.
+	 *
+	 * hrtimer_try_to_cancel() is sufficient here as the timer is CPU
+	 * local and once the hrtimer code disabled interrupts the timer
+	 * callback cannot be running.
+	 */
+	if (st->cookie == current)
+		hrtimer_try_to_cancel(&st->timer);
+}
+
 static inline void rseq_slice_set_need_resched(struct task_struct *curr)
 {
 	/*
@@ -563,11 +648,14 @@ void rseq_syscall_enter_work(long syscall)
 		return;
 
 	/*
-	 * Required to make set_tsk_need_resched() correct on PREEMPT[RT]
-	 * kernels. Leaving the scope will reschedule on preemption models
-	 * FULL, LAZY and RT if necessary.
+	 * Required to stabilize the per CPU timer pointer and to make
+	 * set_tsk_need_resched() correct on PREEMPT[RT] kernels.
+	 *
+	 * Leaving the scope will reschedule on preemption models FULL,
+	 * LAZY and RT if necessary.
 	 */
 	scoped_guard(preempt) {
+		rseq_cancel_slice_extension_timer();
 		/*
 		 * Now that preemption is disabled, quickly check whether
 		 * the task was already rescheduled before arriving here.
@@ -665,6 +753,31 @@
 	return yielded;
 }
 
+#ifdef CONFIG_SYSCTL
+static const unsigned int rseq_slice_ext_nsecs_min = 10 * NSEC_PER_USEC;
+static const unsigned int rseq_slice_ext_nsecs_max = 50 * NSEC_PER_USEC;
+
+static const struct ctl_table rseq_slice_ext_sysctl[] = {
+	{
+		.procname	= "rseq_slice_extension_nsec",
+		.data		= &rseq_slice_ext_nsecs,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_douintvec_minmax,
+		.extra1		= (unsigned int *)&rseq_slice_ext_nsecs_min,
+		.extra2		= (unsigned int *)&rseq_slice_ext_nsecs_max,
+	},
+};
+
+static void rseq_slice_sysctl_init(void)
+{
+	if (rseq_slice_extension_enabled())
+		register_sysctl_init("kernel", rseq_slice_ext_sysctl);
+}
+#else /* CONFIG_SYSCTL */
+static inline void rseq_slice_sysctl_init(void) { }
+#endif /* !CONFIG_SYSCTL */
+
 static int __init rseq_slice_cmdline(char *str)
 {
 	bool on;
@@ -677,4 +790,17 @@ static int __init rseq_slice_cmdline(char *str)
 	return 1;
 }
 __setup("rseq_slice_ext=", rseq_slice_cmdline);
+
+static int __init rseq_slice_init(void)
+{
+	unsigned int cpu;
+
+	for_each_possible_cpu(cpu) {
+		hrtimer_setup(per_cpu_ptr(&slice_timer.timer, cpu), rseq_slice_expired,
+			      CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED_HARD);
+	}
+	rseq_slice_sysctl_init();
+	return 0;
+}
+device_initcall(rseq_slice_init);
 #endif /* CONFIG_RSEQ_SLICE_EXTENSION */
-- 
cgit v1.2.3
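To put numbers on the enforcement: with the default rseq_slice_ext_nsecs of
10000, a grant issued at CLOCK_MONOTONIC time T sets slice.expires to
T + 10000ns. A plain walk-through of the code above, with illustrative
timings:

    T          grant issued, slice.expires = T + 10us
    T + 3us    exit work done; __rseq_arm_slice_extension_timer() arms the
               pinned hrtimer for T + 10us and sets the SYSCALL_RSEQ_SLICE
               entry work
    T + 10us   timer fires; task still in user space -> s_expired is counted
               and set_need_resched_current() revokes the extension

If instead the remaining exit work already consumed the whole extension
(ktime_get_mono_fast_ns() is past slice.expires when arming), the timer is
never started: NEED_RESCHED is set again and the exit loop reschedules
immediately.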
From 7ee58f98b59b0ec32ea8a92f0bc85cb46fcd3de3 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Mon, 15 Dec 2025 17:52:26 +0100
Subject: rseq: Reset slice extension when scheduled

When a time slice extension was granted in the need_resched() check on exit
to user space, the task can still be scheduled out by one of the other
pending work items. When it gets scheduled back in and need_resched() is
not set, the stale grant would be preserved, which is just wrong.

RSEQ already keeps track of that and sets TIF_RSEQ, which invokes the
critical section and ID update mechanisms. Utilize them and clear the user
space slice control member of struct rseq unconditionally within the
existing user access sections. That's just one more unconditional store in
that path.

Signed-off-by: Thomas Gleixner
Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Mathieu Desnoyers
Link: https://patch.msgid.link/20251215155709.131081527@linutronix.de
---
 include/linux/rseq_entry.h | 30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

(limited to 'include/linux/rseq_entry.h')

diff --git a/include/linux/rseq_entry.h b/include/linux/rseq_entry.h
index 8d04611056aa..fc77b9dd0932 100644
--- a/include/linux/rseq_entry.h
+++ b/include/linux/rseq_entry.h
@@ -102,9 +102,17 @@ static __always_inline bool rseq_arm_slice_extension_timer(void)
 	return __rseq_arm_slice_extension_timer();
 }
 
+static __always_inline void rseq_slice_clear_grant(struct task_struct *t)
+{
+	if (IS_ENABLED(CONFIG_RSEQ_STATS) && t->rseq.slice.state.granted)
+		rseq_stat_inc(rseq_stats.s_revoked);
+	t->rseq.slice.state.granted = false;
+}
+
 #else /* CONFIG_RSEQ_SLICE_EXTENSION */
 static inline bool rseq_slice_extension_enabled(void) { return false; }
 static inline bool rseq_arm_slice_extension_timer(void) { return false; }
+static inline void rseq_slice_clear_grant(struct task_struct *t) { }
 #endif /* !CONFIG_RSEQ_SLICE_EXTENSION */
 
 bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long csaddr);
@@ -391,8 +399,15 @@ bool rseq_set_ids_get_csaddr(struct task_struct *t, struct rseq_ids *ids,
 		unsafe_put_user(ids->mm_cid, &rseq->mm_cid, efault);
 		if (csaddr)
 			unsafe_get_user(*csaddr, &rseq->rseq_cs, efault);
+
+		/* Open coded, so it's in the same user access region */
+		if (rseq_slice_extension_enabled()) {
+			/* Unconditionally clear it, no point in conditionals */
+			unsafe_put_user(0U, &rseq->slice_ctrl.all, efault);
+		}
 	}
 
+	rseq_slice_clear_grant(t);
 	/* Cache the new values */
 	t->rseq.ids.cpu_cid = ids->cpu_cid;
 	rseq_stat_inc(rseq_stats.ids);
@@ -488,8 +503,17 @@ static __always_inline bool rseq_exit_user_update(struct pt_regs *regs, struct t
 	 */
 	u64 csaddr;
 
-	if (unlikely(get_user_inline(csaddr, &rseq->rseq_cs)))
-		return false;
+	scoped_user_rw_access(rseq, efault) {
+		unsafe_get_user(csaddr, &rseq->rseq_cs, efault);
+
+		/* Open coded, so it's in the same user access region */
+		if (rseq_slice_extension_enabled()) {
+			/* Unconditionally clear it, no point in conditionals */
+			unsafe_put_user(0U, &rseq->slice_ctrl.all, efault);
+		}
+	}
+
+	rseq_slice_clear_grant(t);
 
 	if (static_branch_unlikely(&rseq_debug_enabled) || unlikely(csaddr)) {
 		if (unlikely(!rseq_update_user_cs(t, regs, csaddr)))
@@ -505,6 +529,8 @@ static __always_inline bool rseq_exit_user_update(struct pt_regs *regs, struct t
 	u32 node_id = cpu_to_node(ids.cpu_id);
 
 	return rseq_update_usr(t, regs, &ids, node_id);
+efault:
+	return false;
 }
 
 static __always_inline bool __rseq_exit_to_user_mode_restart(struct pt_regs *regs)
-- 
cgit v1.2.3
From dfb630f548a7c715efb0651c6abf334dca75cd52 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Mon, 15 Dec 2025 17:52:28 +0100
Subject: rseq: Implement rseq_grant_slice_extension()

Provide the actual decision function, which decides whether a time slice
extension is granted in the exit to user mode path when NEED_RESCHED is
evaluated.

The decision is made in two stages. First an inline quick check to avoid
going into the actual decision function. This checks whether:

  #1 the functionality is enabled

  #2 the exit is a return from interrupt to user mode

  #3 any TIF bit which causes extra work is set. That includes TIF_RSEQ,
     which means the task was already scheduled out.

The slow path, which implements the actual user space ABI, is invoked when:

  A) #1 is true, #2 is true and #3 is false

     It checks whether user space requested a slice extension by setting
     the request bit in the rseq slice_ctrl field. If so, it grants the
     extension and stores the slice expiry time, so that the actual exit
     code can double check whether the slice is already exhausted before
     going back.

  B) #1 - #3 are true _and_ a slice extension was granted in a previous
     loop iteration

     In this case the grant is revoked.

In case the user space access faults or invalid state is detected, the task
is terminated with SIGSEGV.

Signed-off-by: Thomas Gleixner
Signed-off-by: Peter Zijlstra (Intel)
Link: https://patch.msgid.link/20251215155709.195303303@linutronix.de
---
 include/linux/rseq_entry.h | 108 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 108 insertions(+)

(limited to 'include/linux/rseq_entry.h')
diff --git a/include/linux/rseq_entry.h b/include/linux/rseq_entry.h
index fc77b9dd0932..cbc4a791618b 100644
--- a/include/linux/rseq_entry.h
+++ b/include/linux/rseq_entry.h
@@ -42,6 +42,7 @@ DECLARE_PER_CPU(struct rseq_stats, rseq_stats);
 #ifdef CONFIG_RSEQ
 #include <linux/jump_label.h>
 #include <linux/rseq.h>
+#include <linux/sched/signal.h>
 #include <linux/uaccess.h>
 
 #include <uapi/linux/rseq.h>
@@ -109,10 +110,116 @@ static __always_inline void rseq_slice_clear_grant(struct task_struct *t)
 	t->rseq.slice.state.granted = false;
 }
 
+static __always_inline bool rseq_grant_slice_extension(bool work_pending)
+{
+	struct task_struct *curr = current;
+	struct rseq_slice_ctrl usr_ctrl;
+	union rseq_slice_state state;
+	struct rseq __user *rseq;
+
+	if (!rseq_slice_extension_enabled())
+		return false;
+
+	/* If not enabled or not a return from interrupt, nothing to do. */
+	state = curr->rseq.slice.state;
+	state.enabled &= curr->rseq.event.user_irq;
+	if (likely(!state.state))
+		return false;
+
+	rseq = curr->rseq.usrptr;
+	scoped_user_rw_access(rseq, efault) {
+
+		/*
+		 * Quick check conditions where a grant is not possible or
+		 * needs to be revoked.
+		 *
+		 * 1) Any TIF bit which needs to do extra work aside from
+		 *    rescheduling prevents a grant.
+		 *
+		 * 2) A previous rescheduling request resulted in a slice
+		 *    extension grant.
+		 */
+		if (unlikely(work_pending || state.granted)) {
+			/* Clear user control unconditionally. No point in checking. */
+			unsafe_put_user(0U, &rseq->slice_ctrl.all, efault);
+			rseq_slice_clear_grant(curr);
+			return false;
+		}
+
+		unsafe_get_user(usr_ctrl.all, &rseq->slice_ctrl.all, efault);
+		if (likely(!usr_ctrl.request))
+			return false;
+
+		/* Grant the slice extension */
+		usr_ctrl.request = 0;
+		usr_ctrl.granted = 1;
+		unsafe_put_user(usr_ctrl.all, &rseq->slice_ctrl.all, efault);
+	}
+
+	rseq_stat_inc(rseq_stats.s_granted);
+
+	curr->rseq.slice.state.granted = true;
+	/* Store expiry time for arming the timer on the way out */
+	curr->rseq.slice.expires = data_race(rseq_slice_ext_nsecs) + ktime_get_mono_fast_ns();
+	/*
+	 * This is racy against a remote CPU setting TIF_NEED_RESCHED in
+	 * several ways:
+	 *
+	 * 1)
+	 *	CPU0				CPU1
+	 *	clear_tsk()
+	 *					set_tsk()
+	 *	clear_preempt()
+	 *					Raise scheduler IPI on CPU0
+	 *	--> IPI
+	 *	    fold_need_resched()		-> Folds correctly
+	 * 2)
+	 *	CPU0				CPU1
+	 *					set_tsk()
+	 *	clear_tsk()
+	 *	clear_preempt()
+	 *					Raise scheduler IPI on CPU0
+	 *	--> IPI
+	 *	    fold_need_resched()		<- NOOP as TIF_NEED_RESCHED is false
+	 *
+	 * #1 is not any different from a regular remote reschedule as it
+	 * sets the previously not set bit and then raises the IPI which
+	 * folds it into the preempt counter.
+	 *
+	 * #2 is obviously incorrect from a scheduler POV, but it's not
+	 * differently incorrect than the code below clearing the
+	 * reschedule request with the safety net of the timer.
+	 *
+	 * The important part is that the clearing is protected against the
+	 * scheduler IPI and also against any other interrupt which might
+	 * end up waking up a task and setting the bits in the middle of
+	 * the operation:
+	 *
+	 *	clear_tsk()
+	 *	---> Interrupt
+	 *	     wakeup_on_this_cpu()
+	 *		set_tsk()
+	 *		set_preempt()
+	 *	clear_preempt()
+	 *
+	 * which would be inconsistent state.
+	 */
+	scoped_guard(irq) {
+		clear_tsk_need_resched(curr);
+		clear_preempt_need_resched();
+	}
+	return true;
+
+efault:
+	force_sig(SIGSEGV);
+	return false;
+}
+
 #else /* CONFIG_RSEQ_SLICE_EXTENSION */
 static inline bool rseq_slice_extension_enabled(void) { return false; }
 static inline bool rseq_arm_slice_extension_timer(void) { return false; }
 static inline void rseq_slice_clear_grant(struct task_struct *t) { }
+static inline bool rseq_grant_slice_extension(bool work_pending) { return false; }
 #endif /* !CONFIG_RSEQ_SLICE_EXTENSION */
 
 bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long csaddr);
@@ -671,6 +778,7 @@ static inline void rseq_syscall_exit_to_user_mode(void) { }
 static inline void rseq_irqentry_exit_to_user_mode(void) { }
 static inline void rseq_exit_to_user_mode_legacy(void) { }
 static inline void rseq_debug_syscall_return(struct pt_regs *regs) { }
+static inline bool rseq_grant_slice_extension(bool work_pending) { return false; }
 #endif /* !CONFIG_RSEQ */
 
 #endif /* _LINUX_RSEQ_ENTRY_H */
-- 
cgit v1.2.3
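Taken together, the series suggests the following user space pattern. What
follows is a hedged sketch, not part of the patches: the prctl() arguments
(PR_RSEQ_SLICE_EXTENSION_SET plus an enable flag), the __NR_rseq_slice_yield
syscall number, and the slice_ctrl bit layout (request and granted flags
reachable through a 32bit .all word) are all assumptions modeled on the
kernel-side accessors above; the final uapi headers are authoritative.

	#include <stddef.h>
	#include <stdio.h>
	#include <sys/prctl.h>
	#include <sys/syscall.h>
	#include <unistd.h>
	#include <linux/rseq.h>		/* assumed to provide struct rseq with slice_ctrl */

	/* Assumed bit layout, mirroring usr_ctrl.request / usr_ctrl.granted above */
	#define SLICE_CTRL_REQUEST	0x1U
	#define SLICE_CTRL_GRANTED	0x2U

	extern ptrdiff_t __rseq_offset;	/* glibc >= 2.35 registers the rseq area */

	static inline struct rseq *rseq_area(void)
	{
		/* The registered rseq area lives at a TLS offset from the thread pointer */
		return (struct rseq *)((char *)__builtin_thread_pointer() + __rseq_offset);
	}

	static void critical_section(void)
	{
		struct rseq *rs = rseq_area();

		/* Ask for a slice extension before entering the critical section */
		__atomic_store_n(&rs->slice_ctrl.all, SLICE_CTRL_REQUEST, __ATOMIC_RELAXED);

		/* ... critical work, shorter than rseq_slice_extension_nsec ... */

		/*
		 * Done. If the kernel granted an extension in the meantime,
		 * give the CPU back right away (counted as s_yielded);
		 * otherwise this merely drops the request again.
		 */
		if (__atomic_exchange_n(&rs->slice_ctrl.all, 0U, __ATOMIC_RELAXED) & SLICE_CTRL_GRANTED)
			syscall(__NR_rseq_slice_yield);	/* syscall number assumed to be defined */
	}

	int main(void)
	{
		/* Opt in once per thread; the flag name is an assumption from this series */
		if (prctl(PR_RSEQ_SLICE_EXTENSION_SET, PR_RSEQ_SLICE_EXT_ENABLE, 0, 0, 0)) {
			perror("prctl");
			return 1;
		}
		critical_section();
		return 0;
	}

Note that, per the "Reset slice extension when scheduled" patch, the kernel
clears slice_ctrl whenever the task is scheduled, so the granted bit is only
valid until the next schedule and the request has to be re-armed for every
critical section.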