Diffstat (limited to 'include/linux/rseq_entry.h')
-rw-r--r--   include/linux/rseq_entry.h   192
1 file changed, 180 insertions, 12 deletions
diff --git a/include/linux/rseq_entry.h b/include/linux/rseq_entry.h
index a36b472627de..cbc4a791618b 100644
--- a/include/linux/rseq_entry.h
+++ b/include/linux/rseq_entry.h
@@ -15,6 +15,11 @@ struct rseq_stats {
         unsigned long cs;
         unsigned long clear;
         unsigned long fixup;
+        unsigned long s_granted;
+        unsigned long s_expired;
+        unsigned long s_revoked;
+        unsigned long s_yielded;
+        unsigned long s_aborted;
 };
 
 DECLARE_PER_CPU(struct rseq_stats, rseq_stats);
@@ -37,6 +42,7 @@ DECLARE_PER_CPU(struct rseq_stats, rseq_stats);
 #ifdef CONFIG_RSEQ
 #include <linux/jump_label.h>
 #include <linux/rseq.h>
+#include <linux/sched/signal.h>
 #include <linux/uaccess.h>
 #include <linux/tracepoint-defs.h>
 
@@ -75,6 +81,147 @@ DECLARE_STATIC_KEY_MAYBE(CONFIG_RSEQ_DEBUG_DEFAULT_ENABLE, rseq_debug_enabled);
 #define rseq_inline __always_inline
 #endif
 
+#ifdef CONFIG_RSEQ_SLICE_EXTENSION
+DECLARE_STATIC_KEY_TRUE(rseq_slice_extension_key);
+
+static __always_inline bool rseq_slice_extension_enabled(void)
+{
+        return static_branch_likely(&rseq_slice_extension_key);
+}
+
+extern unsigned int rseq_slice_ext_nsecs;
+bool __rseq_arm_slice_extension_timer(void);
+
+static __always_inline bool rseq_arm_slice_extension_timer(void)
+{
+        if (!rseq_slice_extension_enabled())
+                return false;
+
+        if (likely(!current->rseq.slice.state.granted))
+                return false;
+
+        return __rseq_arm_slice_extension_timer();
+}
+
+static __always_inline void rseq_slice_clear_grant(struct task_struct *t)
+{
+        if (IS_ENABLED(CONFIG_RSEQ_STATS) && t->rseq.slice.state.granted)
+                rseq_stat_inc(rseq_stats.s_revoked);
+        t->rseq.slice.state.granted = false;
+}
+
+static __always_inline bool rseq_grant_slice_extension(bool work_pending)
+{
+        struct task_struct *curr = current;
+        struct rseq_slice_ctrl usr_ctrl;
+        union rseq_slice_state state;
+        struct rseq __user *rseq;
+
+        if (!rseq_slice_extension_enabled())
+                return false;
+
+        /* If not enabled or not a return from interrupt, nothing to do. */
+        state = curr->rseq.slice.state;
+        state.enabled &= curr->rseq.event.user_irq;
+        if (likely(!state.state))
+                return false;
+
+        rseq = curr->rseq.usrptr;
+        scoped_user_rw_access(rseq, efault) {
+
+                /*
+                 * Quick check conditions where a grant is not possible or
+                 * needs to be revoked.
+                 *
+                 * 1) Any TIF bit which needs to do extra work aside of
+                 *    rescheduling prevents a grant.
+                 *
+                 * 2) A previous rescheduling request resulted in a slice
+                 *    extension grant.
+                 */
+                if (unlikely(work_pending || state.granted)) {
+                        /* Clear user control unconditionally. No point in checking. */
+                        unsafe_put_user(0U, &rseq->slice_ctrl.all, efault);
+                        rseq_slice_clear_grant(curr);
+                        return false;
+                }
+
+                unsafe_get_user(usr_ctrl.all, &rseq->slice_ctrl.all, efault);
+                if (likely(!(usr_ctrl.request)))
+                        return false;
+
+                /* Grant the slice extension */
+                usr_ctrl.request = 0;
+                usr_ctrl.granted = 1;
+                unsafe_put_user(usr_ctrl.all, &rseq->slice_ctrl.all, efault);
+        }
+
+        rseq_stat_inc(rseq_stats.s_granted);
+
+        curr->rseq.slice.state.granted = true;
+        /* Store expiry time for arming the timer on the way out */
+        curr->rseq.slice.expires = data_race(rseq_slice_ext_nsecs) + ktime_get_mono_fast_ns();
+        /*
+         * This is racy against a remote CPU setting TIF_NEED_RESCHED in
+         * several ways:
+         *
+         * 1)
+         *      CPU0                            CPU1
+         *      clear_tsk()
+         *                                      set_tsk()
+         *      clear_preempt()
+         *                                      Raise scheduler IPI on CPU0
+         *      --> IPI
+         *          fold_need_resched()         -> Folds correctly
+         * 2)
+         *      CPU0                            CPU1
+         *                                      set_tsk()
+         *      clear_tsk()
+         *      clear_preempt()
+         *                                      Raise scheduler IPI on CPU0
+         *      --> IPI
+         *          fold_need_resched()         <- NOOP as TIF_NEED_RESCHED is false
+         *
+         * #1 is not any different from a regular remote reschedule as it
+         * sets the previously not set bit and then raises the IPI which
+         * folds it into the preempt counter.
+         *
+         * #2 is obviously incorrect from a scheduler POV, but it's not
+         * differently incorrect than the code below clearing the
+         * reschedule request with the safety net of the timer.
+         *
+         * The important part is that the clearing is protected against the
+         * scheduler IPI and also against any other interrupt which might
+         * end up waking up a task and setting the bits in the middle of
+         * the operation:
+         *
+         *      clear_tsk()
+         *      ---> Interrupt
+         *              wakeup_on_this_cpu()
+         *              set_tsk()
+         *              set_preempt()
+         *      clear_preempt()
+         *
+         * which would be inconsistent state.
+         */
+        scoped_guard(irq) {
+                clear_tsk_need_resched(curr);
+                clear_preempt_need_resched();
+        }
+        return true;
+
+efault:
+        force_sig(SIGSEGV);
+        return false;
+}
+
+#else /* CONFIG_RSEQ_SLICE_EXTENSION */
+static inline bool rseq_slice_extension_enabled(void) { return false; }
+static inline bool rseq_arm_slice_extension_timer(void) { return false; }
+static inline void rseq_slice_clear_grant(struct task_struct *t) { }
+static inline bool rseq_grant_slice_extension(bool work_pending) { return false; }
+#endif /* !CONFIG_RSEQ_SLICE_EXTENSION */
+
 bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long csaddr);
 bool rseq_debug_validate_ids(struct task_struct *t);
 
@@ -359,8 +506,15 @@ bool rseq_set_ids_get_csaddr(struct task_struct *t, struct rseq_ids *ids,
                 unsafe_put_user(ids->mm_cid, &rseq->mm_cid, efault);
                 if (csaddr)
                         unsafe_get_user(*csaddr, &rseq->rseq_cs, efault);
+
+                /* Open coded, so it's in the same user access region */
+                if (rseq_slice_extension_enabled()) {
+                        /* Unconditionally clear it, no point in conditionals */
+                        unsafe_put_user(0U, &rseq->slice_ctrl.all, efault);
+                }
         }
 
+        rseq_slice_clear_grant(t);
         /* Cache the new values */
         t->rseq.ids.cpu_cid = ids->cpu_cid;
         rseq_stat_inc(rseq_stats.ids);
@@ -456,8 +610,17 @@ static __always_inline bool rseq_exit_user_update(struct pt_regs *regs, struct t
          */
         u64 csaddr;
 
-        if (unlikely(get_user_inline(csaddr, &rseq->rseq_cs)))
-                return false;
+        scoped_user_rw_access(rseq, efault) {
+                unsafe_get_user(csaddr, &rseq->rseq_cs, efault);
+
+                /* Open coded, so it's in the same user access region */
+                if (rseq_slice_extension_enabled()) {
+                        /* Unconditionally clear it, no point in conditionals */
+                        unsafe_put_user(0U, &rseq->slice_ctrl.all, efault);
+                }
+        }
+
+        rseq_slice_clear_grant(t);
 
         if (static_branch_unlikely(&rseq_debug_enabled) || unlikely(csaddr)) {
                 if (unlikely(!rseq_update_user_cs(t, regs, csaddr)))
@@ -473,6 +636,8 @@ static __always_inline bool rseq_exit_user_update(struct pt_regs *regs, struct t
         u32 node_id = cpu_to_node(ids.cpu_id);
 
         return rseq_update_usr(t, regs, &ids, node_id);
+efault:
+        return false;
 }
 
 static __always_inline bool __rseq_exit_to_user_mode_restart(struct pt_regs *regs)
@@ -527,17 +692,19 @@ static __always_inline void clear_tif_rseq(void) { }
 
 static __always_inline bool rseq_exit_to_user_mode_restart(struct pt_regs *regs, unsigned long ti_work)
 {
-        if (likely(!test_tif_rseq(ti_work)))
-                return false;
-
-        if (unlikely(__rseq_exit_to_user_mode_restart(regs))) {
-                current->rseq.event.slowpath = true;
-                set_tsk_thread_flag(current, TIF_NOTIFY_RESUME);
-                return true;
+        if (unlikely(test_tif_rseq(ti_work))) {
+                if (unlikely(__rseq_exit_to_user_mode_restart(regs))) {
+                        current->rseq.event.slowpath = true;
+                        set_tsk_thread_flag(current, TIF_NOTIFY_RESUME);
+                        return true;
+                }
+                clear_tif_rseq();
         }
-
-        clear_tif_rseq();
-        return false;
+        /*
+         * Arm the slice extension timer if nothing to do anymore and the
+         * task really goes out to user space.
+         */
+        return rseq_arm_slice_extension_timer();
 }
 
 #else /* CONFIG_GENERIC_ENTRY */
@@ -611,6 +778,7 @@ static inline void rseq_syscall_exit_to_user_mode(void) { }
 static inline void rseq_irqentry_exit_to_user_mode(void) { }
 static inline void rseq_exit_to_user_mode_legacy(void) { }
 static inline void rseq_debug_syscall_return(struct pt_regs *regs) { }
+static inline bool rseq_grant_slice_extension(bool work_pending) { return false; }
 #endif /* !CONFIG_RSEQ */
 
 #endif /* _LINUX_RSEQ_ENTRY_H */
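Note: the grant path in rseq_grant_slice_extension() only touches two pieces of state: the user-visible rseq->slice_ctrl word (accessed through .all, .request and .granted) and the kernel-side per-task curr->rseq.slice.state (tested through .enabled, .granted and the combined .state). Their real definitions live outside this header; the following is only a minimal sketch of layouts that would be consistent with those accesses, not the actual uapi or kernel structures.

/*
 * Illustrative only: assumed bit layouts matching how the fields are
 * accessed in the diff above; the real definitions may differ.
 */
#include <linux/types.h>

struct rseq_slice_ctrl {                        /* shared with user space */
        union {
                struct {
                        u32 request : 1;        /* user asks for a slice extension */
                        u32 granted : 1;        /* kernel granted the extension */
                };
                u32 all;                        /* read and cleared as one word */
        };
};

union rseq_slice_state {                        /* kernel-internal, per task */
        struct {
                u16 enabled;                    /* slice extensions enabled */
                u16 granted;                    /* an extension is currently granted */
        };
        u32 state;                              /* both halves tested at once */
};

With a layout along these lines, the !state.state check covers "not enabled" and "no grant outstanding" in a single comparison, which is why the fast path in rseq_grant_slice_extension() can bail out after one load.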
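From the user side, the protocol implied by the diff is: set the request bit before a short critical section, clear it afterwards, and if the kernel converted the request into a grant in the meantime, give the CPU back promptly (the revocation in rseq_slice_clear_grant() and the timer armed via rseq_arm_slice_extension_timer() are the kernel's safety net). Below is a rough user-space sketch, assuming the thread keeps a pointer to the slice control word of its registered struct rseq and assuming the bit values from the sketch above; the real ABI and the proper yield mechanism of the full series are not spelled out in this header.

/* Hypothetical user-space usage sketch; not the official ABI contract. */
#include <sched.h>
#include <stdatomic.h>
#include <stdint.h>

#define SLICE_REQUEST   0x1u    /* assumed bit positions, see sketch above */
#define SLICE_GRANTED   0x2u

/* Assumed: points into this thread's registered struct rseq area. */
extern volatile uint32_t *thread_slice_ctrl;

static inline void slice_critical_begin(void)
{
        /* Ask for an extension in case we are preempted in here. */
        *thread_slice_ctrl = SLICE_REQUEST;
        atomic_signal_fence(memory_order_seq_cst);
}

static inline void slice_critical_end(void)
{
        uint32_t ctrl = *thread_slice_ctrl;

        *thread_slice_ctrl = 0;
        atomic_signal_fence(memory_order_seq_cst);

        /* The kernel granted extra time: give the CPU back promptly. */
        if (ctrl & SLICE_GRANTED)
                sched_yield();  /* stand-in for the series' real yield path */
}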
