summaryrefslogtreecommitdiff
path: root/include/linux/rseq_entry.h
diff options
context:
space:
mode:
authorThomas Gleixner <tglx@linutronix.de>2025-10-27 09:45:08 +0100
committerIngo Molnar <mingo@kernel.org>2025-11-04 08:33:33 +0100
commit0f085b41880e3140efa6941ff2b8fd43bac4d659 (patch)
tree441ac375157092ba3b346b5f661acbddda7185dd /include/linux/rseq_entry.h
parenteaa9088d568c84afd72fa32dbe01833aef861d0d (diff)
rseq: Provide and use rseq_set_ids()
Provide a new and straight forward implementation to set the IDs (CPU ID, Node ID and MM CID), which can be later inlined into the fast path. It does all operations in one scoped_user_rw_access() section and retrieves also the critical section member (rseq::cs_rseq) from user space to avoid another user..begin/end() pair. This is in preparation for optimizing the fast path to avoid extra work when not required. On rseq registration set the CPU ID fields to RSEQ_CPU_ID_UNINITIALIZED and node and MM CID to zero. That's the same as the kernel internal reset values. That makes the debug validation in the exit code work correctly on the first exit to user space. Use it to replace the whole related zoo in rseq.c Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Signed-off-by: Ingo Molnar <mingo@kernel.org> Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> Link: https://patch.msgid.link/20251027084307.393972266@linutronix.de
Diffstat (limited to 'include/linux/rseq_entry.h')
-rw-r--r--include/linux/rseq_entry.h89
1 files changed, 89 insertions, 0 deletions
diff --git a/include/linux/rseq_entry.h b/include/linux/rseq_entry.h
index fb53a6ff05d7..37444e80fd45 100644
--- a/include/linux/rseq_entry.h
+++ b/include/linux/rseq_entry.h
@@ -75,6 +75,7 @@ DECLARE_STATIC_KEY_MAYBE(CONFIG_RSEQ_DEBUG_DEFAULT_ENABLE, rseq_debug_enabled);
#endif
bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long csaddr);
+bool rseq_debug_validate_ids(struct task_struct *t);
static __always_inline void rseq_note_user_irq_entry(void)
{
@@ -194,6 +195,43 @@ efault:
return false;
}
+/*
+ * On debug kernels validate that user space did not mess with it if the
+ * debug branch is enabled.
+ */
+bool rseq_debug_validate_ids(struct task_struct *t)
+{
+ struct rseq __user *rseq = t->rseq.usrptr;
+ u32 cpu_id, uval, node_id;
+
+ /*
+ * On the first exit after registering the rseq region CPU ID is
+ * RSEQ_CPU_ID_UNINITIALIZED and node_id in user space is 0!
+ */
+ node_id = t->rseq.ids.cpu_id != RSEQ_CPU_ID_UNINITIALIZED ?
+ cpu_to_node(t->rseq.ids.cpu_id) : 0;
+
+ scoped_user_read_access(rseq, efault) {
+ unsafe_get_user(cpu_id, &rseq->cpu_id_start, efault);
+ if (cpu_id != t->rseq.ids.cpu_id)
+ goto die;
+ unsafe_get_user(uval, &rseq->cpu_id, efault);
+ if (uval != cpu_id)
+ goto die;
+ unsafe_get_user(uval, &rseq->node_id, efault);
+ if (uval != node_id)
+ goto die;
+ unsafe_get_user(uval, &rseq->mm_cid, efault);
+ if (uval != t->rseq.ids.mm_cid)
+ goto die;
+ }
+ return true;
+die:
+ t->rseq.event.fatal = true;
+efault:
+ return false;
+}
+
#endif /* RSEQ_BUILD_SLOW_PATH */
/*
@@ -279,6 +317,57 @@ efault:
return false;
}
+/*
+ * Updates CPU ID, Node ID and MM CID and reads the critical section
+ * address, when @csaddr != NULL. This allows to put the ID update and the
+ * read under the same uaccess region to spare a separate begin/end.
+ *
+ * As this is either invoked from a C wrapper with @csaddr = NULL or from
+ * the fast path code with a valid pointer, a clever compiler should be
+ * able to optimize the read out. Spares a duplicate implementation.
+ *
+ * Returns true, if the operation was successful, false otherwise.
+ *
+ * In the failure case task::rseq_event::fatal is set when invalid data
+ * was found on debug kernels. It's clear when the failure was an unresolved page
+ * fault.
+ *
+ * If inlined into the exit to user path with interrupts disabled, the
+ * caller has to protect against page faults with pagefault_disable().
+ *
+ * In preemptible task context this would be counterproductive as the page
+ * faults could not be fully resolved. As a consequence unresolved page
+ * faults in task context are fatal too.
+ */
+static rseq_inline
+bool rseq_set_ids_get_csaddr(struct task_struct *t, struct rseq_ids *ids,
+ u32 node_id, u64 *csaddr)
+{
+ struct rseq __user *rseq = t->rseq.usrptr;
+
+ if (static_branch_unlikely(&rseq_debug_enabled)) {
+ if (!rseq_debug_validate_ids(t))
+ return false;
+ }
+
+ scoped_user_rw_access(rseq, efault) {
+ unsafe_put_user(ids->cpu_id, &rseq->cpu_id_start, efault);
+ unsafe_put_user(ids->cpu_id, &rseq->cpu_id, efault);
+ unsafe_put_user(node_id, &rseq->node_id, efault);
+ unsafe_put_user(ids->mm_cid, &rseq->mm_cid, efault);
+ if (csaddr)
+ unsafe_get_user(*csaddr, &rseq->rseq_cs, efault);
+ }
+
+ /* Cache the new values */
+ t->rseq.ids.cpu_cid = ids->cpu_cid;
+ rseq_stat_inc(rseq_stats.ids);
+ rseq_trace_update(t, ids);
+ return true;
+efault:
+ return false;
+}
+
static __always_inline void rseq_exit_to_user_mode(void)
{
struct rseq_event *ev = &current->rseq.event;