Diffstat (limited to 'kernel/rseq.c')
-rw-r--r--  kernel/rseq.c | 655
1 file changed, 280 insertions(+), 375 deletions(-)
diff --git a/kernel/rseq.c b/kernel/rseq.c
index 2452b7366b00..395d8b002350 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -8,98 +8,7 @@
* Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
*/
-#include <linux/sched.h>
-#include <linux/uaccess.h>
-#include <linux/syscalls.h>
-#include <linux/rseq.h>
-#include <linux/types.h>
-#include <linux/ratelimit.h>
-#include <asm/ptrace.h>
-
-#define CREATE_TRACE_POINTS
-#include <trace/events/rseq.h>
-
-/* The original rseq structure size (including padding) is 32 bytes. */
-#define ORIG_RSEQ_SIZE 32
-
-#define RSEQ_CS_NO_RESTART_FLAGS (RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT | \
- RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL | \
- RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE)
-
-#ifdef CONFIG_DEBUG_RSEQ
-static struct rseq *rseq_kernel_fields(struct task_struct *t)
-{
- return (struct rseq *) t->rseq_fields;
-}
-
-static int rseq_validate_ro_fields(struct task_struct *t)
-{
- static DEFINE_RATELIMIT_STATE(_rs,
- DEFAULT_RATELIMIT_INTERVAL,
- DEFAULT_RATELIMIT_BURST);
- u32 cpu_id_start, cpu_id, node_id, mm_cid;
- struct rseq __user *rseq = t->rseq;
-
- /*
- * Validate fields which are required to be read-only by
- * user-space.
- */
- if (!user_read_access_begin(rseq, t->rseq_len))
- goto efault;
- unsafe_get_user(cpu_id_start, &rseq->cpu_id_start, efault_end);
- unsafe_get_user(cpu_id, &rseq->cpu_id, efault_end);
- unsafe_get_user(node_id, &rseq->node_id, efault_end);
- unsafe_get_user(mm_cid, &rseq->mm_cid, efault_end);
- user_read_access_end();
-
- if ((cpu_id_start != rseq_kernel_fields(t)->cpu_id_start ||
- cpu_id != rseq_kernel_fields(t)->cpu_id ||
- node_id != rseq_kernel_fields(t)->node_id ||
- mm_cid != rseq_kernel_fields(t)->mm_cid) && __ratelimit(&_rs)) {
-
- pr_warn("Detected rseq corruption for pid: %d, name: %s\n"
- "\tcpu_id_start: %u ?= %u\n"
- "\tcpu_id: %u ?= %u\n"
- "\tnode_id: %u ?= %u\n"
- "\tmm_cid: %u ?= %u\n",
- t->pid, t->comm,
- cpu_id_start, rseq_kernel_fields(t)->cpu_id_start,
- cpu_id, rseq_kernel_fields(t)->cpu_id,
- node_id, rseq_kernel_fields(t)->node_id,
- mm_cid, rseq_kernel_fields(t)->mm_cid);
- }
-
- /* For now, only print a console warning on mismatch. */
- return 0;
-
-efault_end:
- user_read_access_end();
-efault:
- return -EFAULT;
-}
-
-/*
- * Update an rseq field and its in-kernel copy in lock-step to keep a coherent
- * state.
- */
-#define rseq_unsafe_put_user(t, value, field, error_label) \
- do { \
- unsafe_put_user(value, &t->rseq->field, error_label); \
- rseq_kernel_fields(t)->field = value; \
- } while (0)
-
-#else
-static int rseq_validate_ro_fields(struct task_struct *t)
-{
- return 0;
-}
-
-#define rseq_unsafe_put_user(t, value, field, error_label) \
- unsafe_put_user(value, &t->rseq->field, error_label)
-#endif
-
/*
- *
* Restartable sequences are a lightweight interface that allows
* user-level code to be executed atomically relative to scheduler
* preemption and signal delivery. Typically used for implementing
@@ -158,356 +67,356 @@ static int rseq_validate_ro_fields(struct task_struct *t)
* F1. <failure>
*/
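For orientation, here is a minimal user-space sketch of registering the rseq ABI that the code in this file maintains. It is not part of this patch: RSEQ_SIG is an arbitrary example constant, and on glibc 2.35+ the raw syscall typically fails with EBUSY because the C library registers an rseq area itself.

	#define _GNU_SOURCE
	#include <linux/rseq.h>		/* UAPI struct rseq */
	#include <sys/syscall.h>
	#include <unistd.h>
	#include <stdio.h>

	/* Must stay valid for the thread's lifetime, 32-byte aligned per ABI */
	static __thread struct rseq rs __attribute__((aligned(32)));

	#define RSEQ_SIG 0x53053053	/* example signature for abort handlers */

	int main(void)
	{
		if (syscall(__NR_rseq, &rs, sizeof(rs), 0, RSEQ_SIG)) {
			perror("rseq register"); /* EBUSY: libc already did it */
			return 1;
		}
		/* The kernel now keeps cpu_id et al. up to date in rs */
		printf("registered; running on CPU %u\n", rs.cpu_id);
		return 0;
	}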
-static int rseq_update_cpu_node_id(struct task_struct *t)
-{
- struct rseq __user *rseq = t->rseq;
- u32 cpu_id = raw_smp_processor_id();
- u32 node_id = cpu_to_node(cpu_id);
- u32 mm_cid = task_mm_cid(t);
+/* Required to select the proper per_cpu ops for rseq_stats_inc() */
+#define RSEQ_BUILD_SLOW_PATH
- /*
- * Validate read-only rseq fields.
- */
- if (rseq_validate_ro_fields(t))
- goto efault;
- WARN_ON_ONCE((int) mm_cid < 0);
- if (!user_write_access_begin(rseq, t->rseq_len))
- goto efault;
+#include <linux/debugfs.h>
+#include <linux/ratelimit.h>
+#include <linux/rseq_entry.h>
+#include <linux/sched.h>
+#include <linux/syscalls.h>
+#include <linux/uaccess.h>
+#include <linux/types.h>
+#include <asm/ptrace.h>
- rseq_unsafe_put_user(t, cpu_id, cpu_id_start, efault_end);
- rseq_unsafe_put_user(t, cpu_id, cpu_id, efault_end);
- rseq_unsafe_put_user(t, node_id, node_id, efault_end);
- rseq_unsafe_put_user(t, mm_cid, mm_cid, efault_end);
+#define CREATE_TRACE_POINTS
+#include <trace/events/rseq.h>
- /*
- * Additional feature fields added after ORIG_RSEQ_SIZE
- * need to be conditionally updated only if
- * t->rseq_len != ORIG_RSEQ_SIZE.
- */
- user_write_access_end();
- trace_rseq_update(t);
- return 0;
+DEFINE_STATIC_KEY_MAYBE(CONFIG_RSEQ_DEBUG_DEFAULT_ENABLE, rseq_debug_enabled);
-efault_end:
- user_write_access_end();
-efault:
- return -EFAULT;
+static inline void rseq_control_debug(bool on)
+{
+ if (on)
+ static_branch_enable(&rseq_debug_enabled);
+ else
+ static_branch_disable(&rseq_debug_enabled);
}
-static int rseq_reset_rseq_cpu_node_id(struct task_struct *t)
+static int __init rseq_setup_debug(char *str)
{
- struct rseq __user *rseq = t->rseq;
- u32 cpu_id_start = 0, cpu_id = RSEQ_CPU_ID_UNINITIALIZED, node_id = 0,
- mm_cid = 0;
-
- /*
- * Validate read-only rseq fields.
- */
- if (rseq_validate_ro_fields(t))
- goto efault;
+ bool on;
- if (!user_write_access_begin(rseq, t->rseq_len))
- goto efault;
-
- /*
- * Reset all fields to their initial state.
- *
- * All fields have an initial state of 0 except cpu_id which is set to
- * RSEQ_CPU_ID_UNINITIALIZED, so that any user coming in after
- * unregistration can figure out that rseq needs to be registered
- * again.
- */
- rseq_unsafe_put_user(t, cpu_id_start, cpu_id_start, efault_end);
- rseq_unsafe_put_user(t, cpu_id, cpu_id, efault_end);
- rseq_unsafe_put_user(t, node_id, node_id, efault_end);
- rseq_unsafe_put_user(t, mm_cid, mm_cid, efault_end);
-
- /*
- * Additional feature fields added after ORIG_RSEQ_SIZE
- * need to be conditionally reset only if
- * t->rseq_len != ORIG_RSEQ_SIZE.
- */
- user_write_access_end();
- return 0;
-
-efault_end:
- user_write_access_end();
-efault:
- return -EFAULT;
+ if (kstrtobool(str, &on))
+ return -EINVAL;
+ rseq_control_debug(on);
+ return 1;
}
+__setup("rseq_debug=", rseq_setup_debug);
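Usage note (inferred from the handler above, not part of the patch): booting with rseq_debug=1 or rseq_debug=on enables the debug static branch from early boot, and any string kstrtobool() rejects makes the handler fail with -EINVAL. The same switch is exposed at runtime through debugfs further down.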
+#ifdef CONFIG_TRACEPOINTS
/*
- * Get the user-space pointer value stored in the 'rseq_cs' field.
+ * Out of line, so the actual update functions can be in a header to be
+ * inlined into the exit to user code.
*/
-static int rseq_get_rseq_cs_ptr_val(struct rseq __user *rseq, u64 *rseq_cs)
+void __rseq_trace_update(struct task_struct *t)
{
- if (!rseq_cs)
- return -EFAULT;
-
-#ifdef CONFIG_64BIT
- if (get_user(*rseq_cs, &rseq->rseq_cs))
- return -EFAULT;
-#else
- if (copy_from_user(rseq_cs, &rseq->rseq_cs, sizeof(*rseq_cs)))
- return -EFAULT;
-#endif
+ trace_rseq_update(t);
+}
- return 0;
+void __rseq_trace_ip_fixup(unsigned long ip, unsigned long start_ip,
+ unsigned long offset, unsigned long abort_ip)
+{
+ trace_rseq_ip_fixup(ip, start_ip, offset, abort_ip);
}
+#endif /* CONFIG_TRACEPOINTS */
-/*
- * If the rseq_cs field of 'struct rseq' contains a valid pointer to
- * user-space, copy 'struct rseq_cs' from user-space and validate its fields.
- */
-static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs)
+#ifdef CONFIG_DEBUG_FS
+#ifdef CONFIG_RSEQ_STATS
+DEFINE_PER_CPU(struct rseq_stats, rseq_stats);
+
+static int rseq_stats_show(struct seq_file *m, void *p)
{
- struct rseq_cs __user *urseq_cs;
- u64 ptr;
- u32 __user *usig;
- u32 sig;
- int ret;
-
- ret = rseq_get_rseq_cs_ptr_val(t->rseq, &ptr);
- if (ret)
- return ret;
-
- /* If the rseq_cs pointer is NULL, return a cleared struct rseq_cs. */
- if (!ptr) {
- memset(rseq_cs, 0, sizeof(*rseq_cs));
- return 0;
+ struct rseq_stats stats = { };
+ unsigned int cpu;
+
+ for_each_possible_cpu(cpu) {
+ stats.exit += data_race(per_cpu(rseq_stats.exit, cpu));
+ stats.signal += data_race(per_cpu(rseq_stats.signal, cpu));
+ stats.slowpath += data_race(per_cpu(rseq_stats.slowpath, cpu));
+ stats.fastpath += data_race(per_cpu(rseq_stats.fastpath, cpu));
+ stats.ids += data_race(per_cpu(rseq_stats.ids, cpu));
+ stats.cs += data_race(per_cpu(rseq_stats.cs, cpu));
+ stats.clear += data_race(per_cpu(rseq_stats.clear, cpu));
+ stats.fixup += data_race(per_cpu(rseq_stats.fixup, cpu));
}
- /* Check that the pointer value fits in the user-space process space. */
- if (ptr >= TASK_SIZE)
- return -EINVAL;
- urseq_cs = (struct rseq_cs __user *)(unsigned long)ptr;
- if (copy_from_user(rseq_cs, urseq_cs, sizeof(*rseq_cs)))
- return -EFAULT;
- if (rseq_cs->start_ip >= TASK_SIZE ||
- rseq_cs->start_ip + rseq_cs->post_commit_offset >= TASK_SIZE ||
- rseq_cs->abort_ip >= TASK_SIZE ||
- rseq_cs->version > 0)
- return -EINVAL;
- /* Check for overflow. */
- if (rseq_cs->start_ip + rseq_cs->post_commit_offset < rseq_cs->start_ip)
- return -EINVAL;
- /* Ensure that abort_ip is not in the critical section. */
- if (rseq_cs->abort_ip - rseq_cs->start_ip < rseq_cs->post_commit_offset)
- return -EINVAL;
+ seq_printf(m, "exit: %16lu\n", stats.exit);
+ seq_printf(m, "signal: %16lu\n", stats.signal);
+ seq_printf(m, "slowp: %16lu\n", stats.slowpath);
+ seq_printf(m, "fastp: %16lu\n", stats.fastpath);
+ seq_printf(m, "ids: %16lu\n", stats.ids);
+ seq_printf(m, "cs: %16lu\n", stats.cs);
+ seq_printf(m, "clear: %16lu\n", stats.clear);
+ seq_printf(m, "fixup: %16lu\n", stats.fixup);
+ return 0;
+}
- usig = (u32 __user *)(unsigned long)(rseq_cs->abort_ip - sizeof(u32));
- ret = get_user(sig, usig);
- if (ret)
- return ret;
+static int rseq_stats_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, rseq_stats_show, inode->i_private);
+}
- if (current->rseq_sig != sig) {
- printk_ratelimited(KERN_WARNING
- "Possible attack attempt. Unexpected rseq signature 0x%x, expecting 0x%x (pid=%d, addr=%p).\n",
- sig, current->rseq_sig, current->pid, usig);
- return -EINVAL;
- }
+static const struct file_operations stat_ops = {
+ .open = rseq_stats_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int __init rseq_stats_init(struct dentry *root_dir)
+{
+ debugfs_create_file("stats", 0444, root_dir, NULL, &stat_ops);
return 0;
}
+#else
+static inline void rseq_stats_init(struct dentry *root_dir) { }
+#endif /* CONFIG_RSEQ_STATS */
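With CONFIG_RSEQ_STATS and CONFIG_DEBUG_FS enabled, the aggregated counters show up in /sys/kernel/debug/rseq/stats (the "rseq" directory is created below in rseq_debugfs_init()). A minimal sketch for dumping them from user space, the path being inferred from the debugfs calls in this patch:

	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/sys/kernel/debug/rseq/stats", "r");
		char line[64];

		if (!f) {
			perror("fopen");	/* debugfs mounted? root? */
			return 1;
		}
		/* One "name: value" line per counter, as emitted by
		 * rseq_stats_show() */
		while (fgets(line, sizeof(line), f))
			fputs(line, stdout);
		fclose(f);
		return 0;
	}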
-static bool rseq_warn_flags(const char *str, u32 flags)
+static int rseq_debug_show(struct seq_file *m, void *p)
{
- u32 test_flags;
-
- if (!flags)
- return false;
- test_flags = flags & RSEQ_CS_NO_RESTART_FLAGS;
- if (test_flags)
- pr_warn_once("Deprecated flags (%u) in %s ABI structure", test_flags, str);
- test_flags = flags & ~RSEQ_CS_NO_RESTART_FLAGS;
- if (test_flags)
- pr_warn_once("Unknown flags (%u) in %s ABI structure", test_flags, str);
- return true;
+ bool on = static_branch_unlikely(&rseq_debug_enabled);
+
+ seq_printf(m, "%d\n", on);
+ return 0;
}
-static int rseq_need_restart(struct task_struct *t, u32 cs_flags)
+static ssize_t rseq_debug_write(struct file *file, const char __user *ubuf,
+ size_t count, loff_t *ppos)
{
- u32 flags, event_mask;
- int ret;
+ bool on;
- if (rseq_warn_flags("rseq_cs", cs_flags))
+ if (kstrtobool_from_user(ubuf, count, &on))
return -EINVAL;
- /* Get thread flags. */
- ret = get_user(flags, &t->rseq->flags);
- if (ret)
- return ret;
+ rseq_control_debug(on);
+ return count;
+}
- if (rseq_warn_flags("rseq", flags))
- return -EINVAL;
+static int rseq_debug_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, rseq_debug_show, inode->i_private);
+}
- /*
- * Load and clear event mask atomically with respect to
- * scheduler preemption and membarrier IPIs.
- */
- scoped_guard(RSEQ_EVENT_GUARD) {
- event_mask = t->rseq_event_mask;
- t->rseq_event_mask = 0;
- }
+static const struct file_operations debug_ops = {
+ .open = rseq_debug_open,
+ .read = seq_read,
+ .write = rseq_debug_write,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int __init rseq_debugfs_init(void)
+{
+ struct dentry *root_dir = debugfs_create_dir("rseq", NULL);
- return !!event_mask;
+ debugfs_create_file("debug", 0644, root_dir, NULL, &debug_ops);
+ rseq_stats_init(root_dir);
+ return 0;
}
+__initcall(rseq_debugfs_init);
+#endif /* CONFIG_DEBUG_FS */
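The debug file accepts anything kstrtobool_from_user() understands ("0"/"1", "y"/"n", "on"/"off"). A hedged sketch of flipping the static branch at runtime, again assuming the default debugfs mount point:

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("/sys/kernel/debug/rseq/debug", O_WRONLY);

		if (fd < 0) {
			perror("open");
			return 1;
		}
		if (write(fd, "on", 2) != 2)	/* enable rseq debug checks */
			perror("write");
		close(fd);
		return 0;
	}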
-static int clear_rseq_cs(struct rseq __user *rseq)
+static bool rseq_set_ids(struct task_struct *t, struct rseq_ids *ids, u32 node_id)
{
- /*
- * The rseq_cs field is set to NULL on preemption or signal
- * delivery on top of rseq assembly block, as well as on top
- * of code outside of the rseq assembly block. This performs
- * a lazy clear of the rseq_cs field.
- *
- * Set rseq_cs to NULL.
- */
-#ifdef CONFIG_64BIT
- return put_user(0UL, &rseq->rseq_cs);
-#else
- if (clear_user(&rseq->rseq_cs, sizeof(rseq->rseq_cs)))
- return -EFAULT;
- return 0;
-#endif
+ return rseq_set_ids_get_csaddr(t, ids, node_id, NULL);
}
-/*
- * Unsigned comparison will be true when ip >= start_ip, and when
- * ip < start_ip + post_commit_offset.
- */
-static bool in_rseq_cs(unsigned long ip, struct rseq_cs *rseq_cs)
+static bool rseq_handle_cs(struct task_struct *t, struct pt_regs *regs)
{
- return ip - rseq_cs->start_ip < rseq_cs->post_commit_offset;
+ struct rseq __user *urseq = t->rseq.usrptr;
+ u64 csaddr;
+
+ scoped_user_read_access(urseq, efault)
+ unsafe_get_user(csaddr, &urseq->rseq_cs, efault);
+ if (likely(!csaddr))
+ return true;
+ return rseq_update_user_cs(t, regs, csaddr);
+efault:
+ return false;
}
-static int rseq_ip_fixup(struct pt_regs *regs)
+static void rseq_slowpath_update_usr(struct pt_regs *regs)
{
- unsigned long ip = instruction_pointer(regs);
+ /*
+ * Preserve rseq state and user_irq state. The generic entry code
+ * clears user_irq on the way out, the non-generic entry
+ * architectures are not having user_irq.
+ */
+ const struct rseq_event evt_mask = { .has_rseq = true, .user_irq = true, };
struct task_struct *t = current;
- struct rseq_cs rseq_cs;
- int ret;
+ struct rseq_ids ids;
+ u32 node_id;
+ bool event;
+
+ if (unlikely(t->flags & PF_EXITING))
+ return;
- ret = rseq_get_rseq_cs(t, &rseq_cs);
- if (ret)
- return ret;
+ rseq_stat_inc(rseq_stats.slowpath);
/*
- * Handle potentially not being within a critical section.
- * If not nested over a rseq critical section, restart is useless.
- * Clear the rseq_cs pointer and return.
+ * Read and clear the event pending bit first. If the task
+ * was not preempted or migrated or a signal is on the way,
+ * there is no point in doing any of the heavy lifting here
+ * on production kernels. In that case TIF_NOTIFY_RESUME
+ * was raised by some other functionality.
+ *
+ * This is correct because the read/clear operation is
+ * guarded against scheduler preemption, which makes it CPU
+ * local atomic. If the task is preempted right after
+ * re-enabling preemption then TIF_NOTIFY_RESUME is set
+ * again and this function is invoked another time _before_
+ * the task is able to return to user mode.
+ *
+ * On a debug kernel, invoke the fixup code unconditionally
+ * with the result handed in to allow the detection of
+ * inconsistencies.
*/
- if (!in_rseq_cs(ip, &rseq_cs))
- return clear_rseq_cs(t->rseq);
- ret = rseq_need_restart(t, rseq_cs.flags);
- if (ret <= 0)
- return ret;
- ret = clear_rseq_cs(t->rseq);
- if (ret)
- return ret;
- trace_rseq_ip_fixup(ip, rseq_cs.start_ip, rseq_cs.post_commit_offset,
- rseq_cs.abort_ip);
- instruction_pointer_set(regs, (unsigned long)rseq_cs.abort_ip);
- return 0;
+ scoped_guard(irq) {
+ event = t->rseq.event.sched_switch;
+ t->rseq.event.all &= evt_mask.all;
+ ids.cpu_id = task_cpu(t);
+ ids.mm_cid = task_mm_cid(t);
+ }
+
+ if (!event)
+ return;
+
+ node_id = cpu_to_node(ids.cpu_id);
+
+ if (unlikely(!rseq_update_usr(t, regs, &ids, node_id))) {
+ /*
+ * Clear the errors just in case this might survive magically, but
+ * leave the rest intact.
+ */
+ t->rseq.event.error = 0;
+ force_sig(SIGSEGV);
+ }
}
-/*
- * This resume handler must always be executed between any of:
- * - preemption,
- * - signal delivery,
- * and return to user-space.
- *
- * This is how we can ensure that the entire rseq critical section
- * will issue the commit instruction only if executed atomically with
- * respect to other threads scheduled on the same CPU, and with respect
- * to signal handlers.
- */
-void __rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs)
+void __rseq_handle_slowpath(struct pt_regs *regs)
{
- struct task_struct *t = current;
- int ret, sig;
-
- if (unlikely(t->flags & PF_EXITING))
+ /*
+ * If invoked from hypervisors before entering the guest via
+ * resume_user_mode_work(), then @regs is a NULL pointer.
+ *
+ * resume_user_mode_work() clears TIF_NOTIFY_RESUME and re-raises
+ * it before returning from the ioctl() to user space when
+ * rseq_event.sched_switch is set.
+ *
+ * So it's safe to ignore here instead of pointlessly updating it
+ * in the vcpu_run() loop.
+ */
+ if (!regs)
return;
+ rseq_slowpath_update_usr(regs);
+}
+
+void __rseq_signal_deliver(int sig, struct pt_regs *regs)
+{
+ rseq_stat_inc(rseq_stats.signal);
/*
- * regs is NULL if and only if the caller is in a syscall path. Skip
- * fixup and leave rseq_cs as is so that rseq_sycall() will detect and
- * kill a misbehaving userspace on debug kernels.
+ * Don't update IDs, they are handled on exit to user if
+ * necessary. The important thing is to abort a critical section of
+ * the interrupted context as after this point the instruction
+ * pointer in @regs points to the signal handler.
*/
- if (regs) {
- ret = rseq_ip_fixup(regs);
- if (unlikely(ret < 0))
- goto error;
+ if (unlikely(!rseq_handle_cs(current, regs))) {
+ /*
+ * Clear the errors just in case this might survive
+ * magically, but leave the rest intact.
+ */
+ current->rseq.event.error = 0;
+ force_sigsegv(sig);
}
- if (unlikely(rseq_update_cpu_node_id(t)))
- goto error;
- return;
-
-error:
- sig = ksig ? ksig->sig : 0;
- force_sigsegv(sig);
}
-#ifdef CONFIG_DEBUG_RSEQ
-
/*
* Terminate the process if a syscall is issued within a restartable
* sequence.
*/
-void rseq_syscall(struct pt_regs *regs)
+void __rseq_debug_syscall_return(struct pt_regs *regs)
{
- unsigned long ip = instruction_pointer(regs);
struct task_struct *t = current;
- struct rseq_cs rseq_cs;
+ u64 csaddr;
- if (!t->rseq)
+ if (!t->rseq.event.has_rseq)
return;
- if (rseq_get_rseq_cs(t, &rseq_cs) || in_rseq_cs(ip, &rseq_cs))
- force_sig(SIGSEGV);
+ if (get_user(csaddr, &t->rseq.usrptr->rseq_cs))
+ goto fail;
+ if (likely(!csaddr))
+ return;
+ if (unlikely(csaddr >= TASK_SIZE))
+ goto fail;
+ if (rseq_debug_update_user_cs(t, regs, csaddr))
+ return;
+fail:
+ force_sig(SIGSEGV);
}
+#ifdef CONFIG_DEBUG_RSEQ
+/* Kept around to keep GENERIC_ENTRY=n architectures supported. */
+void rseq_syscall(struct pt_regs *regs)
+{
+ __rseq_debug_syscall_return(regs);
+}
#endif
+static bool rseq_reset_ids(void)
+{
+ struct rseq_ids ids = {
+ .cpu_id = RSEQ_CPU_ID_UNINITIALIZED,
+ .mm_cid = 0,
+ };
+
+ /*
+ * If this fails, terminate it because this leaves the kernel in
+ * stupid state as exit to user space will try to fixup the ids
+ * again.
+ */
+ if (rseq_set_ids(current, &ids, 0))
+ return true;
+
+ force_sig(SIGSEGV);
+ return false;
+}
+
+/* The original rseq structure size (including padding) is 32 bytes. */
+#define ORIG_RSEQ_SIZE 32
+
/*
* sys_rseq - setup restartable sequences for caller thread.
*/
-SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len,
- int, flags, u32, sig)
+SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32, sig)
{
- int ret;
- u64 rseq_cs;
-
if (flags & RSEQ_FLAG_UNREGISTER) {
if (flags & ~RSEQ_FLAG_UNREGISTER)
return -EINVAL;
/* Unregister rseq for current thread. */
- if (current->rseq != rseq || !current->rseq)
+ if (current->rseq.usrptr != rseq || !current->rseq.usrptr)
return -EINVAL;
- if (rseq_len != current->rseq_len)
+ if (rseq_len != current->rseq.len)
return -EINVAL;
- if (current->rseq_sig != sig)
+ if (current->rseq.sig != sig)
return -EPERM;
- ret = rseq_reset_rseq_cpu_node_id(current);
- if (ret)
- return ret;
- current->rseq = NULL;
- current->rseq_sig = 0;
- current->rseq_len = 0;
+ if (!rseq_reset_ids())
+ return -EFAULT;
+ rseq_reset(current);
return 0;
}
if (unlikely(flags))
return -EINVAL;
- if (current->rseq) {
+ if (current->rseq.usrptr) {
/*
* If rseq is already registered, check whether
* the provided address differs from the prior
* one.
*/
- if (current->rseq != rseq || rseq_len != current->rseq_len)
+ if (current->rseq.usrptr != rseq || rseq_len != current->rseq.len)
return -EINVAL;
- if (current->rseq_sig != sig)
+ if (current->rseq.sig != sig)
return -EPERM;
/* Already registered. */
return -EBUSY;
@@ -531,43 +440,39 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len,
if (!access_ok(rseq, rseq_len))
return -EFAULT;
- /*
- * If the rseq_cs pointer is non-NULL on registration, clear it to
- * avoid a potential segfault on return to user-space. The proper thing
- * to do would have been to fail the registration but this would break
- * older libcs that reuse the rseq area for new threads without
- * clearing the fields.
- */
- if (rseq_get_rseq_cs_ptr_val(rseq, &rseq_cs))
- return -EFAULT;
- if (rseq_cs && clear_rseq_cs(rseq))
- return -EFAULT;
+ scoped_user_write_access(rseq, efault) {
+ /*
+ * If the rseq_cs pointer is non-NULL on registration, clear it to
+ * avoid a potential segfault on return to user-space. The proper thing
+ * to do would have been to fail the registration but this would break
+ * older libcs that reuse the rseq area for new threads without
+ * clearing the fields. Don't bother reading it, just reset it.
+ */
+ unsafe_put_user(0UL, &rseq->rseq_cs, efault);
+ /* Initialize IDs in user space */
+ unsafe_put_user(RSEQ_CPU_ID_UNINITIALIZED, &rseq->cpu_id_start, efault);
+ unsafe_put_user(RSEQ_CPU_ID_UNINITIALIZED, &rseq->cpu_id, efault);
+ unsafe_put_user(0U, &rseq->node_id, efault);
+ unsafe_put_user(0U, &rseq->mm_cid, efault);
+ }
-#ifdef CONFIG_DEBUG_RSEQ
- /*
- * Initialize the in-kernel rseq fields copy for validation of
- * read-only fields.
- */
- if (get_user(rseq_kernel_fields(current)->cpu_id_start, &rseq->cpu_id_start) ||
- get_user(rseq_kernel_fields(current)->cpu_id, &rseq->cpu_id) ||
- get_user(rseq_kernel_fields(current)->node_id, &rseq->node_id) ||
- get_user(rseq_kernel_fields(current)->mm_cid, &rseq->mm_cid))
- return -EFAULT;
-#endif
/*
* Activate the registration by setting the rseq area address, length
* and signature in the task struct.
*/
- current->rseq = rseq;
- current->rseq_len = rseq_len;
- current->rseq_sig = sig;
+ current->rseq.usrptr = rseq;
+ current->rseq.len = rseq_len;
+ current->rseq.sig = sig;
/*
* If rseq was previously inactive, and has just been
* registered, ensure the cpu_id_start and cpu_id fields
* are updated before returning to user-space.
*/
- rseq_set_notify_resume(current);
-
+ current->rseq.event.has_rseq = true;
+ rseq_force_update();
return 0;
+
+efault:
+ return -EFAULT;
}
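To illustrate the error paths visible in sys_rseq() above, a small sketch under the assumption that libc has not already registered an rseq area for the thread (otherwise the first call fails with EBUSY): re-registering the same area returns -EBUSY, unregistering with a mismatched signature returns -EPERM, and a correct unregister resets the user-space IDs via rseq_reset_ids().

	#define _GNU_SOURCE
	#include <linux/rseq.h>
	#include <sys/syscall.h>
	#include <unistd.h>
	#include <errno.h>
	#include <stdint.h>
	#include <stdio.h>

	static __thread struct rseq rs __attribute__((aligned(32)));

	int main(void)
	{
		const uint32_t sig = 0x53053053;	/* example signature */

		if (syscall(__NR_rseq, &rs, sizeof(rs), 0, sig))
			return 1;	/* e.g. EBUSY: libc registered already */

		/* Same area, same signature: "already registered" */
		syscall(__NR_rseq, &rs, sizeof(rs), 0, sig);
		printf("re-register: errno %d (EBUSY=%d)\n", errno, EBUSY);

		/* Wrong signature on unregister */
		syscall(__NR_rseq, &rs, sizeof(rs), RSEQ_FLAG_UNREGISTER, sig + 1);
		printf("bad-sig unregister: errno %d (EPERM=%d)\n", errno, EPERM);

		/* Correct unregister: cpu_id goes back to UNINITIALIZED */
		return syscall(__NR_rseq, &rs, sizeof(rs), RSEQ_FLAG_UNREGISTER, sig);
	}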