summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/binfmt_elf.c3
-rw-r--r--include/linux/rseq.h12
-rw-r--r--include/uapi/linux/rseq.h26
-rw-r--r--kernel/rseq.c3
4 files changed, 38 insertions, 6 deletions
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 8e89cc5b2820..fb857faaf0d6 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -47,6 +47,7 @@
#include <linux/dax.h>
#include <linux/uaccess.h>
#include <uapi/linux/rseq.h>
+#include <linux/rseq.h>
#include <asm/param.h>
#include <asm/page.h>
@@ -286,7 +287,7 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
}
#ifdef CONFIG_RSEQ
NEW_AUX_ENT(AT_RSEQ_FEATURE_SIZE, offsetof(struct rseq, end));
- NEW_AUX_ENT(AT_RSEQ_ALIGN, __alignof__(struct rseq));
+ NEW_AUX_ENT(AT_RSEQ_ALIGN, rseq_alloc_align());
#endif
#undef NEW_AUX_ENT
/* AT_NULL is zero; clear the rest too */
diff --git a/include/linux/rseq.h b/include/linux/rseq.h
index 7a01a0760405..b9d62fc2140d 100644
--- a/include/linux/rseq.h
+++ b/include/linux/rseq.h
@@ -146,6 +146,18 @@ static inline void rseq_fork(struct task_struct *t, u64 clone_flags)
t->rseq = current->rseq;
}
+/*
+ * Value returned by getauxval(AT_RSEQ_ALIGN) and expected by rseq
+ * registration. This is the active rseq area size rounded up to next
+ * power of 2, which guarantees that the rseq structure will always be
+ * aligned on the nearest power of two large enough to contain it, even
+ * as it grows.
+ */
+static inline unsigned int rseq_alloc_align(void)
+{
+ return 1U << get_count_order(offsetof(struct rseq, end));
+}
+
#else /* CONFIG_RSEQ */
static inline void rseq_handle_slowpath(struct pt_regs *regs) { }
static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { }
diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h
index 863c4a00a66b..f69344fe6c08 100644
--- a/include/uapi/linux/rseq.h
+++ b/include/uapi/linux/rseq.h
@@ -87,10 +87,17 @@ struct rseq_slice_ctrl {
};
/*
- * struct rseq is aligned on 4 * 8 bytes to ensure it is always
- * contained within a single cache-line.
+ * The original size and alignment of the allocation for struct rseq is
+ * 32 bytes.
*
- * A single struct rseq per thread is allowed.
+ * The allocation size needs to be greater or equal to
+ * max(getauxval(AT_RSEQ_FEATURE_SIZE), 32), and the allocation needs to
+ * be aligned on max(getauxval(AT_RSEQ_ALIGN), 32).
+ *
+ * As an alternative, userspace is allowed to use both the original size
+ * and alignment of 32 bytes for backward compatibility.
+ *
+ * A single active struct rseq registration per thread is allowed.
*/
struct rseq {
/*
@@ -181,9 +188,20 @@ struct rseq {
struct rseq_slice_ctrl slice_ctrl;
/*
+ * Before rseq became extensible, its original size was 32 bytes even
+ * though the active rseq area was only 20 bytes.
+ * Exposing a 32 bytes feature size would make life needlessly painful
+ * for userspace. Therefore, add a reserved byte after byte 32
+ * to bump the rseq feature size from 32 to 33.
+ * The next field to be added to the rseq area will be larger
+ * than one byte, and will replace this reserved byte.
+ */
+ __u8 __reserved;
+
+ /*
* Flexible array member at end of structure, after last feature field.
*/
char end[];
-} __attribute__((aligned(4 * sizeof(__u64))));
+} __attribute__((aligned(32)));
#endif /* _UAPI_LINUX_RSEQ_H */
diff --git a/kernel/rseq.c b/kernel/rseq.c
index e349f86cc945..38d3ef540760 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -80,6 +80,7 @@
#include <linux/syscalls.h>
#include <linux/uaccess.h>
#include <linux/types.h>
+#include <linux/rseq.h>
#include <asm/ptrace.h>
#define CREATE_TRACE_POINTS
@@ -456,7 +457,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32
*/
if (rseq_len < ORIG_RSEQ_SIZE ||
(rseq_len == ORIG_RSEQ_SIZE && !IS_ALIGNED((unsigned long)rseq, ORIG_RSEQ_SIZE)) ||
- (rseq_len != ORIG_RSEQ_SIZE && (!IS_ALIGNED((unsigned long)rseq, __alignof__(*rseq)) ||
+ (rseq_len != ORIG_RSEQ_SIZE && (!IS_ALIGNED((unsigned long)rseq, rseq_alloc_align()) ||
rseq_len < offsetof(struct rseq, end))))
return -EINVAL;
if (!access_ok(rseq, rseq_len))