From f955aa8723a65759e920d4de8e5d076cef412afc Mon Sep 17 00:00:00 2001 From: Charlie Jenkins Date: Thu, 20 Mar 2025 10:29:21 -0700 Subject: riscv: entry: Convert ret_from_fork() to C Move the main section of ret_from_fork() to C to allow inlining of syscall_exit_to_user_mode(). Signed-off-by: Charlie Jenkins Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/all/20250320-riscv_optimize_entry-v6-1-63e187e26041@rivosinc.com --- arch/riscv/include/asm/asm-prototypes.h | 1 + arch/riscv/kernel/entry.S | 15 ++++++--------- arch/riscv/kernel/process.c | 14 ++++++++++++-- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h index cd627ec289f1..733ff6097787 100644 --- a/arch/riscv/include/asm/asm-prototypes.h +++ b/arch/riscv/include/asm/asm-prototypes.h @@ -52,6 +52,7 @@ DECLARE_DO_ERROR_INFO(do_trap_ecall_s); DECLARE_DO_ERROR_INFO(do_trap_ecall_m); DECLARE_DO_ERROR_INFO(do_trap_break); +asmlinkage void ret_from_fork(void *fn_arg, int (*fn)(void *), struct pt_regs *regs); asmlinkage void handle_bad_stack(struct pt_regs *regs); asmlinkage void do_page_fault(struct pt_regs *regs); asmlinkage void do_irq(struct pt_regs *regs); diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 33a5a9f2a0d4..b2dc5e7c7b3a 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -319,17 +319,14 @@ SYM_CODE_END(handle_kernel_stack_overflow) ASM_NOKPROBE(handle_kernel_stack_overflow) #endif -SYM_CODE_START(ret_from_fork) +SYM_CODE_START(ret_from_fork_asm) call schedule_tail - beqz s0, 1f /* not from kernel thread */ - /* Call fn(arg) */ - move a0, s1 - jalr s0 -1: - move a0, sp /* pt_regs */ - call syscall_exit_to_user_mode + move a0, s1 /* fn_arg */ + move a1, s0 /* fn */ + move a2, sp /* pt_regs */ + call ret_from_fork j ret_from_exception -SYM_CODE_END(ret_from_fork) +SYM_CODE_END(ret_from_fork_asm) #ifdef CONFIG_IRQ_STACKS /* diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 7c244de77180..7b0a0bfe29ae 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -17,7 +17,9 @@ #include #include #include +#include +#include #include #include #include @@ -36,7 +38,7 @@ unsigned long __stack_chk_guard __read_mostly; EXPORT_SYMBOL(__stack_chk_guard); #endif -extern asmlinkage void ret_from_fork(void); +extern asmlinkage void ret_from_fork_asm(void); void noinstr arch_cpu_idle(void) { @@ -206,6 +208,14 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) return 0; } +asmlinkage void ret_from_fork(void *fn_arg, int (*fn)(void *), struct pt_regs *regs) +{ + if (unlikely(fn)) + fn(fn_arg); + + syscall_exit_to_user_mode(regs); +} + int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { unsigned long clone_flags = args->flags; @@ -242,7 +252,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) p->thread.riscv_v_flags = 0; if (has_vector() || has_xtheadvector()) riscv_v_thread_alloc(p); - p->thread.ra = (unsigned long)ret_from_fork; + p->thread.ra = (unsigned long)ret_from_fork_asm; p->thread.sp = (unsigned long)childregs; /* kernel sp */ return 0; } -- cgit v1.3 From 5b3d6103b343d59e19bd641e4c31df519f4d250d Mon Sep 17 00:00:00 2001 From: Charlie Jenkins Date: Thu, 20 Mar 2025 10:29:22 -0700 Subject: riscv: entry: Split ret_from_fork() into user and kernel This function was unified into a single function in commit ab9164dae273 ("riscv: entry: Consolidate ret_from_kernel_thread into ret_from_fork"). However that imposed a performance degradation. Partially reverting this commit to have ret_from_fork() split again, results in a 1% increase on the number of times fork is able to be called per second. Signed-off-by: Charlie Jenkins Signed-off-by: Thomas Gleixner Acked-by: Alexandre Ghiti Link: https://lore.kernel.org/all/20250320-riscv_optimize_entry-v6-2-63e187e26041@rivosinc.com --- arch/riscv/include/asm/asm-prototypes.h | 3 ++- arch/riscv/kernel/entry.S | 13 ++++++++++--- arch/riscv/kernel/process.c | 17 +++++++++++------ 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h index 733ff6097787..bfc8ea5f9319 100644 --- a/arch/riscv/include/asm/asm-prototypes.h +++ b/arch/riscv/include/asm/asm-prototypes.h @@ -52,7 +52,8 @@ DECLARE_DO_ERROR_INFO(do_trap_ecall_s); DECLARE_DO_ERROR_INFO(do_trap_ecall_m); DECLARE_DO_ERROR_INFO(do_trap_break); -asmlinkage void ret_from_fork(void *fn_arg, int (*fn)(void *), struct pt_regs *regs); +asmlinkage void ret_from_fork_kernel(void *fn_arg, int (*fn)(void *), struct pt_regs *regs); +asmlinkage void ret_from_fork_user(struct pt_regs *regs); asmlinkage void handle_bad_stack(struct pt_regs *regs); asmlinkage void do_page_fault(struct pt_regs *regs); asmlinkage void do_irq(struct pt_regs *regs); diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index b2dc5e7c7b3a..0fb338000c6d 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -319,14 +319,21 @@ SYM_CODE_END(handle_kernel_stack_overflow) ASM_NOKPROBE(handle_kernel_stack_overflow) #endif -SYM_CODE_START(ret_from_fork_asm) +SYM_CODE_START(ret_from_fork_kernel_asm) call schedule_tail move a0, s1 /* fn_arg */ move a1, s0 /* fn */ move a2, sp /* pt_regs */ - call ret_from_fork + call ret_from_fork_kernel j ret_from_exception -SYM_CODE_END(ret_from_fork_asm) +SYM_CODE_END(ret_from_fork_kernel_asm) + +SYM_CODE_START(ret_from_fork_user_asm) + call schedule_tail + move a0, sp /* pt_regs */ + call ret_from_fork_user + j ret_from_exception +SYM_CODE_END(ret_from_fork_user_asm) #ifdef CONFIG_IRQ_STACKS /* diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 7b0a0bfe29ae..485ec7a80a56 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -38,7 +38,8 @@ unsigned long __stack_chk_guard __read_mostly; EXPORT_SYMBOL(__stack_chk_guard); #endif -extern asmlinkage void ret_from_fork_asm(void); +extern asmlinkage void ret_from_fork_kernel_asm(void); +extern asmlinkage void ret_from_fork_user_asm(void); void noinstr arch_cpu_idle(void) { @@ -208,14 +209,18 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) return 0; } -asmlinkage void ret_from_fork(void *fn_arg, int (*fn)(void *), struct pt_regs *regs) +asmlinkage void ret_from_fork_kernel(void *fn_arg, int (*fn)(void *), struct pt_regs *regs) { - if (unlikely(fn)) - fn(fn_arg); + fn(fn_arg); syscall_exit_to_user_mode(regs); } +asmlinkage void ret_from_fork_user(struct pt_regs *regs) +{ + syscall_exit_to_user_mode(regs); +} + int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { unsigned long clone_flags = args->flags; @@ -238,6 +243,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) p->thread.s[0] = (unsigned long)args->fn; p->thread.s[1] = (unsigned long)args->fn_arg; + p->thread.ra = (unsigned long)ret_from_fork_kernel_asm; } else { *childregs = *(current_pt_regs()); /* Turn off status.VS */ @@ -247,12 +253,11 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) if (clone_flags & CLONE_SETTLS) childregs->tp = tls; childregs->a0 = 0; /* Return value of fork() */ - p->thread.s[0] = 0; + p->thread.ra = (unsigned long)ret_from_fork_user_asm; } p->thread.riscv_v_flags = 0; if (has_vector() || has_xtheadvector()) riscv_v_thread_alloc(p); - p->thread.ra = (unsigned long)ret_from_fork_asm; p->thread.sp = (unsigned long)childregs; /* kernel sp */ return 0; } -- cgit v1.3 From 7ace1602abf21da505993d77ccbae1df2496b324 Mon Sep 17 00:00:00 2001 From: Charlie Jenkins Date: Thu, 20 Mar 2025 10:29:23 -0700 Subject: LoongArch: entry: Migrate ret_from_fork() to C LoongArch is the only architecture that calls syscall_exit_to_user_mode() from assembly. Move the call into C so that this function can be inlined across all architectures. Signed-off-by: Charlie Jenkins Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250320-riscv_optimize_entry-v6-3-63e187e26041@rivosinc.com --- arch/loongarch/include/asm/asm-prototypes.h | 8 +++++++ arch/loongarch/kernel/entry.S | 22 +++++++++---------- arch/loongarch/kernel/process.c | 33 +++++++++++++++++++++++------ 3 files changed, 45 insertions(+), 18 deletions(-) diff --git a/arch/loongarch/include/asm/asm-prototypes.h b/arch/loongarch/include/asm/asm-prototypes.h index 51f224bcfc65..704066b4f736 100644 --- a/arch/loongarch/include/asm/asm-prototypes.h +++ b/arch/loongarch/include/asm/asm-prototypes.h @@ -12,3 +12,11 @@ __int128_t __ashlti3(__int128_t a, int b); __int128_t __ashrti3(__int128_t a, int b); __int128_t __lshrti3(__int128_t a, int b); #endif + +asmlinkage void noinstr __no_stack_protector ret_from_fork(struct task_struct *prev, + struct pt_regs *regs); + +asmlinkage void noinstr __no_stack_protector ret_from_kernel_thread(struct task_struct *prev, + struct pt_regs *regs, + int (*fn)(void *), + void *fn_arg); diff --git a/arch/loongarch/kernel/entry.S b/arch/loongarch/kernel/entry.S index 48e7e34e355e..2abc29e57381 100644 --- a/arch/loongarch/kernel/entry.S +++ b/arch/loongarch/kernel/entry.S @@ -77,24 +77,22 @@ SYM_CODE_START(handle_syscall) SYM_CODE_END(handle_syscall) _ASM_NOKPROBE(handle_syscall) -SYM_CODE_START(ret_from_fork) +SYM_CODE_START(ret_from_fork_asm) UNWIND_HINT_REGS - bl schedule_tail # a0 = struct task_struct *prev - move a0, sp - bl syscall_exit_to_user_mode + move a1, sp + bl ret_from_fork RESTORE_STATIC RESTORE_SOME RESTORE_SP_AND_RET -SYM_CODE_END(ret_from_fork) +SYM_CODE_END(ret_from_fork_asm) -SYM_CODE_START(ret_from_kernel_thread) +SYM_CODE_START(ret_from_kernel_thread_asm) UNWIND_HINT_REGS - bl schedule_tail # a0 = struct task_struct *prev - move a0, s1 - jirl ra, s0, 0 - move a0, sp - bl syscall_exit_to_user_mode + move a1, sp + move a2, s0 + move a3, s1 + bl ret_from_kernel_thread RESTORE_STATIC RESTORE_SOME RESTORE_SP_AND_RET -SYM_CODE_END(ret_from_kernel_thread) +SYM_CODE_END(ret_from_kernel_thread_asm) diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c index 6e58f65455c7..98bc60d7c550 100644 --- a/arch/loongarch/kernel/process.c +++ b/arch/loongarch/kernel/process.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -33,6 +34,7 @@ #include #include +#include #include #include #include @@ -47,6 +49,7 @@ #include #include #include +#include #include #include @@ -63,8 +66,9 @@ EXPORT_SYMBOL(__stack_chk_guard); unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE; EXPORT_SYMBOL(boot_option_idle_override); -asmlinkage void ret_from_fork(void); -asmlinkage void ret_from_kernel_thread(void); +asmlinkage void restore_and_ret(void); +asmlinkage void ret_from_fork_asm(void); +asmlinkage void ret_from_kernel_thread_asm(void); void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp) { @@ -138,6 +142,23 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) return 0; } +asmlinkage void noinstr __no_stack_protector ret_from_fork(struct task_struct *prev, + struct pt_regs *regs) +{ + schedule_tail(prev); + syscall_exit_to_user_mode(regs); +} + +asmlinkage void noinstr __no_stack_protector ret_from_kernel_thread(struct task_struct *prev, + struct pt_regs *regs, + int (*fn)(void *), + void *fn_arg) +{ + schedule_tail(prev); + fn(fn_arg); + syscall_exit_to_user_mode(regs); +} + /* * Copy architecture-specific thread state */ @@ -165,8 +186,8 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) p->thread.reg03 = childksp; p->thread.reg23 = (unsigned long)args->fn; p->thread.reg24 = (unsigned long)args->fn_arg; - p->thread.reg01 = (unsigned long)ret_from_kernel_thread; - p->thread.sched_ra = (unsigned long)ret_from_kernel_thread; + p->thread.reg01 = (unsigned long)ret_from_kernel_thread_asm; + p->thread.sched_ra = (unsigned long)ret_from_kernel_thread_asm; memset(childregs, 0, sizeof(struct pt_regs)); childregs->csr_euen = p->thread.csr_euen; childregs->csr_crmd = p->thread.csr_crmd; @@ -182,8 +203,8 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) childregs->regs[3] = usp; p->thread.reg03 = (unsigned long) childregs; - p->thread.reg01 = (unsigned long) ret_from_fork; - p->thread.sched_ra = (unsigned long) ret_from_fork; + p->thread.reg01 = (unsigned long) ret_from_fork_asm; + p->thread.sched_ra = (unsigned long) ret_from_fork_asm; /* * New tasks lose permission to use the fpu. This accelerates context -- cgit v1.3 From e43b8bb56e537bfc8d9076793091e7679020fc9c Mon Sep 17 00:00:00 2001 From: Charlie Jenkins Date: Thu, 20 Mar 2025 10:29:24 -0700 Subject: entry: Inline syscall_exit_to_user_mode() Similar to commit 221a164035fd ("entry: Move syscall_enter_from_user_mode() to header file"), move syscall_exit_to_user_mode() to the header file as well. Testing was done with the byte-unixbench syscall benchmark (which calls getpid) and QEMU. On riscv I measured a 7.09246% improvement, on x86 a 2.98843% improvement, on loongarch a 6.07954% improvement, and on s390 a 11.1328% improvement. The Intel bot also reported "kernel test robot noticed a 1.9% improvement of stress-ng.seek.ops_per_sec". Signed-off-by: Charlie Jenkins Signed-off-by: Thomas Gleixner Reviewed-by: Alexandre Ghiti Link: https://lore.kernel.org/all/20250320-riscv_optimize_entry-v6-4-63e187e26041@rivosinc.com Link: https://lore.kernel.org/linux-riscv/202502051555.85ae6844-lkp@intel.com/ --- include/linux/entry-common.h | 43 ++++++++++++++++++++++++++++++++++++-- kernel/entry/common.c | 49 +------------------------------------------- 2 files changed, 42 insertions(+), 50 deletions(-) diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index fc61d0205c97..f94f3fdf15fc 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -14,6 +14,7 @@ #include #include +#include /* * Define dummy _TIF work flags if not defined by the architecture or for @@ -366,6 +367,15 @@ static __always_inline void exit_to_user_mode(void) lockdep_hardirqs_on(CALLER_ADDR0); } +/** + * syscall_exit_work - Handle work before returning to user mode + * @regs: Pointer to current pt_regs + * @work: Current thread syscall work + * + * Do one-time syscall specific work. + */ +void syscall_exit_work(struct pt_regs *regs, unsigned long work); + /** * syscall_exit_to_user_mode_work - Handle work before returning to user mode * @regs: Pointer to currents pt_regs @@ -379,7 +389,30 @@ static __always_inline void exit_to_user_mode(void) * make the final state transitions. Interrupts must stay disabled between * return from this function and the invocation of exit_to_user_mode(). */ -void syscall_exit_to_user_mode_work(struct pt_regs *regs); +static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs) +{ + unsigned long work = READ_ONCE(current_thread_info()->syscall_work); + unsigned long nr = syscall_get_nr(current, regs); + + CT_WARN_ON(ct_state() != CT_STATE_KERNEL); + + if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { + if (WARN(irqs_disabled(), "syscall %lu left IRQs disabled", nr)) + local_irq_enable(); + } + + rseq_syscall(regs); + + /* + * Do one-time syscall specific work. If these work items are + * enabled, we want to run them exactly once per syscall exit with + * interrupts enabled. + */ + if (unlikely(work & SYSCALL_WORK_EXIT)) + syscall_exit_work(regs, work); + local_irq_disable_exit_to_user(); + exit_to_user_mode_prepare(regs); +} /** * syscall_exit_to_user_mode - Handle work before returning to user mode @@ -410,7 +443,13 @@ void syscall_exit_to_user_mode_work(struct pt_regs *regs); * exit_to_user_mode(). This function is preferred unless there is a * compelling architectural reason to use the separate functions. */ -void syscall_exit_to_user_mode(struct pt_regs *regs); +static __always_inline void syscall_exit_to_user_mode(struct pt_regs *regs) +{ + instrumentation_begin(); + syscall_exit_to_user_mode_work(regs); + instrumentation_end(); + exit_to_user_mode(); +} /** * irqentry_enter_from_user_mode - Establish state before invoking the irq handler diff --git a/kernel/entry/common.c b/kernel/entry/common.c index 20154572ede9..a8dd1f27417c 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -146,7 +146,7 @@ static inline bool report_single_step(unsigned long work) return work & SYSCALL_WORK_SYSCALL_EXIT_TRAP; } -static void syscall_exit_work(struct pt_regs *regs, unsigned long work) +void syscall_exit_work(struct pt_regs *regs, unsigned long work) { bool step; @@ -173,53 +173,6 @@ static void syscall_exit_work(struct pt_regs *regs, unsigned long work) ptrace_report_syscall_exit(regs, step); } -/* - * Syscall specific exit to user mode preparation. Runs with interrupts - * enabled. - */ -static void syscall_exit_to_user_mode_prepare(struct pt_regs *regs) -{ - unsigned long work = READ_ONCE(current_thread_info()->syscall_work); - unsigned long nr = syscall_get_nr(current, regs); - - CT_WARN_ON(ct_state() != CT_STATE_KERNEL); - - if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { - if (WARN(irqs_disabled(), "syscall %lu left IRQs disabled", nr)) - local_irq_enable(); - } - - rseq_syscall(regs); - - /* - * Do one-time syscall specific work. If these work items are - * enabled, we want to run them exactly once per syscall exit with - * interrupts enabled. - */ - if (unlikely(work & SYSCALL_WORK_EXIT)) - syscall_exit_work(regs, work); -} - -static __always_inline void __syscall_exit_to_user_mode_work(struct pt_regs *regs) -{ - syscall_exit_to_user_mode_prepare(regs); - local_irq_disable_exit_to_user(); - exit_to_user_mode_prepare(regs); -} - -void syscall_exit_to_user_mode_work(struct pt_regs *regs) -{ - __syscall_exit_to_user_mode_work(regs); -} - -__visible noinstr void syscall_exit_to_user_mode(struct pt_regs *regs) -{ - instrumentation_begin(); - __syscall_exit_to_user_mode_work(regs); - instrumentation_end(); - exit_to_user_mode(); -} - noinstr void irqentry_enter_from_user_mode(struct pt_regs *regs) { enter_from_user_mode(regs); -- cgit v1.3 From 8278fd6006a02e3352d5230927c4576f53fb3b06 Mon Sep 17 00:00:00 2001 From: Charlie Jenkins Date: Wed, 7 May 2025 00:11:30 -0700 Subject: LoongArch: entry: Fix include order Reorder some introduced include headers to keep alphabetical order. Fixes: 7ace1602abf2 ("LoongArch: entry: Migrate ret_from_fork() to C") Signed-off-by: Charlie Jenkins Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250507-loongarch_include_order-v1-1-e8aada6a3da8@rivosinc.com --- arch/loongarch/kernel/process.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c index 98bc60d7c550..3582f591bab2 100644 --- a/arch/loongarch/kernel/process.c +++ b/arch/loongarch/kernel/process.c @@ -13,8 +13,8 @@ #include #include #include -#include #include +#include #include #include #include @@ -34,8 +34,8 @@ #include #include -#include #include +#include #include #include #include -- cgit v1.3