From cbad0fb2d8d97fa6dd8089c0cc729ced0abacad6 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 23 Jan 2023 13:45:56 +0000 Subject: ftrace: Add DYNAMIC_FTRACE_WITH_CALL_OPS Architectures without dynamic ftrace trampolines incur an overhead when multiple ftrace_ops are enabled with distinct filters. in these cases, each call site calls a common trampoline which uses ftrace_ops_list_func() to iterate over all enabled ftrace functions, and so incurs an overhead relative to the size of this list (including RCU protection overhead). Architectures with dynamic ftrace trampolines avoid this overhead for call sites which have a single associated ftrace_ops. In these cases, the dynamic trampoline is customized to branch directly to the relevant ftrace function, avoiding the list overhead. On some architectures it's impractical and/or undesirable to implement dynamic ftrace trampolines. For example, arm64 has limited branch ranges and cannot always directly branch from a call site to an arbitrary address (e.g. from a kernel text address to an arbitrary module address). Calls from modules to core kernel text can be indirected via PLTs (allocated at module load time) to address this, but the same is not possible from calls from core kernel text. Using an indirect branch from a call site to an arbitrary trampoline is possible, but requires several more instructions in the function prologue (or immediately before it), and/or comes with far more complex requirements for patching. Instead, this patch adds a new option, where an architecture can associate each call site with a pointer to an ftrace_ops, placed at a fixed offset from the call site. A shared trampoline can recover this pointer and call ftrace_ops::func() without needing to go via ftrace_ops_list_func(), avoiding the associated overhead. This avoids issues with branch range limitations, and avoids the need to allocate and manipulate dynamic trampolines, making it far simpler to implement and maintain, while having similar performance characteristics. Note that this allows for dynamic ftrace_ops to be invoked directly from an architecture's ftrace_caller trampoline, whereas existing code forces the use of ftrace_ops_get_list_func(), which is in part necessary to permit the ftrace_ops to be freed once unregistered *and* to avoid branch/address-generation range limitation on some architectures (e.g. where ops->func is a module address, and may be outside of the direct branch range for callsites within the main kernel image). The CALL_OPS approach avoids this problems and is safe as: * The existing synchronization in ftrace_shutdown() using ftrace_shutdown() using synchronize_rcu_tasks_rude() (and synchronize_rcu_tasks()) ensures that no tasks hold a stale reference to an ftrace_ops (e.g. in the middle of the ftrace_caller trampoline, or while invoking ftrace_ops::func), when that ftrace_ops is unregistered. Arguably this could also be relied upon for the existing scheme, permitting dynamic ftrace_ops to be invoked directly when ops->func is in range, but this will require additional logic to handle branch range limitations, and is not handled by this patch. * Each callsite's ftrace_ops pointer literal can hold any valid kernel address, and is updated atomically. As an architecture's ftrace_caller trampoline will atomically load the ops pointer then dereference ops->func, there is no risk of invoking ops->func with a mismatches ops pointer, and updates to the ops pointer do not require special care. A subsequent patch will implement architectures support for arm64. There should be no functional change as a result of this patch alone. Signed-off-by: Mark Rutland Reviewed-by: Steven Rostedt (Google) Cc: Florent Revest Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Will Deacon Link: https://lore.kernel.org/r/20230123134603.1064407-2-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- include/linux/ftrace.h | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 99f1146614c0..366c730beaa3 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -39,6 +39,7 @@ static inline void ftrace_boot_snapshot(void) { } struct ftrace_ops; struct ftrace_regs; +struct dyn_ftrace; #ifdef CONFIG_FUNCTION_TRACER /* @@ -57,6 +58,9 @@ void arch_ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip); void arch_ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs); #endif +extern const struct ftrace_ops ftrace_nop_ops; +extern const struct ftrace_ops ftrace_list_ops; +struct ftrace_ops *ftrace_find_unique_ops(struct dyn_ftrace *rec); #endif /* CONFIG_FUNCTION_TRACER */ /* Main tracing buffer and events set up */ @@ -391,8 +395,6 @@ struct ftrace_func_entry { unsigned long direct; /* for direct lookup only */ }; -struct dyn_ftrace; - #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS extern int ftrace_direct_func_count; int register_ftrace_direct(unsigned long ip, unsigned long addr); @@ -563,6 +565,8 @@ bool is_ftrace_trampoline(unsigned long addr); * IPMODIFY - the record allows for the IP address to be changed. * DISABLED - the record is not ready to be touched yet * DIRECT - there is a direct function to call + * CALL_OPS - the record can use callsite-specific ops + * CALL_OPS_EN - the function is set up to use callsite-specific ops * * When a new ftrace_ops is registered and wants a function to save * pt_regs, the rec->flags REGS is set. When the function has been @@ -580,9 +584,11 @@ enum { FTRACE_FL_DISABLED = (1UL << 25), FTRACE_FL_DIRECT = (1UL << 24), FTRACE_FL_DIRECT_EN = (1UL << 23), + FTRACE_FL_CALL_OPS = (1UL << 22), + FTRACE_FL_CALL_OPS_EN = (1UL << 21), }; -#define FTRACE_REF_MAX_SHIFT 23 +#define FTRACE_REF_MAX_SHIFT 21 #define FTRACE_REF_MAX ((1UL << FTRACE_REF_MAX_SHIFT) - 1) #define ftrace_rec_count(rec) ((rec)->flags & FTRACE_REF_MAX) @@ -820,7 +826,8 @@ static inline int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) */ extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr); -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +#if defined(CONFIG_DYNAMIC_FTRACE_WITH_REGS) || \ + defined(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS) /** * ftrace_modify_call - convert from one addr to another (no nop) * @rec: the call site record (e.g. mcount/fentry) @@ -833,6 +840,9 @@ extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr); * what we expect it to be, and then on success of the compare, * it should write to the location. * + * When using call ops, this is called when the associated ops change, even + * when (addr == old_addr). + * * The code segment at @rec->ip should be a caller to @old_addr * * Return must be: -- cgit v1.2.3 From c27cd083cfb9d392f304657ed00fcde1136704e7 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 23 Jan 2023 13:45:57 +0000 Subject: Compiler attributes: GCC cold function alignment workarounds Contemporary versions of GCC (e.g. GCC 12.2.0) drop the alignment specified by '-falign-functions=N' for functions marked with the __cold__ attribute, and potentially for callees of __cold__ functions as these may be implicitly marked as __cold__ by the compiler. LLVM appears to respect '-falign-functions=N' in such cases. This has been reported to GCC in bug 88345: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88345 ... which also covers alignment being dropped when '-Os' is used, which will be addressed in a separate patch. Currently, use of '-falign-functions=N' is limited to CONFIG_FUNCTION_ALIGNMENT, which is largely used for performance and/or analysis reasons (e.g. with CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B), but isn't necessary for correct functionality. However, this dropped alignment isn't great for the performance and/or analysis cases. Subsequent patches will use CONFIG_FUNCTION_ALIGNMENT as part of arm64's ftrace implementation, which will require all instrumented functions to be aligned to at least 8-bytes. This patch works around the dropped alignment by avoiding the use of the __cold__ attribute when CONFIG_FUNCTION_ALIGNMENT is non-zero, and by specifically aligning abort(), which GCC implicitly marks as __cold__. As the __cold macro is now dependent upon config options (which is against the policy described at the top of compiler_attributes.h), it is moved into compiler_types.h. I've tested this by building and booting a kernel configured with defconfig + CONFIG_EXPERT=y + CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B=y, and looking for misaligned text symbols in /proc/kallsyms: * arm64: Before: # uname -rm 6.2.0-rc3 aarch64 # grep ' [Tt] ' /proc/kallsyms | grep -iv '[048c]0 [Tt] ' | wc -l 5009 After: # uname -rm 6.2.0-rc3-00001-g2a2bedf8bfa9 aarch64 # grep ' [Tt] ' /proc/kallsyms | grep -iv '[048c]0 [Tt] ' | wc -l 919 * x86_64: Before: # uname -rm 6.2.0-rc3 x86_64 # grep ' [Tt] ' /proc/kallsyms | grep -iv '[048c]0 [Tt] ' | wc -l 11537 After: # uname -rm 6.2.0-rc3-00001-g2a2bedf8bfa9 x86_64 # grep ' [Tt] ' /proc/kallsyms | grep -iv '[048c]0 [Tt] ' | wc -l 2805 There's clearly a substantial reduction in the number of misaligned symbols. From manual inspection, the remaining unaligned text labels are a combination of ACPICA functions (due to the use of '-Os'), static call trampolines, and non-function labels in assembly, which will be dealt with in subsequent patches. Signed-off-by: Mark Rutland Cc: Florent Revest Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Will Deacon Cc: Miguel Ojeda Cc: Nick Desaulniers Link: https://lore.kernel.org/r/20230123134603.1064407-3-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- include/linux/compiler_attributes.h | 6 ------ include/linux/compiler_types.h | 27 +++++++++++++++++++++++++++ kernel/exit.c | 9 ++++++++- 3 files changed, 35 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index 898b3458b24a..b83126452c65 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -75,12 +75,6 @@ # define __assume_aligned(a, ...) #endif -/* - * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-cold-function-attribute - * gcc: https://gcc.gnu.org/onlinedocs/gcc/Label-Attributes.html#index-cold-label-attribute - */ -#define __cold __attribute__((__cold__)) - /* * Note the long name. * diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 7c1afe0f4129..aab34e30128e 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -79,6 +79,33 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { } /* Attributes */ #include +#if CONFIG_FUNCTION_ALIGNMENT > 0 +#define __function_aligned __aligned(CONFIG_FUNCTION_ALIGNMENT) +#else +#define __function_aligned +#endif + +/* + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-cold-function-attribute + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Label-Attributes.html#index-cold-label-attribute + * + * When -falign-functions=N is in use, we must avoid the cold attribute as + * contemporary versions of GCC drop the alignment for cold functions. Worse, + * GCC can implicitly mark callees of cold functions as cold themselves, so + * it's not sufficient to add __function_aligned here as that will not ensure + * that callees are correctly aligned. + * + * See: + * + * https://lore.kernel.org/lkml/Y77%2FqVgvaJidFpYt@FVFF77S0Q05N + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88345#c9 + */ +#if !defined(CONFIG_CC_IS_GCC) || (CONFIG_FUNCTION_ALIGNMENT == 0) +#define __cold __attribute__((__cold__)) +#else +#define __cold +#endif + /* Builtins */ /* diff --git a/kernel/exit.c b/kernel/exit.c index 15dc2ec80c46..c8e0375705f4 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1898,7 +1898,14 @@ bool thread_group_exited(struct pid *pid) } EXPORT_SYMBOL(thread_group_exited); -__weak void abort(void) +/* + * This needs to be __function_aligned as GCC implicitly makes any + * implementation of abort() cold and drops alignment specified by + * -falign-functions=N. + * + * See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88345#c11 + */ +__weak __function_aligned void abort(void) { BUG(); -- cgit v1.2.3