From b0f970c50d439df46ade159950201faba36da10b Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Fri, 29 Mar 2024 21:28:25 +0800 Subject: Documentation: kdump: clean up the outdated description After commit 443cbaf9e2fd ("crash: split vmcoreinfo exporting code out from crash_core.c"), Kconfig item CRASH_CORE has gone away in the kernel. Items VMCORE_INFO and CRASH_RESERVE are used instead. So clean up the outdated description about CRASH_CORE and update it accordingly. Link: https://lkml.kernel.org/r/20240329132825.1102459-3-bhe@redhat.com Signed-off-by: Baoquan He Cc: Jonathan Corbet Cc: Geert Uytterhoeven Cc: Huacai Chen Cc: WANG Xuerui Signed-off-by: Andrew Morton --- Documentation/admin-guide/kdump/kdump.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'Documentation/admin-guide') diff --git a/Documentation/admin-guide/kdump/kdump.rst b/Documentation/admin-guide/kdump/kdump.rst index 0302a93b1d40..5376890adbeb 100644 --- a/Documentation/admin-guide/kdump/kdump.rst +++ b/Documentation/admin-guide/kdump/kdump.rst @@ -136,10 +136,6 @@ System kernel config options CONFIG_KEXEC_CORE=y - Subsequently, CRASH_CORE is selected by KEXEC_CORE:: - - CONFIG_CRASH_CORE=y - 2) Enable "sysfs file system support" in "Filesystem" -> "Pseudo filesystems." This is usually enabled by default:: @@ -168,6 +164,10 @@ Dump-capture kernel config options (Arch Independent) CONFIG_CRASH_DUMP=y + And this will select VMCORE_INFO and CRASH_RESERVE:: + CONFIG_VMCORE_INFO=y + CONFIG_CRASH_RESERVE=y + 2) Enable "/proc/vmcore support" under "Filesystems" -> "Pseudo filesystems":: CONFIG_PROC_VMCORE=y -- cgit v1.2.3 From 393fb313a2e150b768e4850658679e2afff431e9 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 29 Apr 2024 23:02:36 -0700 Subject: watchdog: allow nmi watchdog to use raw perf event NMI watchdog permanently consumes one hardware counter per CPU on the system. 
For systems that use many hardware counters, this causes more aggressive time multiplexing of perf events. OTOH, some CPUs (mostly Intel) support the "ref-cycles" event, which is rarely used. Add a kernel cmdline arg nmi_watchdog=rNNN to configure the watchdog to use a raw event. For example, on Intel CPUs, we can use "r300" to configure the watchdog to use the ref-cycles event. If the raw event does not work, fall back to using "cycles". [akpm@linux-foundation.org: fix kerneldoc] Link: https://lkml.kernel.org/r/20240430060236.1878002-2-song@kernel.org Signed-off-by: Song Liu Cc: Peter Zijlstra Cc: "Matthew Wilcox (Oracle)" Signed-off-by: Andrew Morton --- Documentation/admin-guide/kernel-parameters.txt | 5 +-- include/linux/nmi.h | 2 ++ kernel/watchdog.c | 2 ++ kernel/watchdog_perf.c | 46 +++++++++++++++++++++++++ 4 files changed, 53 insertions(+), 2 deletions(-) (limited to 'Documentation/admin-guide') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 902ecd92a29f..1fa79a3d0d1a 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3773,10 +3773,12 @@ Format: [state][,regs][,debounce][,die] nmi_watchdog= [KNL,BUGS=X86] Debugging features for SMP kernels - Format: [panic,][nopanic,][num] + Format: [panic,][nopanic,][rNNN,][num] Valid num: 0 or 1 0 - turn hardlockup detector in nmi_watchdog off 1 - turn hardlockup detector in nmi_watchdog on + rNNN - configure the watchdog with raw perf event 0xNNN + When panic is specified, panic when an NMI watchdog timeout occurs (or 'nopanic' to not panic on an NMI watchdog, if CONFIG_BOOTPARAM_HARDLOCKUP_PANIC is set) @@ -7464,4 +7466,3 @@ memory, and other data can't be written using xmon commands. off xmon is disabled. 
- diff --git a/include/linux/nmi.h b/include/linux/nmi.h index f53438eae815..a8dfb38c9bb6 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -105,10 +105,12 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs); extern void hardlockup_detector_perf_stop(void); extern void hardlockup_detector_perf_restart(void); extern void hardlockup_detector_perf_cleanup(void); +extern void hardlockup_config_perf_event(const char *str); #else static inline void hardlockup_detector_perf_stop(void) { } static inline void hardlockup_detector_perf_restart(void) { } static inline void hardlockup_detector_perf_cleanup(void) { } +static inline void hardlockup_config_perf_event(const char *str) { } #endif void watchdog_hardlockup_stop(void); diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 7f54484de16f..ab0129b15f25 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -80,6 +80,8 @@ next: watchdog_hardlockup_user_enabled = 0; else if (!strncmp(str, "1", 1)) watchdog_hardlockup_user_enabled = 1; + else if (!strncmp(str, "r", 1)) + hardlockup_config_perf_event(str + 1); while (*(str++)) { if (*str == ',') { str++; diff --git a/kernel/watchdog_perf.c b/kernel/watchdog_perf.c index 8ea00c4a24b2..5f7d1f0d4268 100644 --- a/kernel/watchdog_perf.c +++ b/kernel/watchdog_perf.c @@ -90,6 +90,14 @@ static struct perf_event_attr wd_hw_attr = { .disabled = 1, }; +static struct perf_event_attr fallback_wd_hw_attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES, + .size = sizeof(struct perf_event_attr), + .pinned = 1, + .disabled = 1, +}; + /* Callback function for perf event subsystem */ static void watchdog_overflow_callback(struct perf_event *event, struct perf_sample_data *data, @@ -122,6 +130,13 @@ static int hardlockup_detector_event_create(void) /* Try to register using hardware perf events */ evt = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL); + if (IS_ERR(evt)) { + wd_attr = 
&fallback_wd_hw_attr; + wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh); + evt = perf_event_create_kernel_counter(wd_attr, cpu, NULL, + watchdog_overflow_callback, NULL); + } + if (IS_ERR(evt)) { pr_debug("Perf event create on CPU %d failed with %ld\n", cpu, PTR_ERR(evt)); @@ -259,3 +274,34 @@ int __init watchdog_hardlockup_probe(void) } return ret; } + +/** + * hardlockup_config_perf_event - Overwrite config of wd_hw_attr. + * + * @str: number which identifies the raw perf event to use + */ +void __init hardlockup_config_perf_event(const char *str) +{ + u64 config; + char buf[24]; + char *comma = strchr(str, ','); + + if (!comma) { + if (kstrtoull(str, 16, &config)) + return; + } else { + unsigned int len = comma - str; + + if (len >= sizeof(buf)) + return; + + if (strscpy(buf, str, sizeof(buf)) < 0) + return; + buf[len] = 0; + if (kstrtoull(buf, 16, &config)) + return; + } + + wd_hw_attr.type = PERF_TYPE_RAW; + wd_hw_attr.config = config; +} -- cgit v1.2.3