From 9b492cf58077a0254eb4b9574029ac6e79add9f9 Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Mon, 23 May 2016 16:24:10 -0700 Subject: kexec: introduce a protection mechanism for the crashkernel reserved memory For the cases that some kernel (module) path stamps the crash reserved memory(already mapped by the kernel) where has been loaded the second kernel data, the kdump kernel will probably fail to boot when panic happens (or even not happens) leaving the culprit at large, this is unacceptable. The patch introduces a mechanism for detecting such cases: 1) After each crash kexec loading, it simply marks the reserved memory regions readonly since we no longer access it after that. When someone stamps the region, the first kernel will panic and trigger the kdump. The weak arch_kexec_protect_crashkres() is introduced to do the actual protection. 2) To allow multiple loading, once 1) was done we also need to remark the reserved memory to readwrite each time a system call related to kdump is made. The weak arch_kexec_unprotect_crashkres() is introduced to do the actual protection. The architecture can make its specific implementation by overriding arch_kexec_protect_crashkres() and arch_kexec_unprotect_crashkres(). Signed-off-by: Xunlei Pang Cc: Eric Biederman Cc: Dave Young Cc: Minfei Huang Cc: Vivek Goyal Cc: Baoquan He Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kexec.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'kernel/kexec.c') diff --git a/kernel/kexec.c b/kernel/kexec.c index ee70aef5cd81..b44cb3f5a15c 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -167,8 +167,12 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments, return -EBUSY; dest_image = &kexec_image; - if (flags & KEXEC_ON_CRASH) + if (flags & KEXEC_ON_CRASH) { dest_image = &kexec_crash_image; + if (kexec_crash_image) + arch_kexec_unprotect_crashkres(); + } + if (nr_segments > 0) { unsigned long i; @@ -211,6 +215,9 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments, image = xchg(dest_image, image); out: + if ((flags & KEXEC_ON_CRASH) && kexec_crash_image) + arch_kexec_protect_crashkres(); + mutex_unlock(&kexec_mutex); kimage_free(image); -- cgit v1.2.3 From 917a35605f09c0d16aeb2e92c7fbff562e19a116 Mon Sep 17 00:00:00 2001 From: Minfei Huang Date: Mon, 23 May 2016 16:24:16 -0700 Subject: kexec: make a pair of map/unmap reserved pages in error path For some arch, kexec shall map the reserved pages, then use them, when we try to start the kdump service. kexec may return directly, without unmaping the reserved pages, if it fails during starting service. To fix it, we make a pair of map/unmap reserved pages both in generic path and error path. This patch only affects s390. Other architecturess don't implement the interface of crash_unmap_reserved_pages and crash_map_reserved_pages. It isn't a urgent patch. Kernel can work well without any risk, although the reserved pages are not unmapped before returning in error path. Signed-off-by: Minfei Huang Cc: Vivek Goyal Cc: "Eric W. Biederman" Cc: Xunlei Pang Cc: Baoquan He Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kexec.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'kernel/kexec.c') diff --git a/kernel/kexec.c b/kernel/kexec.c index b44cb3f5a15c..4b49aa71304f 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -194,22 +194,25 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments, segments, flags); } if (result) - goto out; + goto unmap_page; if (flags & KEXEC_PRESERVE_CONTEXT) image->preserve_context = 1; result = machine_kexec_prepare(image); if (result) - goto out; + goto unmap_page; for (i = 0; i < nr_segments; i++) { result = kimage_load_segment(image, &image->segment[i]); if (result) - goto out; + goto unmap_page; } kimage_terminate(image); +unmap_page: if (flags & KEXEC_ON_CRASH) crash_unmap_reserved_pages(); + if (result) + goto out; } /* Install the new kernel, and Uninstall the old */ image = xchg(dest_image, image); -- cgit v1.2.3 From 0eea08678ebe9f7d8ef98fed974a5bf1a0dd2dd2 Mon Sep 17 00:00:00 2001 From: Minfei Huang Date: Mon, 23 May 2016 16:24:19 -0700 Subject: kexec: do a cleanup for function kexec_load There are a lof of work to be done in function kexec_load, not only for allocating structs and loading initram, but also for some misc. To make it more clear, wrap a new function do_kexec_load which is used to allocate structs and load initram. And the pre-work will be done in kexec_load. Signed-off-by: Minfei Huang Cc: Vivek Goyal Cc: "Eric W. Biederman" Cc: Xunlei Pang Cc: Baoquan He Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/kexec.c | 125 +++++++++++++++++++++++++++++++-------------------------- 1 file changed, 69 insertions(+), 56 deletions(-) (limited to 'kernel/kexec.c') diff --git a/kernel/kexec.c b/kernel/kexec.c index 4b49aa71304f..b73dc211fcfd 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -103,6 +103,74 @@ out_free_image: return ret; } +static int do_kexec_load(unsigned long entry, unsigned long nr_segments, + struct kexec_segment __user *segments, unsigned long flags) +{ + struct kimage **dest_image, *image; + unsigned long i; + int ret; + + if (flags & KEXEC_ON_CRASH) { + dest_image = &kexec_crash_image; + if (kexec_crash_image) + arch_kexec_unprotect_crashkres(); + } else { + dest_image = &kexec_image; + } + + if (nr_segments == 0) { + /* Uninstall image */ + kimage_free(xchg(dest_image, NULL)); + return 0; + } + if (flags & KEXEC_ON_CRASH) { + /* + * Loading another kernel to switch to if this one + * crashes. Free any current crash dump kernel before + * we corrupt it. + */ + kimage_free(xchg(&kexec_crash_image, NULL)); + } + + ret = kimage_alloc_init(&image, entry, nr_segments, segments, flags); + if (ret) + return ret; + + if (flags & KEXEC_ON_CRASH) + crash_map_reserved_pages(); + + if (flags & KEXEC_PRESERVE_CONTEXT) + image->preserve_context = 1; + + ret = machine_kexec_prepare(image); + if (ret) + goto out; + + for (i = 0; i < nr_segments; i++) { + ret = kimage_load_segment(image, &image->segment[i]); + if (ret) + goto out; + } + + kimage_terminate(image); + + /* Install the new kernel and uninstall the old */ + image = xchg(dest_image, image); + +out: + if ((flags & KEXEC_ON_CRASH) && kexec_crash_image) + arch_kexec_protect_crashkres(); + + /* + * Once the reserved memory is mapped, we should unmap this memory + * before returning + */ + if (flags & KEXEC_ON_CRASH) + crash_unmap_reserved_pages(); + kimage_free(image); + return ret; +} + /* * Exec Kernel system call: for obvious reasons only root may call it. * @@ -127,7 +195,6 @@ out_free_image: SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments, struct kexec_segment __user *, segments, unsigned long, flags) { - struct kimage **dest_image, *image; int result; /* We only trust the superuser with rebooting the system. */ @@ -152,9 +219,6 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments, if (nr_segments > KEXEC_SEGMENT_MAX) return -EINVAL; - image = NULL; - result = 0; - /* Because we write directly to the reserved memory * region when loading crash kernels we need a mutex here to * prevent multiple crash kernels from attempting to load @@ -166,63 +230,12 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments, if (!mutex_trylock(&kexec_mutex)) return -EBUSY; - dest_image = &kexec_image; - if (flags & KEXEC_ON_CRASH) { - dest_image = &kexec_crash_image; - if (kexec_crash_image) - arch_kexec_unprotect_crashkres(); - } - - if (nr_segments > 0) { - unsigned long i; - - if (flags & KEXEC_ON_CRASH) { - /* - * Loading another kernel to switch to if this one - * crashes. Free any current crash dump kernel before - * we corrupt it. - */ - - kimage_free(xchg(&kexec_crash_image, NULL)); - result = kimage_alloc_init(&image, entry, nr_segments, - segments, flags); - crash_map_reserved_pages(); - } else { - /* Loading another kernel to reboot into. */ - - result = kimage_alloc_init(&image, entry, nr_segments, - segments, flags); - } - if (result) - goto unmap_page; - - if (flags & KEXEC_PRESERVE_CONTEXT) - image->preserve_context = 1; - result = machine_kexec_prepare(image); - if (result) - goto unmap_page; - - for (i = 0; i < nr_segments; i++) { - result = kimage_load_segment(image, &image->segment[i]); - if (result) - goto unmap_page; - } - kimage_terminate(image); -unmap_page: - if (flags & KEXEC_ON_CRASH) - crash_unmap_reserved_pages(); - if (result) - goto out; - } - /* Install the new kernel, and Uninstall the old */ - image = xchg(dest_image, image); + result = do_kexec_load(entry, nr_segments, segments, flags); -out: if ((flags & KEXEC_ON_CRASH) && kexec_crash_image) arch_kexec_protect_crashkres(); mutex_unlock(&kexec_mutex); - kimage_free(image); return result; } -- cgit v1.2.3 From 7a0058ec78602da02b34fa2ae3afc523e90d1ab2 Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Mon, 23 May 2016 16:24:22 -0700 Subject: s390/kexec: consolidate crash_map/unmap_reserved_pages() and arch_kexec_protect(unprotect)_crashkres() Commit 3f625002581b ("kexec: introduce a protection mechanism for the crashkernel reserved memory") is a similar mechanism for protecting the crash kernel reserved memory to previous crash_map/unmap_reserved_pages() implementation, the new one is more generic in name and cleaner in code (besides, some arch may not be allowed to unmap the pgtable). Therefore, this patch consolidates them, and uses the new arch_kexec_protect(unprotect)_crashkres() to replace former crash_map/unmap_reserved_pages() which by now has been only used by S390. The consolidation work needs the crash memory to be mapped initially, this is done in machine_kdump_pm_init() which is after reserve_crashkernel(). Once kdump kernel is loaded, the new arch_kexec_protect_crashkres() implemented for S390 will actually unmap the pgtable like before. Signed-off-by: Xunlei Pang Signed-off-by: Michael Holzheu Acked-by: Michael Holzheu Cc: Heiko Carstens Cc: "Eric W. Biederman" Cc: Minfei Huang Cc: Vivek Goyal Cc: Dave Young Cc: Baoquan He Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/kernel/machine_kexec.c | 28 ++++++++++++++++++---------- include/linux/kexec.h | 2 -- kernel/kexec.c | 12 ------------ kernel/kexec_core.c | 11 ++--------- 4 files changed, 20 insertions(+), 33 deletions(-) (limited to 'kernel/kexec.c') diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 2f1b7217c25c..0e64f08d3d69 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -43,13 +43,13 @@ static int machine_kdump_pm_cb(struct notifier_block *nb, unsigned long action, switch (action) { case PM_SUSPEND_PREPARE: case PM_HIBERNATION_PREPARE: - if (crashk_res.start) - crash_map_reserved_pages(); + if (kexec_crash_image) + arch_kexec_unprotect_crashkres(); break; case PM_POST_SUSPEND: case PM_POST_HIBERNATION: - if (crashk_res.start) - crash_unmap_reserved_pages(); + if (kexec_crash_image) + arch_kexec_protect_crashkres(); break; default: return NOTIFY_DONE; @@ -60,6 +60,8 @@ static int machine_kdump_pm_cb(struct notifier_block *nb, unsigned long action, static int __init machine_kdump_pm_init(void) { pm_notifier(machine_kdump_pm_cb, 0); + /* Create initial mapping for crashkernel memory */ + arch_kexec_unprotect_crashkres(); return 0; } arch_initcall(machine_kdump_pm_init); @@ -146,6 +148,8 @@ static int kdump_csum_valid(struct kimage *image) #endif } +#ifdef CONFIG_CRASH_DUMP + /* * Map or unmap crashkernel memory */ @@ -167,21 +171,25 @@ static void crash_map_pages(int enable) } /* - * Map crashkernel memory + * Unmap crashkernel memory */ -void crash_map_reserved_pages(void) +void arch_kexec_protect_crashkres(void) { - crash_map_pages(1); + if (crashk_res.end) + crash_map_pages(0); } /* - * Unmap crashkernel memory + * Map crashkernel memory */ -void crash_unmap_reserved_pages(void) +void arch_kexec_unprotect_crashkres(void) { - crash_map_pages(0); + if (crashk_res.end) + crash_map_pages(1); } +#endif + /* * Give back memory to hypervisor before new kdump is loaded */ diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 643ff4a3fbf6..e8acb2b43dd9 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -230,8 +230,6 @@ extern void crash_kexec(struct pt_regs *); int kexec_should_crash(struct task_struct *); void crash_save_cpu(struct pt_regs *regs, int cpu); void crash_save_vmcoreinfo(void); -void crash_map_reserved_pages(void); -void crash_unmap_reserved_pages(void); void arch_crash_save_vmcoreinfo(void); __printf(1, 2) void vmcoreinfo_append_str(const char *fmt, ...); diff --git a/kernel/kexec.c b/kernel/kexec.c index b73dc211fcfd..4384672d3245 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -136,9 +136,6 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments, if (ret) return ret; - if (flags & KEXEC_ON_CRASH) - crash_map_reserved_pages(); - if (flags & KEXEC_PRESERVE_CONTEXT) image->preserve_context = 1; @@ -161,12 +158,6 @@ out: if ((flags & KEXEC_ON_CRASH) && kexec_crash_image) arch_kexec_protect_crashkres(); - /* - * Once the reserved memory is mapped, we should unmap this memory - * before returning - */ - if (flags & KEXEC_ON_CRASH) - crash_unmap_reserved_pages(); kimage_free(image); return ret; } @@ -232,9 +223,6 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments, result = do_kexec_load(entry, nr_segments, segments, flags); - if ((flags & KEXEC_ON_CRASH) && kexec_crash_image) - arch_kexec_protect_crashkres(); - mutex_unlock(&kexec_mutex); return result; diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 48b73cc8e425..56b3ed0927b0 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -954,7 +954,6 @@ int crash_shrink_memory(unsigned long new_size) start = roundup(start, KEXEC_CRASH_MEM_ALIGN); end = roundup(start + new_size, KEXEC_CRASH_MEM_ALIGN); - crash_map_reserved_pages(); crash_free_reserved_phys_range(end, crashk_res.end); if ((start == end) && (crashk_res.parent != NULL)) @@ -968,7 +967,6 @@ int crash_shrink_memory(unsigned long new_size) crashk_res.end = end - 1; insert_resource(&iomem_resource, ram_res); - crash_unmap_reserved_pages(); unlock: mutex_unlock(&kexec_mutex); @@ -1553,17 +1551,12 @@ int kernel_kexec(void) } /* - * Add and remove page tables for crashkernel memory + * Protection mechanism for crashkernel reserved memory after + * the kdump kernel is loaded. * * Provide an empty default implementation here -- architecture * code may override this */ -void __weak crash_map_reserved_pages(void) -{} - -void __weak crash_unmap_reserved_pages(void) -{} - void __weak arch_kexec_protect_crashkres(void) {} -- cgit v1.2.3