From 17f60a7da150fdd0cfb9756f86a262daa72c835f Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 1 Apr 2011 17:07:50 -0400 Subject: capabilities: allow the application of capability limits to usermode helpers There is no way to limit the capabilities of usermodehelpers. This problem reared its head recently when someone complained that any user with cap_net_admin was able to load arbitrary kernel modules, even though the user didn't have cap_sys_module. The reason is that the actual load is done by a usermode helper and those always have the full cap set. This patch adds new sysctls which allow us to bound the permissions of usermode helpers. /proc/sys/kernel/usermodehelper/bset /proc/sys/kernel/usermodehelper/inheritable You must have CAP_SYS_MODULE and CAP_SETPCAP to change these (changes are &= ONLY). When the kernel launches a usermodehelper it will do so with these as the bset and pI. -v2: make globals static create spinlock to protect globals -v3: require both CAP_SETPCAP and CAP_SYS_MODULE -v4: fix the typo s/CAP_SET_PCAP/CAP_SETPCAP/ because I didn't commit Signed-off-by: Eric Paris No-objection-from: Serge E. Hallyn Acked-by: David Howells Acked-by: Serge E. Hallyn Acked-by: Andrew G. 
Morgan Signed-off-by: James Morris --- kernel/sysctl.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'kernel/sysctl.c') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c0bb32414b17..965134bed6cd 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -56,6 +56,7 @@ #include #include #include +#include #include #include @@ -615,6 +616,11 @@ static struct ctl_table kern_table[] = { .mode = 0555, .child = random_table, }, + { + .procname = "usermodehelper", + .mode = 0555, + .child = usermodehelper_table, + }, { .procname = "overflowuid", .data = &overflowuid, -- cgit v1.2.3 From 571d76acdab95876aeff869ab6449f826c23aa43 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Mon, 16 May 2011 14:23:44 -0400 Subject: arch/tile: support signal "exception-trace" hook This change adds support for /proc/sys/debug/exception-trace to tile. Like x86 and sparc, by default it is set to "1", generating a one-line printk whenever a user process crashes. By setting it to "2", we get a much more complete userspace diagnostic at crash time, including a user-space backtrace, register dump, and memory dump around the address of the crash. Some vestiges of the Tilera-internal version of this support are removed with this patch (the show_crashinfo variable and the arch_coredump_signal function). We retain a "crashinfo" boot parameter which allows you to set the boot-time value of exception-trace. 
Signed-off-by: Chris Metcalf --- arch/tile/include/asm/processor.h | 7 --- arch/tile/include/asm/signal.h | 4 ++ arch/tile/kernel/compat_signal.c | 4 +- arch/tile/kernel/signal.c | 128 ++++++++++++++++++++++++++++++++++++-- arch/tile/kernel/single_step.c | 4 ++ arch/tile/kernel/traps.c | 1 + arch/tile/mm/fault.c | 24 ++++--- kernel/sysctl.c | 2 +- 8 files changed, 151 insertions(+), 23 deletions(-) (limited to 'kernel/sysctl.c') diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h index d6b43ddfcc04..34c1e01ffb5e 100644 --- a/arch/tile/include/asm/processor.h +++ b/arch/tile/include/asm/processor.h @@ -257,10 +257,6 @@ static inline void cpu_relax(void) barrier(); } -struct siginfo; -extern void arch_coredump_signal(struct siginfo *, struct pt_regs *); -#define arch_coredump_signal arch_coredump_signal - /* Info on this processor (see fs/proc/cpuinfo.c) */ struct seq_operations; extern const struct seq_operations cpuinfo_op; @@ -271,9 +267,6 @@ extern char chip_model[64]; /* Data on which physical memory controller corresponds to which NUMA node. */ extern int node_controller[]; -/* Do we dump information to the console when a user application crashes? */ -extern int show_crashinfo; - #if CHIP_HAS_CBOX_HOME_MAP() /* Does the heap allocator return hash-for-home pages by default? 
*/ extern int hash_default; diff --git a/arch/tile/include/asm/signal.h b/arch/tile/include/asm/signal.h index 81d92a45cd4b..1e1e616783eb 100644 --- a/arch/tile/include/asm/signal.h +++ b/arch/tile/include/asm/signal.h @@ -28,6 +28,10 @@ struct pt_regs; int restore_sigcontext(struct pt_regs *, struct sigcontext __user *); int setup_sigcontext(struct sigcontext __user *, struct pt_regs *); void do_signal(struct pt_regs *regs); +void signal_fault(const char *type, struct pt_regs *, + void __user *frame, int sig); +void trace_unhandled_signal(const char *type, struct pt_regs *regs, + unsigned long address, int signo); #endif #endif /* _ASM_TILE_SIGNAL_H */ diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c index dbb0dfc7bece..a7869ad62776 100644 --- a/arch/tile/kernel/compat_signal.c +++ b/arch/tile/kernel/compat_signal.c @@ -317,7 +317,7 @@ long compat_sys_rt_sigreturn(struct pt_regs *regs) return 0; badframe: - force_sig(SIGSEGV, current); + signal_fault("bad sigreturn frame", regs, frame, 0); return 0; } @@ -431,6 +431,6 @@ int compat_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, return 0; give_sigsegv: - force_sigsegv(sig, current); + signal_fault("bad setup frame", regs, frame, sig); return -EFAULT; } diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c index 1260321155f1..bedaf4e9f3a7 100644 --- a/arch/tile/kernel/signal.c +++ b/arch/tile/kernel/signal.c @@ -39,7 +39,6 @@ #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) - SYSCALL_DEFINE3(sigaltstack, const stack_t __user *, uss, stack_t __user *, uoss, struct pt_regs *, regs) { @@ -78,6 +77,13 @@ int restore_sigcontext(struct pt_regs *regs, return err; } +void signal_fault(const char *type, struct pt_regs *regs, + void __user *frame, int sig) +{ + trace_unhandled_signal(type, regs, (unsigned long)frame, SIGSEGV); + force_sigsegv(sig, current); +} + /* The assembly shim for this function arranges to ignore the return value. 
*/ SYSCALL_DEFINE1(rt_sigreturn, struct pt_regs *, regs) { @@ -105,7 +111,7 @@ SYSCALL_DEFINE1(rt_sigreturn, struct pt_regs *, regs) return 0; badframe: - force_sig(SIGSEGV, current); + signal_fault("bad sigreturn frame", regs, frame, 0); return 0; } @@ -231,7 +237,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, return 0; give_sigsegv: - force_sigsegv(sig, current); + signal_fault("bad setup frame", regs, frame, sig); return -EFAULT; } @@ -245,7 +251,6 @@ static int handle_signal(unsigned long sig, siginfo_t *info, { int ret; - /* Are we from a system call? */ if (regs->faultnum == INT_SWINT_1) { /* If so, check system call restarting.. */ @@ -363,3 +368,118 @@ done: /* Avoid double syscall restart if there are nested signals. */ regs->faultnum = INT_SWINT_1_SIGRETURN; } + +int show_unhandled_signals = 1; + +static int __init crashinfo(char *str) +{ + unsigned long val; + const char *word; + + if (*str == '\0') + val = 2; + else if (*str != '=' || strict_strtoul(++str, 0, &val) != 0) + return 0; + show_unhandled_signals = val; + switch (show_unhandled_signals) { + case 0: + word = "No"; + break; + case 1: + word = "One-line"; + break; + default: + word = "Detailed"; + break; + } + pr_info("%s crash reports will be generated on the console\n", word); + return 1; +} +__setup("crashinfo", crashinfo); + +static void dump_mem(void __user *address) +{ + void __user *addr; + enum { region_size = 256, bytes_per_line = 16 }; + int i, j, k; + int found_readable_mem = 0; + + pr_err("\n"); + if (!access_ok(VERIFY_READ, address, 1)) { + pr_err("Not dumping at address 0x%lx (kernel address)\n", + (unsigned long)address); + return; + } + + addr = (void __user *) + (((unsigned long)address & -bytes_per_line) - region_size/2); + if (addr > address) + addr = NULL; + for (i = 0; i < region_size; + addr += bytes_per_line, i += bytes_per_line) { + unsigned char buf[bytes_per_line]; + char line[100]; + if (copy_from_user(buf, addr, bytes_per_line)) + 
continue; + if (!found_readable_mem) { + pr_err("Dumping memory around address 0x%lx:\n", + (unsigned long)address); + found_readable_mem = 1; + } + j = sprintf(line, REGFMT":", (unsigned long)addr); + for (k = 0; k < bytes_per_line; ++k) + j += sprintf(&line[j], " %02x", buf[k]); + pr_err("%s\n", line); + } + if (!found_readable_mem) + pr_err("No readable memory around address 0x%lx\n", + (unsigned long)address); +} + +void trace_unhandled_signal(const char *type, struct pt_regs *regs, + unsigned long address, int sig) +{ + struct task_struct *tsk = current; + + if (show_unhandled_signals == 0) + return; + + /* If the signal is handled, don't show it here. */ + if (!is_global_init(tsk)) { + void __user *handler = + tsk->sighand->action[sig-1].sa.sa_handler; + if (handler != SIG_IGN && handler != SIG_DFL) + return; + } + + /* Rate-limit the one-line output, not the detailed output. */ + if (show_unhandled_signals <= 1 && !printk_ratelimit()) + return; + + printk("%s%s[%d]: %s at %lx pc "REGFMT" signal %d", + task_pid_nr(tsk) > 1 ? 
KERN_INFO : KERN_EMERG, + tsk->comm, task_pid_nr(tsk), type, address, regs->pc, sig); + + print_vma_addr(KERN_CONT " in ", regs->pc); + + printk(KERN_CONT "\n"); + + if (show_unhandled_signals > 1) { + switch (sig) { + case SIGILL: + case SIGFPE: + case SIGSEGV: + case SIGBUS: + pr_err("User crash: signal %d," + " trap %ld, address 0x%lx\n", + sig, regs->faultnum, address); + show_regs(regs); + dump_mem((void __user *)address); + break; + default: + pr_err("User crash: signal %d, trap %ld\n", + sig, regs->faultnum); + break; + } + } +} diff --git a/arch/tile/kernel/single_step.c b/arch/tile/kernel/single_step.c index 86df5a239b70..4032ca8e51b6 100644 --- a/arch/tile/kernel/single_step.c +++ b/arch/tile/kernel/single_step.c @@ -186,6 +186,8 @@ static tile_bundle_bits rewrite_load_store_unaligned( .si_code = SEGV_MAPERR, .si_addr = addr }; + trace_unhandled_signal("segfault", regs, + (unsigned long)addr, SIGSEGV); force_sig_info(info.si_signo, &info, current); return (tile_bundle_bits) 0; } @@ -196,6 +198,8 @@ static tile_bundle_bits rewrite_load_store_unaligned( .si_code = BUS_ADRALN, .si_addr = addr }; + trace_unhandled_signal("unaligned trap", regs, + (unsigned long)addr, SIGBUS); force_sig_info(info.si_signo, &info, current); return (tile_bundle_bits) 0; } diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c index 5474fc2e77e8..f9803dfa7357 100644 --- a/arch/tile/kernel/traps.c +++ b/arch/tile/kernel/traps.c @@ -308,6 +308,7 @@ void __kprobes do_trap(struct pt_regs *regs, int fault_num, info.si_addr = (void __user *)address; if (signo == SIGILL) info.si_trapno = fault_num; + trace_unhandled_signal("trap", regs, address, signo); force_sig_info(signo, &info, current); } diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c index 24ca54a0703b..25b7b90fd620 100644 --- a/arch/tile/mm/fault.c +++ b/arch/tile/mm/fault.c @@ -43,8 +43,11 @@ #include -static noinline void force_sig_info_fault(int si_signo, int si_code, - unsigned long address, int fault_num, 
struct task_struct *tsk) +static noinline void force_sig_info_fault(const char *type, int si_signo, + int si_code, unsigned long address, + int fault_num, + struct task_struct *tsk, + struct pt_regs *regs) { siginfo_t info; @@ -59,6 +62,7 @@ static noinline void force_sig_info_fault(int si_signo, int si_code, info.si_code = si_code; info.si_addr = (void __user *)address; info.si_trapno = fault_num; + trace_unhandled_signal(type, regs, address, si_signo); force_sig_info(si_signo, &info, tsk); } @@ -71,11 +75,12 @@ SYSCALL_DEFINE2(cmpxchg_badaddr, unsigned long, address, struct pt_regs *, regs) { if (address >= PAGE_OFFSET) - force_sig_info_fault(SIGSEGV, SEGV_MAPERR, address, - INT_DTLB_MISS, current); + force_sig_info_fault("atomic segfault", SIGSEGV, SEGV_MAPERR, + address, INT_DTLB_MISS, current, regs); else - force_sig_info_fault(SIGBUS, BUS_ADRALN, address, - INT_UNALIGN_DATA, current); + force_sig_info_fault("atomic alignment fault", SIGBUS, + BUS_ADRALN, address, + INT_UNALIGN_DATA, current, regs); /* * Adjust pc to point at the actual instruction, which is unusual @@ -471,8 +476,8 @@ bad_area_nosemaphore: */ local_irq_enable(); - force_sig_info_fault(SIGSEGV, si_code, address, - fault_num, tsk); + force_sig_info_fault("segfault", SIGSEGV, si_code, address, + fault_num, tsk, regs); return 0; } @@ -547,7 +552,8 @@ do_sigbus: if (is_kernel_mode) goto no_context; - force_sig_info_fault(SIGBUS, BUS_ADRERR, address, fault_num, tsk); + force_sig_info_fault("bus error", SIGBUS, BUS_ADRERR, address, + fault_num, tsk, regs); return 0; } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c0bb32414b17..aaec9342a33c 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1496,7 +1496,7 @@ static struct ctl_table fs_table[] = { static struct ctl_table debug_table[] = { #if defined(CONFIG_X86) || defined(CONFIG_PPC) || defined(CONFIG_SPARC) || \ - defined(CONFIG_S390) + defined(CONFIG_S390) || defined(CONFIG_TILE) { .procname = "exception-trace", .data = 
&show_unhandled_signals, -- cgit v1.2.3 From 586692a5a5fc5740c8a46abc0f2365495c2d7c5f Mon Sep 17 00:00:00 2001 From: Mandeep Singh Baines Date: Sun, 22 May 2011 22:10:22 -0700 Subject: watchdog: Disable watchdog when thresh is zero This restores the previous behavior of softlockup_thresh. Currently, setting watchdog_thresh to zero causes the watchdog kthreads to consume a lot of CPU. In addition, the logic of proc_dowatchdog_thresh and proc_dowatchdog_enabled has been factored into proc_dowatchdog. Signed-off-by: Mandeep Singh Baines Cc: Marcin Slusarz Cc: Don Zickus Cc: Peter Zijlstra Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/1306127423-3347-3-git-send-email-msb@chromium.org Signed-off-by: Ingo Molnar LKML-Reference: <20110517071018.GE22305@elte.hu> --- include/linux/nmi.h | 5 +++-- include/linux/sched.h | 1 - kernel/sysctl.c | 12 ++++++++---- kernel/watchdog.c | 25 +++++++++---------------- 4 files changed, 20 insertions(+), 23 deletions(-) (limited to 'kernel/sysctl.c') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index c536f8545f74..5317b8b2198f 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -47,9 +47,10 @@ static inline bool trigger_all_cpu_backtrace(void) int hw_nmi_is_cpu_stuck(struct pt_regs *); u64 hw_nmi_get_sample_period(void); extern int watchdog_enabled; +extern int watchdog_thresh; struct ctl_table; -extern int proc_dowatchdog_enabled(struct ctl_table *, int , - void __user *, size_t *, loff_t *); +extern int proc_dowatchdog(struct ctl_table *, int , + void __user *, size_t *, loff_t *); #endif #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 12211e1666e2..d8b2d0bec0d8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -315,7 +315,6 @@ extern int proc_dowatchdog_thresh(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); extern unsigned int softlockup_panic; -extern int softlockup_thresh; void lockup_detector_init(void); #else static inline void 
touch_softlockup_watchdog(void) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c0bb32414b17..3dd0c46fa3bb 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -730,14 +730,16 @@ static struct ctl_table kern_table[] = { .data = &watchdog_enabled, .maxlen = sizeof (int), .mode = 0644, - .proc_handler = proc_dowatchdog_enabled, + .proc_handler = proc_dowatchdog, + .extra1 = &zero, + .extra2 = &one, }, { .procname = "watchdog_thresh", - .data = &softlockup_thresh, + .data = &watchdog_thresh, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dowatchdog_thresh, + .proc_handler = proc_dowatchdog, .extra1 = &neg_one, .extra2 = &sixty, }, @@ -755,7 +757,9 @@ static struct ctl_table kern_table[] = { .data = &watchdog_enabled, .maxlen = sizeof (int), .mode = 0644, - .proc_handler = proc_dowatchdog_enabled, + .proc_handler = proc_dowatchdog, + .extra1 = &zero, + .extra2 = &one, }, #endif #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index cf0e09f452e7..60301916f62e 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -28,7 +28,7 @@ #include int watchdog_enabled = 1; -int __read_mostly softlockup_thresh = 60; +int __read_mostly watchdog_thresh = 60; static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog); @@ -105,12 +105,12 @@ static unsigned long get_timestamp(int this_cpu) static unsigned long get_sample_period(void) { /* - * convert softlockup_thresh from seconds to ns + * convert watchdog_thresh from seconds to ns * the divide by 5 is to give hrtimer 5 chances to * increment before the hardlockup detector generates * a warning */ - return softlockup_thresh * (NSEC_PER_SEC / 5); + return watchdog_thresh * (NSEC_PER_SEC / 5); } /* Commands for resetting the watchdog */ @@ -182,7 +182,7 @@ static int is_softlockup(unsigned long touch_ts) unsigned long now = get_timestamp(smp_processor_id()); /* Warn about unreasonable delays: */ 
- if (time_after(now, touch_ts + softlockup_thresh)) + if (time_after(now, touch_ts + watchdog_thresh)) return now - touch_ts; return 0; @@ -501,19 +501,19 @@ static void watchdog_disable_all_cpus(void) /* sysctl functions */ #ifdef CONFIG_SYSCTL /* - * proc handler for /proc/sys/kernel/nmi_watchdog + * proc handler for /proc/sys/kernel/nmi_watchdog,watchdog_thresh */ -int proc_dowatchdog_enabled(struct ctl_table *table, int write, - void __user *buffer, size_t *length, loff_t *ppos) +int proc_dowatchdog(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) { int ret; - ret = proc_dointvec(table, write, buffer, length, ppos); + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (ret || !write) goto out; - if (watchdog_enabled) + if (watchdog_enabled && watchdog_thresh) watchdog_enable_all_cpus(); else watchdog_disable_all_cpus(); @@ -521,13 +521,6 @@ int proc_dowatchdog_enabled(struct ctl_table *table, int write, out: return ret; } - -int proc_dowatchdog_thresh(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos) -{ - return proc_dointvec_minmax(table, write, buffer, lenp, ppos); -} #endif /* CONFIG_SYSCTL */ -- cgit v1.2.3 From aa4a221875873d2a1f9656cb7fd7e545e952b4fa Mon Sep 17 00:00:00 2001 From: Vince Weaver Date: Fri, 3 Jun 2011 17:54:40 -0400 Subject: perf: Comment /proc/sys/kernel/perf_event_paranoid to be part of user ABI Turns out that distro packages use this file as an indicator of the perf event subsystem - this is easier to check for from scripts than the existence of the system call. This is easy enough to keep around for the kernel, so add a comment to make sure it stays so. 
Signed-off-by: Vince Weaver Cc: David Ahern Cc: Peter Zijlstra Cc: paulus@samba.org Cc: acme@redhat.com Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/alpine.DEB.2.00.1106031751170.29381@cl320.eecs.utk.edu Signed-off-by: Ingo Molnar --- kernel/sysctl.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'kernel/sysctl.c') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 4fc92445a29c..f175d98bd355 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -938,6 +938,12 @@ static struct ctl_table kern_table[] = { }, #endif #ifdef CONFIG_PERF_EVENTS + /* + * User-space scripts rely on the existence of this file + * as a feature check for perf_events being enabled. + * + * So it's an ABI, do not remove! + */ { .procname = "perf_event_paranoid", .data = &sysctl_perf_event_paranoid, -- cgit v1.2.3