From 8ded2bbc1845e19c771eb55209aab166ef011243 Mon Sep 17 00:00:00 2001 From: Josh Boyer Date: Wed, 25 Jul 2012 10:40:34 -0400 Subject: posix_types.h: Cleanup stale __NFDBITS and related definitions Recently, glibc made a change to suppress sign-conversion warnings in FD_SET (glibc commit ceb9e56b3d1). This uncovered an issue with the kernel's definition of __NFDBITS if applications #include after including . A build failure would be seen when passing the -Werror=sign-compare and -D_FORTIFY_SOURCE=2 flags to gcc. It was suggested that the kernel should either match the glibc definition of __NFDBITS or remove that entirely. The current in-kernel uses of __NFDBITS can be replaced with BITS_PER_LONG, and there are no uses of the related __FDELT and __FDMASK defines. Given that, we'll continue the cleanup that was started with commit 8b3d1cda4f5f ("posix_types: Remove fd_set macros") and drop the remaining unused macros. Additionally, linux/time.h has similar macros defined that expand to nothing so we'll remove those at the same time. Reported-by: Jeff Law Suggested-by: Linus Torvalds CC: Signed-off-by: Josh Boyer [ .. and fix up whitespace as per akpm ] Signed-off-by: Linus Torvalds --- fs/exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index da27b91ff1e8..e95aeeddd25c 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1020,7 +1020,7 @@ static void flush_old_files(struct files_struct * files) unsigned long set, i; j++; - i = j * __NFDBITS; + i = j * BITS_PER_LONG; fdt = files_fdtable(files); if (i >= fdt->max_fds) break; -- cgit v1.2.3 From e4fad8e5d220e3dfb1050eee752ee5058f29a232 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 21 Jul 2012 15:33:25 +0400 Subject: consolidate pipe file creation Signed-off-by: Al Viro --- fs/exec.c | 19 ++++-------- fs/pipe.c | 75 ++++++++++++++++------------------------------- include/linux/fs.h | 3 -- include/linux/pipe_fs_i.h | 2 ++ 4 files changed, 34 insertions(+), 65 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index da27b91ff1e8..b800fb87f6ce 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -2069,25 +2069,18 @@ static void wait_for_dump_helpers(struct file *file) */ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new) { - struct file *rp, *wp; + struct file *files[2]; struct fdtable *fdt; struct coredump_params *cp = (struct coredump_params *)info->data; struct files_struct *cf = current->files; + int err = create_pipe_files(files, 0); + if (err) + return err; - wp = create_write_pipe(0); - if (IS_ERR(wp)) - return PTR_ERR(wp); - - rp = create_read_pipe(wp, 0); - if (IS_ERR(rp)) { - free_write_pipe(wp); - return PTR_ERR(rp); - } - - cp->file = wp; + cp->file = files[1]; sys_close(0); - fd_install(0, rp); + fd_install(0, files[0]); spin_lock(&cf->file_lock); fdt = files_fdtable(cf); __set_open_fd(0, fdt); diff --git a/fs/pipe.c b/fs/pipe.c index 49c1065256fd..7523d9d2a998 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1016,18 +1016,16 @@ fail_inode: return NULL; } -struct file *create_write_pipe(int flags) +int create_pipe_files(struct file **res, int flags) { int err; - struct inode *inode; + struct inode *inode = get_pipe_inode(); struct file *f; struct path path; - struct qstr name = { .name = "" }; + static struct qstr name = { .name = "" }; - err = -ENFILE; - inode = get_pipe_inode(); if (!inode) - goto err; + return -ENFILE; err = -ENOMEM; path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &name); @@ -1041,62 +1039,43 @@ struct file *create_write_pipe(int flags) f = alloc_file(&path, FMODE_WRITE, &write_pipefifo_fops); if (!f) goto err_dentry; - f->f_mapping = inode->i_mapping; f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)); - f->f_version = 0; - return f; + res[0] = alloc_file(&path, FMODE_READ, &read_pipefifo_fops); + if (!res[0]) + goto err_file; + + path_get(&path); + res[0]->f_flags = O_RDONLY | (flags & O_NONBLOCK); + res[1] = f; + return 0; - err_dentry: +err_file: + put_filp(f); +err_dentry: free_pipe_info(inode); path_put(&path); - return ERR_PTR(err); + return err; - err_inode: +err_inode: free_pipe_info(inode); iput(inode); - err: - return ERR_PTR(err); -} - -void free_write_pipe(struct file *f) -{ - free_pipe_info(f->f_dentry->d_inode); - path_put(&f->f_path); - put_filp(f); -} - -struct file *create_read_pipe(struct file *wrf, int flags) -{ - /* Grab pipe from the writer */ - struct file *f = alloc_file(&wrf->f_path, FMODE_READ, - &read_pipefifo_fops); - if (!f) - return ERR_PTR(-ENFILE); - - path_get(&wrf->f_path); - f->f_flags = O_RDONLY | (flags & O_NONBLOCK); - - return f; + return err; } int do_pipe_flags(int *fd, int flags) { - struct file *fw, *fr; + struct file *files[2]; int error; int fdw, fdr; if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT)) return -EINVAL; - fw = create_write_pipe(flags); - if (IS_ERR(fw)) - return PTR_ERR(fw); - fr = create_read_pipe(fw, flags); - error = PTR_ERR(fr); - if (IS_ERR(fr)) - goto err_write_pipe; + error = create_pipe_files(files, flags); + if (error) + return error; error = get_unused_fd_flags(flags); if (error < 0) @@ -1109,8 +1088,8 @@ int do_pipe_flags(int *fd, int flags) fdw = error; audit_fd_pair(fdr, fdw); - fd_install(fdr, fr); - fd_install(fdw, fw); + fd_install(fdr, files[0]); + fd_install(fdw, files[1]); fd[0] = fdr; fd[1] = fdw; @@ -1119,10 +1098,8 @@ int do_pipe_flags(int *fd, int flags) err_fdr: put_unused_fd(fdr); err_read_pipe: - path_put(&fr->f_path); - put_filp(fr); - err_write_pipe: - free_write_pipe(fw); + fput(files[0]); + fput(files[1]); return error; } diff --git a/include/linux/fs.h b/include/linux/fs.h index 8fabb037a48d..478237844648 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2326,9 +2326,6 @@ static inline void i_readcount_inc(struct inode *inode) } #endif extern int do_pipe_flags(int *, int); -extern struct file *create_read_pipe(struct file *f, int flags); -extern struct file *create_write_pipe(int flags); -extern void free_write_pipe(struct file *); extern int kernel_read(struct file *, loff_t, char *, unsigned long); extern struct file * open_exec(const char *); diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index e1ac1ce16fb0..e16dcb31f0c7 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -162,4 +162,6 @@ void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *); long pipe_fcntl(struct file *, unsigned int, unsigned long arg); struct pipe_inode_info *get_pipe_info(struct file *file); +int create_pipe_files(struct file **, int); + #endif -- cgit v1.2.3 From 9520628e8ceb69fa9a4aee6b57f22675d9e1b709 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 30 Jul 2012 14:39:15 -0700 Subject: fs: make dumpable=2 require fully qualified path When the suid_dumpable sysctl is set to "2", and there is no core dump pipe defined in the core_pattern sysctl, a local user can cause core files to be written to root-writable directories, potentially with user-controlled content. This means an admin can unknowningly reintroduce a variation of CVE-2006-2451, allowing local users to gain root privileges. $ cat /proc/sys/fs/suid_dumpable 2 $ cat /proc/sys/kernel/core_pattern core $ ulimit -c unlimited $ cd / $ ls -l core ls: cannot access core: No such file or directory $ touch core touch: cannot touch `core': Permission denied $ OHAI="evil-string-here" ping localhost >/dev/null 2>&1 & $ pid=$! $ sleep 1 $ kill -SEGV $pid $ ls -l core -rw------- 1 root kees 458752 Jun 21 11:35 core $ sudo strings core | grep evil OHAI=evil-string-here While cron has been fixed to abort reading a file when there is any parse error, there are still other sensitive directories that will read any file present and skip unparsable lines. Instead of introducing a suid_dumpable=3 mode and breaking all users of mode 2, this only disables the unsafe portion of mode 2 (writing to disk via relative path). Most users of mode 2 (e.g. Chrome OS) already use a core dump pipe handler, so this change will not break them. For the situations where a pipe handler is not defined but mode 2 is still active, crash dumps will only be written to fully qualified paths. If a relative path is defined (e.g. the default "core" pattern), dump attempts will trigger a printk yelling about the lack of a fully qualified path. Signed-off-by: Kees Cook Cc: Alexander Viro Cc: Alan Cox Cc: "Eric W. Biederman" Cc: Doug Ledford Cc: Serge Hallyn Cc: James Morris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/sysctl/fs.txt | 18 ++++++++++++------ fs/exec.c | 17 ++++++++++++++--- 2 files changed, 26 insertions(+), 9 deletions(-) (limited to 'fs/exec.c') diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt index 13d6166d7a27..8c235b6e4246 100644 --- a/Documentation/sysctl/fs.txt +++ b/Documentation/sysctl/fs.txt @@ -163,16 +163,22 @@ This value can be used to query and set the core dump mode for setuid or otherwise protected/tainted binaries. The modes are 0 - (default) - traditional behaviour. Any process which has changed - privilege levels or is execute only will not be dumped + privilege levels or is execute only will not be dumped. 1 - (debug) - all processes dump core when possible. The core dump is owned by the current user and no security is applied. This is intended for system debugging situations only. Ptrace is unchecked. + This is insecure as it allows regular users to examine the memory + contents of privileged processes. 2 - (suidsafe) - any binary which normally would not be dumped is dumped - readable by root only. This allows the end user to remove - such a dump but not access it directly. For security reasons - core dumps in this mode will not overwrite one another or - other files. This mode is appropriate when administrators are - attempting to debug problems in a normal environment. + anyway, but only if the "core_pattern" kernel sysctl is set to + either a pipe handler or a fully qualified path. (For more details + on this limitation, see CVE-2006-2451.) This mode is appropriate + when administrators are attempting to debug problems in a normal + environment, and either have a core dump pipe handler that knows + to treat privileged core dumps with care, or specific directory + defined for catching core dumps. If a core dump happens without + a pipe handler or fully qualifid path, a message will be emitted + to syslog warning about the lack of a correct setting. ============================================================== diff --git a/fs/exec.c b/fs/exec.c index e95aeeddd25c..95aae3f9c036 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -2111,6 +2111,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) int retval = 0; int flag = 0; int ispipe; + bool need_nonrelative = false; static atomic_t core_dump_count = ATOMIC_INIT(0); struct coredump_params cprm = { .signr = signr, @@ -2136,14 +2137,16 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) if (!cred) goto fail; /* - * We cannot trust fsuid as being the "true" uid of the - * process nor do we know its entire history. We only know it - * was tainted so we dump it as root in mode 2. + * We cannot trust fsuid as being the "true" uid of the process + * nor do we know its entire history. We only know it was tainted + * so we dump it as root in mode 2, and only into a controlled + * environment (pipe handler or fully qualified path). */ if (__get_dumpable(cprm.mm_flags) == 2) { /* Setuid core dump mode */ flag = O_EXCL; /* Stop rewrite attacks */ cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */ + need_nonrelative = true; } retval = coredump_wait(exit_code, &core_state); @@ -2223,6 +2226,14 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) if (cprm.limit < binfmt->min_coredump) goto fail_unlock; + if (need_nonrelative && cn.corename[0] != '/') { + printk(KERN_WARNING "Pid %d(%s) can only dump core "\ + "to fully qualified path!\n", + task_tgid_vnr(current), current->comm); + printk(KERN_WARNING "Skipping core dump\n"); + goto fail_unlock; + } + cprm.file = filp_open(cn.corename, O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, 0600); -- cgit v1.2.3 From 54b501992dd2a839e94e76aa392c392b55080ce8 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 30 Jul 2012 14:39:18 -0700 Subject: coredump: warn about unsafe suid_dumpable / core_pattern combo When suid_dumpable=2, detect unsafe core_pattern settings and warn when they are seen. Signed-off-by: Kees Cook Suggested-by: Andrew Morton Cc: Alexander Viro Cc: Alan Cox Cc: "Eric W. Biederman" Cc: Doug Ledford Cc: Serge Hallyn Cc: James Morris Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 10 +++++----- include/linux/sched.h | 5 +++++ kernel/sysctl.c | 37 +++++++++++++++++++++++++++++++++++-- 3 files changed, 45 insertions(+), 7 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index 95aae3f9c036..5af8390e0fae 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -2002,17 +2002,17 @@ static void coredump_finish(struct mm_struct *mm) void set_dumpable(struct mm_struct *mm, int value) { switch (value) { - case 0: + case SUID_DUMPABLE_DISABLED: clear_bit(MMF_DUMPABLE, &mm->flags); smp_wmb(); clear_bit(MMF_DUMP_SECURELY, &mm->flags); break; - case 1: + case SUID_DUMPABLE_ENABLED: set_bit(MMF_DUMPABLE, &mm->flags); smp_wmb(); clear_bit(MMF_DUMP_SECURELY, &mm->flags); break; - case 2: + case SUID_DUMPABLE_SAFE: set_bit(MMF_DUMP_SECURELY, &mm->flags); smp_wmb(); set_bit(MMF_DUMPABLE, &mm->flags); @@ -2025,7 +2025,7 @@ static int __get_dumpable(unsigned long mm_flags) int ret; ret = mm_flags & MMF_DUMPABLE_MASK; - return (ret >= 2) ? 2 : ret; + return (ret > SUID_DUMPABLE_ENABLED) ? SUID_DUMPABLE_SAFE : ret; } int get_dumpable(struct mm_struct *mm) @@ -2142,7 +2142,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) * so we dump it as root in mode 2, and only into a controlled * environment (pipe handler or fully qualified path). */ - if (__get_dumpable(cprm.mm_flags) == 2) { + if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) { /* Setuid core dump mode */ flag = O_EXCL; /* Stop rewrite attacks */ cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */ diff --git a/include/linux/sched.h b/include/linux/sched.h index a721cef7e2d4..1e26a5e45aa6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -406,6 +406,11 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} extern void set_dumpable(struct mm_struct *mm, int value); extern int get_dumpable(struct mm_struct *mm); +/* get/set_dumpable() values */ +#define SUID_DUMPABLE_DISABLED 0 +#define SUID_DUMPABLE_ENABLED 1 +#define SUID_DUMPABLE_SAFE 2 + /* mm flags */ /* dumpable bits */ #define MMF_DUMPABLE 0 /* core dump is permitted */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 4ab11879aeb4..b46f496405e4 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -174,6 +174,11 @@ static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); #endif +static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos); +static int proc_dostring_coredump(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos); + #ifdef CONFIG_MAGIC_SYSRQ /* Note: sysrq code uses it's own private copy */ static int __sysrq_enabled = SYSRQ_DEFAULT_ENABLE; @@ -410,7 +415,7 @@ static struct ctl_table kern_table[] = { .data = core_pattern, .maxlen = CORENAME_MAX_SIZE, .mode = 0644, - .proc_handler = proc_dostring, + .proc_handler = proc_dostring_coredump, }, { .procname = "core_pipe_limit", @@ -1498,7 +1503,7 @@ static struct ctl_table fs_table[] = { .data = &suid_dumpable, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dointvec_minmax_coredump, .extra1 = &zero, .extra2 = &two, }, @@ -2009,6 +2014,34 @@ int proc_dointvec_minmax(struct ctl_table *table, int write, do_proc_dointvec_minmax_conv, ¶m); } +static void validate_coredump_safety(void) +{ + if (suid_dumpable == SUID_DUMPABLE_SAFE && + core_pattern[0] != '/' && core_pattern[0] != '|') { + printk(KERN_WARNING "Unsafe core_pattern used with "\ + "suid_dumpable=2. Pipe handler or fully qualified "\ + "core dump path required.\n"); + } +} + +static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + if (!error) + validate_coredump_safety(); + return error; +} + +static int proc_dostring_coredump(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int error = proc_dostring(table, write, buffer, lenp, ppos); + if (!error) + validate_coredump_safety(); + return error; +} + static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos, -- cgit v1.2.3 From 108ceeb020bb3558fe175a3fc8b60fd6c1a2a279 Mon Sep 17 00:00:00 2001 From: Jovi Zhang Date: Mon, 30 Jul 2012 14:42:23 -0700 Subject: coredump: fix wrong comments on core limits of pipe coredump case In commit 898b374af6f7 ("exec: replace call_usermodehelper_pipe with use of umh init function and resolve limit"), the core limits recursive check value was changed from 0 to 1, but the corresponding comments were not updated. Signed-off-by: Jovi Zhang Cc: Oleg Nesterov Cc: Neil Horman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'fs/exec.c') diff --git a/fs/exec.c b/fs/exec.c index 5af8390e0fae..3684353ebd5f 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -2174,15 +2174,16 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) } if (cprm.limit == 1) { - /* + /* See umh_pipe_setup() which sets RLIMIT_CORE = 1. + * * Normally core limits are irrelevant to pipes, since * we're not writing to the file system, but we use - * cprm.limit of 1 here as a speacial value. Any - * non-1 limit gets set to RLIM_INFINITY below, but - * a limit of 0 skips the dump. This is a consistent - * way to catch recursive crashes. We can still crash - * if the core_pattern binary sets RLIM_CORE = !1 - * but it runs as root, and can do lots of stupid things + * cprm.limit of 1 here as a speacial value, this is a + * consistent way to catch recursive crashes. + * We can still crash if the core_pattern binary sets + * RLIM_CORE = !1, but it runs as root, and can do + * lots of stupid things. + * * Note that we use task_tgid_vnr here to grab the pid * of the process group leader. That way we get the * right pid if a thread in a multi-threaded -- cgit v1.2.3