From 2d514487faf188938a4ee4fb3464eeecfbdcf8eb Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 21 Dec 2011 12:17:04 -0800 Subject: security: Yama LSM This adds the Yama Linux Security Module to collect DAC security improvements (specifically just ptrace restrictions for now) that have existed in various forms over the years and have been carried outside the mainline kernel by other Linux distributions like Openwall and grsecurity. Signed-off-by: Kees Cook Acked-by: John Johansen Signed-off-by: James Morris --- include/linux/prctl.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux/prctl.h') diff --git a/include/linux/prctl.h b/include/linux/prctl.h index 7ddc7f1b480f..4d0e5bc5458c 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -114,4 +114,10 @@ # define PR_SET_MM_START_BRK 6 # define PR_SET_MM_BRK 7 +/* + * Set specific pid that is allowed to ptrace the current task. + * A value of 0 mean "no process". + */ +#define PR_SET_PTRACER 0x59616d61 + #endif /* _LINUX_PRCTL_H */ -- cgit v1.2.3 From bf06189e4d14641c0148bea16e9dd24943862215 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 14 Feb 2012 16:48:09 -0800 Subject: Yama: add PR_SET_PTRACER_ANY For a process to entirely disable Yama ptrace restrictions, it can use the special PR_SET_PTRACER_ANY pid to indicate that any otherwise allowed process may ptrace it. This is stronger than calling PR_SET_PTRACER with pid "1" because it includes processes in external pid namespaces. This is currently needed by the Chrome renderer, since its crash handler (Breakpad) runs external to the renderer's pid namespace. Signed-off-by: Kees Cook Signed-off-by: James Morris --- Documentation/security/Yama.txt | 7 ++++++- include/linux/prctl.h | 1 + security/yama/yama_lsm.c | 8 ++++++-- 3 files changed, 13 insertions(+), 3 deletions(-) (limited to 'include/linux/prctl.h') diff --git a/Documentation/security/Yama.txt b/Documentation/security/Yama.txt index 4f0b7896a21d..a9511f179069 100644 --- a/Documentation/security/Yama.txt +++ b/Documentation/security/Yama.txt @@ -41,7 +41,12 @@ other process (and its descendents) are allowed to call PTRACE_ATTACH against it. Only one such declared debugging process can exists for each inferior at a time. For example, this is used by KDE, Chromium, and Firefox's crash handlers, and by Wine for allowing only Wine processes -to ptrace each other. +to ptrace each other. If a process wishes to entirely disable these ptrace +restrictions, it can call prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, ...) +so that any otherwise allowed process (even those in external pid namespaces) +may attach. + +The sysctl settings are: 0 - classic ptrace permissions: a process can PTRACE_ATTACH to any other process running under the same uid, as long as it is dumpable (i.e. diff --git a/include/linux/prctl.h b/include/linux/prctl.h index 4d0e5bc5458c..a0413ac3abe8 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -119,5 +119,6 @@ * A value of 0 mean "no process". */ #define PR_SET_PTRACER 0x59616d61 +# define PR_SET_PTRACER_ANY ((unsigned long)-1) #endif /* _LINUX_PRCTL_H */ diff --git a/security/yama/yama_lsm.c b/security/yama/yama_lsm.c index dd4d36067c50..573723843a04 100644 --- a/security/yama/yama_lsm.c +++ b/security/yama/yama_lsm.c @@ -84,7 +84,7 @@ static void yama_ptracer_del(struct task_struct *tracer, spin_lock_bh(&ptracer_relations_lock); list_for_each_entry_safe(relation, safe, &ptracer_relations, node) if (relation->tracee == tracee || - relation->tracer == tracer) { + (tracer && relation->tracer == tracer)) { list_del(&relation->node); kfree(relation); } @@ -138,6 +138,8 @@ static int yama_task_prctl(int option, unsigned long arg2, unsigned long arg3, if (arg2 == 0) { yama_ptracer_del(NULL, myself); rc = 0; + } else if (arg2 == PR_SET_PTRACER_ANY) { + rc = yama_ptracer_add(NULL, myself); } else { struct task_struct *tracer; @@ -208,6 +210,7 @@ static int ptracer_exception_found(struct task_struct *tracer, int rc = 0; struct ptrace_relation *relation; struct task_struct *parent = NULL; + bool found = false; spin_lock_bh(&ptracer_relations_lock); rcu_read_lock(); @@ -216,10 +219,11 @@ static int ptracer_exception_found(struct task_struct *tracer, list_for_each_entry(relation, &ptracer_relations, node) if (relation->tracee == tracee) { parent = relation->tracer; + found = true; break; } - if (task_is_descendant(parent, tracer)) + if (found && (parent == NULL || task_is_descendant(parent, tracer))) rc = 1; rcu_read_unlock(); spin_unlock_bh(&ptracer_relations_lock); -- cgit v1.2.3 From ebec18a6d3aa1e7d84aab16225e87fd25170ec2b Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 23 Mar 2012 15:01:54 -0700 Subject: prctl: add PR_{SET,GET}_CHILD_SUBREAPER to allow simple process supervision Userspace service managers/supervisors need to track their started services. Many services daemonize by double-forking and get implicitly re-parented to PID 1. The service manager will no longer be able to receive the SIGCHLD signals for them, and is no longer in charge of reaping the children with wait(). All information about the children is lost at the moment PID 1 cleans up the re-parented processes. With this prctl, a service manager process can mark itself as a sort of 'sub-init', able to stay as the parent for all orphaned processes created by the started services. All SIGCHLD signals will be delivered to the service manager. Receiving SIGCHLD and doing wait() is in cases of a service-manager much preferred over any possible asynchronous notification about specific PIDs, because the service manager has full access to the child process data in /proc and the PID can not be re-used until the wait(), the service-manager itself is in charge of, has happened. As a side effect, the relevant parent PID information does not get lost by a double-fork, which results in a more elaborate process tree and 'ps' output: before: # ps afx 253 ? Ss 0:00 /bin/dbus-daemon --system --nofork 294 ? Sl 0:00 /usr/libexec/polkit-1/polkitd 328 ? S 0:00 /usr/sbin/modem-manager 608 ? Sl 0:00 /usr/libexec/colord 658 ? Sl 0:00 /usr/libexec/upowerd 819 ? Sl 0:00 /usr/libexec/imsettings-daemon 916 ? Sl 0:00 /usr/libexec/udisks-daemon 917 ? S 0:00 \_ udisks-daemon: not polling any devices after: # ps afx 294 ? Ss 0:00 /bin/dbus-daemon --system --nofork 426 ? Sl 0:00 \_ /usr/libexec/polkit-1/polkitd 449 ? S 0:00 \_ /usr/sbin/modem-manager 635 ? Sl 0:00 \_ /usr/libexec/colord 705 ? Sl 0:00 \_ /usr/libexec/upowerd 959 ? Sl 0:00 \_ /usr/libexec/udisks-daemon 960 ? S 0:00 | \_ udisks-daemon: not polling any devices 977 ? Sl 0:00 \_ /usr/libexec/packagekitd This prctl is orthogonal to PID namespaces. PID namespaces are isolated from each other, while a service management process usually requires the services to live in the same namespace, to be able to talk to each other. Users of this will be the systemd per-user instance, which provides init-like functionality for the user's login session and D-Bus, which activates bus services on-demand. Both need init-like capabilities to be able to properly keep track of the services they start. Many thanks to Oleg for several rounds of review and insights. [akpm@linux-foundation.org: fix comment layout and spelling] [akpm@linux-foundation.org: add lengthy code comment from Oleg] Reviewed-by: Oleg Nesterov Signed-off-by: Lennart Poettering Signed-off-by: Kay Sievers Acked-by: Valdis Kletnieks Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/prctl.h | 3 +++ include/linux/sched.h | 12 ++++++++++++ kernel/exit.c | 33 ++++++++++++++++++++++++++++----- kernel/fork.c | 3 +++ kernel/sys.c | 8 ++++++++ 5 files changed, 54 insertions(+), 5 deletions(-) (limited to 'include/linux/prctl.h') diff --git a/include/linux/prctl.h b/include/linux/prctl.h index a0413ac3abe8..e0cfec2490aa 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -121,4 +121,7 @@ #define PR_SET_PTRACER 0x59616d61 # define PR_SET_PTRACER_ANY ((unsigned long)-1) +#define PR_SET_CHILD_SUBREAPER 36 +#define PR_GET_CHILD_SUBREAPER 37 + #endif /* _LINUX_PRCTL_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 0c147a4260a5..0c3854b0d4b1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -553,6 +553,18 @@ struct signal_struct { int group_stop_count; unsigned int flags; /* see SIGNAL_* flags below */ + /* + * PR_SET_CHILD_SUBREAPER marks a process, like a service + * manager, to re-parent orphan (double-forking) child processes + * to this process instead of 'init'. The service manager is + * able to receive SIGCHLD signals and is able to investigate + * the process until it calls wait(). All children of this + * process will inherit a flag if they should look for a + * child_subreaper process at exit. + */ + unsigned int is_child_subreaper:1; + unsigned int has_child_subreaper:1; + /* POSIX.1b Interval Timers */ struct list_head posix_timers; diff --git a/kernel/exit.c b/kernel/exit.c index 16b07bfac224..456329fd4ea3 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -687,11 +687,11 @@ static void exit_mm(struct task_struct * tsk) } /* - * When we die, we re-parent all our children. - * Try to give them to another thread in our thread - * group, and if no such member exists, give it to - * the child reaper process (ie "init") in our pid - * space. + * When we die, we re-parent all our children, and try to: + * 1. give them to another thread in our thread group, if such a member exists + * 2. give it to the first ancestor process which prctl'd itself as a + * child_subreaper for its children (like a service manager) + * 3. give it to the init process (PID 1) in our pid namespace */ static struct task_struct *find_new_reaper(struct task_struct *father) __releases(&tasklist_lock) @@ -722,6 +722,29 @@ static struct task_struct *find_new_reaper(struct task_struct *father) * forget_original_parent() must move them somewhere. */ pid_ns->child_reaper = init_pid_ns.child_reaper; + } else if (father->signal->has_child_subreaper) { + struct task_struct *reaper; + + /* + * Find the first ancestor marked as child_subreaper. + * Note that the code below checks same_thread_group(reaper, + * pid_ns->child_reaper). This is what we need to DTRT in a + * PID namespace. However we still need the check above, see + * http://marc.info/?l=linux-kernel&m=131385460420380 + */ + for (reaper = father->real_parent; + reaper != &init_task; + reaper = reaper->real_parent) { + if (same_thread_group(reaper, pid_ns->child_reaper)) + break; + if (!reaper->signal->is_child_subreaper) + continue; + thread = reaper; + do { + if (!(thread->flags & PF_EXITING)) + return reaper; + } while_each_thread(reaper, thread); + } } return pid_ns->child_reaper; diff --git a/kernel/fork.c b/kernel/fork.c index 37674ec55cde..b9372a0bff18 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1051,6 +1051,9 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) sig->oom_score_adj = current->signal->oom_score_adj; sig->oom_score_adj_min = current->signal->oom_score_adj_min; + sig->has_child_subreaper = current->signal->has_child_subreaper || + current->signal->is_child_subreaper; + mutex_init(&sig->cred_guard_mutex); return 0; diff --git a/kernel/sys.c b/kernel/sys.c index 888d227fd195..9eb7fcab8df6 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1962,6 +1962,14 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, case PR_SET_MM: error = prctl_set_mm(arg2, arg3, arg4, arg5); break; + case PR_SET_CHILD_SUBREAPER: + me->signal->is_child_subreaper = !!arg2; + error = 0; + break; + case PR_GET_CHILD_SUBREAPER: + error = put_user(me->signal->is_child_subreaper, + (int __user *) arg2); + break; default: error = -EINVAL; break; -- cgit v1.2.3