Merge digitalimplant.org:/home/mochel/src/linux-2.6-virgin

into digitalimplant.org:/home/mochel/src/linux-2.6-power
author: Patrick Mochel <mochel@digitalimplant.org> 2004-09-02 15:40:36 -0700
committer: Patrick Mochel <mochel@digitalimplant.org> 2004-09-02 15:40:36 -0700
commit: 80e1f7fa6e60e7a32409f121775ec510aad60df2 (patch)
tree: e47bbaebb1e649d3d40f386ab675504b647c5a7a /kernel
parent: 30611d8282d0cd850132b5db013fedf24d6e07b9 (diff)
parent: 3411df4ee64e032426f09392526ca74179aceee5 (diff)
10 files changed, 690 insertions, 182 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 47f98594e9e5..a032595fd58c 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_IKCONFIG_PROC) += configs.o
 obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
 obj-$(CONFIG_AUDIT) += audit.o
 obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
+obj-$(CONFIG_KPROBES) += kprobes.o
 
 ifneq ($(CONFIG_IA64),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/capability.c b/kernel/capability.c
index 1c5c35718450..7e864e2ccf6a 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -89,14 +89,12 @@ static inline void cap_set_pg(int pgrp, kernel_cap_t *effective,
 			      kernel_cap_t *permitted)
 {
 	task_t *g, *target;
-	struct list_head *l;
-	struct pid *pid;
 
-	for_each_task_pid(pgrp, PIDTYPE_PGID, g, l, pid) {
+	do_each_task_pid(pgrp, PIDTYPE_PGID, g) {
 		target = g;
 		while_each_thread(g, target)
 			security_capset_set(target, effective, inheritable, permitted);
-	}
+	} while_each_task_pid(pgrp, PIDTYPE_PGID, g);
 }
 
 /*
diff --git a/kernel/compat.c b/kernel/compat.c
index 481ac0d4bb98..8bfae57e7d66 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -160,10 +160,39 @@ asmlinkage long compat_sys_times(struct compat_tms __user *tbuf)
 	 */
 	if (tbuf) {
 		struct compat_tms tmp;
-		tmp.tms_utime = compat_jiffies_to_clock_t(current->utime);
-		tmp.tms_stime = compat_jiffies_to_clock_t(current->stime);
-		tmp.tms_cutime = compat_jiffies_to_clock_t(current->cutime);
-		tmp.tms_cstime = compat_jiffies_to_clock_t(current->cstime);
+		struct task_struct *tsk = current;
+		struct task_struct *t;
+		unsigned long utime, stime, cutime, cstime;
+
+		read_lock(&tasklist_lock);
+		utime = tsk->signal->utime;
+		stime = tsk->signal->stime;
+		t = tsk;
+		do {
+			utime += t->utime;
+			stime += t->stime;
+			t = next_thread(t);
+		} while (t != tsk);
+
+		/*
+		 * While we have tasklist_lock read-locked, no dying thread
+		 * can be updating current->signal->[us]time.  Threads not yet
+		 * folded in there were already summed by the loop above.
+		 * However, another thread can come in right now and
+		 * do a wait call that updates current->signal->c[us]time.
+		 * To make sure we always see that pair updated atomically,
+		 * we take the siglock around fetching them.
+		 */
+		spin_lock_irq(&tsk->sighand->siglock);
+		cutime = tsk->signal->cutime;
+		cstime = tsk->signal->cstime;
+		spin_unlock_irq(&tsk->sighand->siglock);
+		read_unlock(&tasklist_lock);
+
+		tmp.tms_utime = compat_jiffies_to_clock_t(utime);
+		tmp.tms_stime = compat_jiffies_to_clock_t(stime);
+		tmp.tms_cutime = compat_jiffies_to_clock_t(cutime);
+		tmp.tms_cstime = compat_jiffies_to_clock_t(cstime);
 		if (copy_to_user(tbuf, &tmp, sizeof(tmp)))
 			return -EFAULT;
 	}
@@ -310,7 +339,7 @@ asmlinkage long compat_sys_getrlimit (unsigned int resource,
 	return ret;
 }
 
-static long put_compat_rusage(struct compat_rusage __user *ru, struct rusage *r)
+int put_compat_rusage(const struct rusage *r, struct compat_rusage __user *ru)
 {
 	if (!access_ok(VERIFY_WRITE, ru, sizeof(*ru)) ||
 	    __put_user(r->ru_utime.tv_sec, &ru->ru_utime.tv_sec) ||
@@ -348,7 +377,7 @@ asmlinkage long compat_sys_getrusage(int who, struct compat_rusage __user *ru)
 	if (ret)
 		return ret;
 
-	if (put_compat_rusage(ru, &r))
+	if (put_compat_rusage(&r, ru))
 		return -EFAULT;
 
 	return 0;
@@ -374,7 +403,7 @@ compat_sys_wait4(compat_pid_t pid, compat_uint_t __user *stat_addr, int options,
 		set_fs (old_fs);
 
 		if (ret > 0) {
-			if (put_compat_rusage(ru, &r)) 
+			if (put_compat_rusage(&r, ru))
 				return -EFAULT;
 			if (stat_addr && put_user(status, stat_addr))
 				return -EFAULT;
diff --git a/kernel/exit.c b/kernel/exit.c
index c6ceaaee8a2e..731b9ccd236b 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -89,12 +89,6 @@ repeat:
 		zap_leader = (leader->exit_signal == -1);
 	}
 
-	p->parent->cutime += p->utime + p->cutime;
-	p->parent->cstime += p->stime + p->cstime;
-	p->parent->cmin_flt += p->min_flt + p->cmin_flt;
-	p->parent->cmaj_flt += p->maj_flt + p->cmaj_flt;
-	p->parent->cnvcsw += p->nvcsw + p->cnvcsw;
-	p->parent->cnivcsw += p->nivcsw + p->cnivcsw;
 	sched_exit(p);
 	write_unlock_irq(&tasklist_lock);
 	spin_unlock(&p->proc_lock);
@@ -130,16 +124,15 @@ void unhash_process(struct task_struct *p)
 int session_of_pgrp(int pgrp)
 {
 	struct task_struct *p;
-	struct list_head *l;
-	struct pid *pid;
 	int sid = -1;
 
 	read_lock(&tasklist_lock);
-	for_each_task_pid(pgrp, PIDTYPE_PGID, p, l, pid)
+	do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
 		if (p->signal->session > 0) {
 			sid = p->signal->session;
 			goto out;
 		}
+	} while_each_task_pid(pgrp, PIDTYPE_PGID, p);
 	p = find_task_by_pid(pgrp);
 	if (p)
 		sid = p->signal->session;
@@ -160,11 +153,9 @@ out:
 static int will_become_orphaned_pgrp(int pgrp, task_t *ignored_task)
 {
 	struct task_struct *p;
-	struct list_head *l;
-	struct pid *pid;
 	int ret = 1;
 
-	for_each_task_pid(pgrp, PIDTYPE_PGID, p, l, pid) {
+	do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
 		if (p == ignored_task
 				|| p->state >= TASK_ZOMBIE 
 				|| p->real_parent->pid == 1)
@@ -174,7 +165,7 @@ static int will_become_orphaned_pgrp(int pgrp, task_t *ignored_task)
 			ret = 0;
 			break;
 		}
-	}
+	} while_each_task_pid(pgrp, PIDTYPE_PGID, p);
 	return ret;	/* (sighing) "Often!" */
 }
 
@@ -193,10 +184,8 @@ static inline int has_stopped_jobs(int pgrp)
 {
 	int retval = 0;
 	struct task_struct *p;
-	struct list_head *l;
-	struct pid *pid;
 
-	for_each_task_pid(pgrp, PIDTYPE_PGID, p, l, pid) {
+	do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
 		if (p->state != TASK_STOPPED)
 			continue;
 
@@ -212,7 +201,7 @@ static inline int has_stopped_jobs(int pgrp)
 
 		retval = 1;
 		break;
-	}
+	} while_each_task_pid(pgrp, PIDTYPE_PGID, p);
 	return retval;
 }
 
@@ -855,9 +844,6 @@ asmlinkage long sys_exit(int error_code)
 
 task_t fastcall *next_thread(const task_t *p)
 {
-	const struct pid_link *link = p->pids + PIDTYPE_TGID;
-	const struct list_head *tmp, *head = &link->pidptr->task_list;
-
 #ifdef CONFIG_SMP
 	if (!p->sighand)
 		BUG();
@@ -865,11 +851,7 @@ task_t fastcall *next_thread(const task_t *p)
 				!rwlock_is_locked(&tasklist_lock))
 		BUG();
 #endif
-	tmp = link->pid_chain.next;
-	if (tmp == head)
-		tmp = head->next;
-
-	return pid_task(tmp, PIDTYPE_TGID);
+	return pid_task(p->pids[PIDTYPE_TGID].pid_list.next, PIDTYPE_TGID);
 }
 
 EXPORT_SYMBOL(next_thread);
@@ -957,16 +939,64 @@ static int eligible_child(pid_t pid, int options, task_t *p)
 	return 1;
 }
 
+static int wait_noreap_copyout(task_t *p, pid_t pid, uid_t uid,
+			       int why, int status,
+			       struct siginfo __user *infop)
+{
+	int retval = getrusage(p, RUSAGE_BOTH, &infop->si_rusage);
+	put_task_struct(p);
+	if (!retval)
+		retval = put_user(SIGCHLD, &infop->si_signo);
+	if (!retval)
+		retval = put_user(0, &infop->si_errno);
+	if (!retval)
+		retval = put_user((short)why, &infop->si_code);
+	if (!retval)
+		retval = put_user(pid, &infop->si_pid);
+	if (!retval)
+		retval = put_user(uid, &infop->si_uid);
+	if (!retval)
+		retval = put_user(status, &infop->si_status);
+	if (!retval)
+		retval = pid;
+	return retval;
+}
+
 /*
  * Handle sys_wait4 work for one task in state TASK_ZOMBIE.  We hold
  * read_lock(&tasklist_lock) on entry.  If we return zero, we still hold
  * the lock and this task is uninteresting.  If we return nonzero, we have
  * released the lock and the system call should return.
  */
-static int wait_task_zombie(task_t *p, unsigned int __user *stat_addr, struct rusage __user *ru)
+static int wait_task_zombie(task_t *p, int noreap,
+			    struct siginfo __user *infop,
+			    int __user *stat_addr, struct rusage __user *ru)
 {
 	unsigned long state;
 	int retval;
+	int status;
+
+	if (unlikely(noreap)) {
+		pid_t pid = p->pid;
+		uid_t uid = p->uid;
+		int exit_code = p->exit_code;
+		int why, status;
+
+		if (unlikely(p->state != TASK_ZOMBIE))
+			return 0;
+		if (unlikely(p->exit_signal == -1 && p->ptrace == 0))
+			return 0;
+		get_task_struct(p);
+		read_unlock(&tasklist_lock);
+		if ((exit_code & 0x7f) == 0) {
+			why = CLD_EXITED;
+			status = exit_code >> 8;
+		} else {
+			why = (exit_code & 0x80) ? CLD_DUMPED : CLD_KILLED;
+			status = exit_code & 0x7f;
+		}
+		return wait_noreap_copyout(p, pid, uid, why, status, infop);
+	}
 
 	/*
 	 * Try to move the task's state to DEAD
@@ -977,12 +1007,45 @@ static int wait_task_zombie(task_t *p, unsigned int __user *stat_addr, struct ru
 		BUG_ON(state != TASK_DEAD);
 		return 0;
 	}
-	if (unlikely(p->exit_signal == -1 && p->ptrace == 0))
+	if (unlikely(p->exit_signal == -1 && p->ptrace == 0)) {
 		/*
 		 * This can only happen in a race with a ptraced thread
 		 * dying on another processor.
 		 */
 		return 0;
+	}
+
+	if (likely(p->real_parent == p->parent) && likely(p->signal)) {
+		/*
+		 * The resource counters for the group leader are in its
+		 * own task_struct.  Those for dead threads in the group
+		 * are in its signal_struct, as are those for the child
+		 * processes it has previously reaped.  All these
+		 * accumulate in the parent's signal_struct c* fields.
+		 *
+		 * We don't bother to take a lock here to protect these
+		 * p->signal fields, because they are only touched by
+		 * __exit_signal, which runs with tasklist_lock
+		 * write-locked anyway, and so is excluded here.  We do
+		 * need to protect the access to p->parent->signal fields,
+		 * as other threads in the parent group can be right
+		 * here reaping other children at the same time.
+		 */
+		spin_lock_irq(&p->parent->sighand->siglock);
+		p->parent->signal->cutime +=
+			p->utime + p->signal->utime + p->signal->cutime;
+		p->parent->signal->cstime +=
+			p->stime + p->signal->stime + p->signal->cstime;
+		p->parent->signal->cmin_flt +=
+			p->min_flt + p->signal->min_flt + p->signal->cmin_flt;
+		p->parent->signal->cmaj_flt +=
+			p->maj_flt + p->signal->maj_flt + p->signal->cmaj_flt;
+		p->parent->signal->cnvcsw +=
+			p->nvcsw + p->signal->nvcsw + p->signal->cnvcsw;
+		p->parent->signal->cnivcsw +=
+			p->nivcsw + p->signal->nivcsw + p->signal->cnivcsw;
+		spin_unlock_irq(&p->parent->sighand->siglock);
+	}
 
 	/*
 	 * Now we are sure this task is interesting, and no other
@@ -991,12 +1054,32 @@ static int wait_task_zombie(task_t *p, unsigned int __user *stat_addr, struct ru
 	read_unlock(&tasklist_lock);
 
 	retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
-	if (!retval && stat_addr) {
-		if (p->signal->group_exit)
-			retval = put_user(p->signal->group_exit_code, stat_addr);
-		else
-			retval = put_user(p->exit_code, stat_addr);
+	status = p->signal->group_exit
+		? p->signal->group_exit_code : p->exit_code;
+	if (!retval && stat_addr)
+		retval = put_user(status, stat_addr);
+	if (!retval && infop)
+		retval = put_user(SIGCHLD, &infop->si_signo);
+	if (!retval && infop)
+		retval = put_user(0, &infop->si_errno);
+	if (!retval && infop) {
+		int why;
+
+		if ((status & 0x7f) == 0) {
+			why = CLD_EXITED;
+			status >>= 8;
+		} else {
+			why = (status & 0x80) ? CLD_DUMPED : CLD_KILLED;
+			status &= 0x7f;
+		}
+		retval = put_user((short)why, &infop->si_code);
+		if (!retval)
+			retval = put_user(status, &infop->si_status);
 	}
+	if (!retval && infop)
+		retval = put_user(p->pid, &infop->si_pid);
+	if (!retval && infop)
+		retval = put_user(p->uid, &infop->si_uid);
 	if (retval) {
 		p->state = TASK_ZOMBIE;
 		return retval;
@@ -1009,8 +1092,9 @@ static int wait_task_zombie(task_t *p, unsigned int __user *stat_addr, struct ru
 			__ptrace_unlink(p);
 			p->state = TASK_ZOMBIE;
 			/*
-			 * If this is not a detached task, notify the parent.  If it's
-			 * still not detached after that, don't release it now.
+			 * If this is not a detached task, notify the parent.
+			 * If it's still not detached after that, don't release
+			 * it now.
 			 */
 			if (p->exit_signal != -1) {
 				do_notify_parent(p, p->exit_signal);
@@ -1032,9 +1116,9 @@ static int wait_task_zombie(task_t *p, unsigned int __user *stat_addr, struct ru
  * the lock and this task is uninteresting.  If we return nonzero, we have
  * released the lock and the system call should return.
  */
-static int wait_task_stopped(task_t *p, int delayed_group_leader,
-			     unsigned int __user *stat_addr,
-			     struct rusage __user *ru)
+static int wait_task_stopped(task_t *p, int delayed_group_leader, int noreap,
+			     struct siginfo __user *infop,
+			     int __user *stat_addr, struct rusage __user *ru)
 {
 	int retval, exit_code;
 
@@ -1057,6 +1141,21 @@ static int wait_task_stopped(task_t *p, int delayed_group_leader,
 	 */
 	get_task_struct(p);
 	read_unlock(&tasklist_lock);
+
+	if (unlikely(noreap)) {
+		pid_t pid = p->pid;
+		uid_t uid = p->uid;
+		int why = (p->ptrace & PT_PTRACED) ? CLD_TRAPPED : CLD_STOPPED;
+
+		exit_code = p->exit_code;
+		if (unlikely(!exit_code) ||
+		    unlikely(p->state > TASK_STOPPED))
+			goto bail_ref;
+		return wait_noreap_copyout(p, pid, uid,
+					   why, (exit_code << 8) | 0x7f,
+					   infop);
+	}
+
 	write_lock_irq(&tasklist_lock);
 
 	/*
@@ -1082,6 +1181,7 @@ static int wait_task_stopped(task_t *p, int delayed_group_leader,
 		 * resumed, or it resumed and then died.
 		 */
 		write_unlock_irq(&tasklist_lock);
+bail_ref:
 		put_task_struct(p);
 		read_lock(&tasklist_lock);
 		return 0;
@@ -1096,6 +1196,20 @@ static int wait_task_stopped(task_t *p, int delayed_group_leader,
 	retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
 	if (!retval && stat_addr)
 		retval = put_user((exit_code << 8) | 0x7f, stat_addr);
+	if (!retval && infop)
+		retval = put_user(SIGCHLD, &infop->si_signo);
+	if (!retval && infop)
+		retval = put_user(0, &infop->si_errno);
+	if (!retval && infop)
+		retval = put_user((short)((p->ptrace & PT_PTRACED)
+					  ? CLD_TRAPPED : CLD_STOPPED),
+				  &infop->si_code);
+	if (!retval && infop)
+		retval = put_user(exit_code, &infop->si_status);
+	if (!retval && infop)
+		retval = put_user(p->pid, &infop->si_pid);
+	if (!retval && infop)
+		retval = put_user(p->uid, &infop->si_uid);
 	if (!retval)
 		retval = p->pid;
 	put_task_struct(p);
@@ -1104,15 +1218,13 @@ static int wait_task_stopped(task_t *p, int delayed_group_leader,
 	return retval;
 }
 
-asmlinkage long sys_wait4(pid_t pid,unsigned int __user *stat_addr, int options, struct rusage __user *ru)
+static long do_wait(pid_t pid, int options, struct siginfo __user *infop,
+		    int __user *stat_addr, struct rusage __user *ru)
 {
 	DECLARE_WAITQUEUE(wait, current);
 	struct task_struct *tsk;
 	int flag, retval;
 
-	if (options & ~(WNOHANG|WUNTRACED|__WNOTHREAD|__WCLONE|__WALL))
-		return -EINVAL;
-
 	add_wait_queue(&current->wait_chldexit,&wait);
 repeat:
 	flag = 0;
@@ -1138,25 +1250,60 @@ repeat:
 				    !(p->ptrace & PT_PTRACED))
 					continue;
 				retval = wait_task_stopped(p, ret == 2,
+							   (options & WNOWAIT),
+							   infop,
 							   stat_addr, ru);
 				if (retval != 0) /* He released the lock.  */
-					goto end_wait4;
+					goto end;
 				break;
 			case TASK_ZOMBIE:
 				/*
 				 * Eligible but we cannot release it yet:
 				 */
 				if (ret == 2)
+					goto check_continued;
+				if (!likely(options & WEXITED))
 					continue;
-				retval = wait_task_zombie(p, stat_addr, ru);
+				retval = wait_task_zombie(
+					p, (options & WNOWAIT),
+					infop, stat_addr, ru);
 				if (retval != 0) /* He released the lock.  */
-					goto end_wait4;
+					goto end;
+				break;
+			case TASK_DEAD:
+				continue;
+			default:
+check_continued:
+				if (!unlikely(options & WCONTINUED))
+					continue;
+				if (unlikely(!p->signal))
+					continue;
+				spin_lock_irq(&p->sighand->siglock);
+				if (p->signal->stop_state < 0) {
+					pid_t pid;
+					uid_t uid;
+
+					if (!(options & WNOWAIT))
+						p->signal->stop_state = 0;
+					spin_unlock_irq(&p->sighand->siglock);
+					pid = p->pid;
+					uid = p->uid;
+					get_task_struct(p);
+					read_unlock(&tasklist_lock);
+					retval = wait_noreap_copyout(p, pid,
+							uid, CLD_CONTINUED,
+							SIGCONT, infop);
+					BUG_ON(retval == 0);
+					goto end;
+				}
+				spin_unlock_irq(&p->sighand->siglock);
 				break;
 			}
 		}
 		if (!flag) {
-			list_for_each (_p,&tsk->ptrace_children) {
-				p = list_entry(_p,struct task_struct,ptrace_list);
+			list_for_each(_p, &tsk->ptrace_children) {
+				p = list_entry(_p, struct task_struct,
+						ptrace_list);
 				if (!eligible_child(pid, options, p))
 					continue;
 				flag = 1;
@@ -1169,24 +1316,84 @@ repeat:
 		if (tsk->signal != current->signal)
 			BUG();
 	} while (tsk != current);
+
 	read_unlock(&tasklist_lock);
 	if (flag) {
 		retval = 0;
 		if (options & WNOHANG)
-			goto end_wait4;
+			goto end;
 		retval = -ERESTARTSYS;
 		if (signal_pending(current))
-			goto end_wait4;
+			goto end;
 		schedule();
 		goto repeat;
 	}
 	retval = -ECHILD;
-end_wait4:
+end:
 	current->state = TASK_RUNNING;
 	remove_wait_queue(&current->wait_chldexit,&wait);
+	if (infop) {
+		if (retval > 0)
+		retval = 0;
+		else {
+			/*
+			 * For a WNOHANG return, clear out all the fields
+			 * we would set so the user can easily tell the
+			 * difference.
+			 */
+			if (!retval)
+				retval = put_user(0, &infop->si_signo);
+			if (!retval)
+				retval = put_user(0, &infop->si_errno);
+			if (!retval)
+				retval = put_user(0, &infop->si_code);
+			if (!retval)
+				retval = put_user(0, &infop->si_pid);
+			if (!retval)
+				retval = put_user(0, &infop->si_uid);
+			if (!retval)
+				retval = put_user(0, &infop->si_status);
+		}
+	}
 	return retval;
 }
 
+asmlinkage long sys_waitid(int which, pid_t pid,
+			   struct siginfo __user *infop, int options)
+{
+	if (options & ~(WNOHANG|WNOWAIT|WEXITED|WSTOPPED|WCONTINUED))
+		return -EINVAL;
+	if (!(options & (WEXITED|WSTOPPED|WCONTINUED)))
+		return -EINVAL;
+
+	switch (which) {
+	case P_ALL:
+		pid = -1;
+		break;
+	case P_PID:
+		if (pid <= 0)
+			return -EINVAL;
+		break;
+	case P_PGID:
+		if (pid <= 0)
+			return -EINVAL;
+		pid = -pid;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return do_wait(pid, options, infop, NULL, &infop->si_rusage);
+}
+
+asmlinkage long sys_wait4(pid_t pid, unsigned int __user *stat_addr,
+				int options, struct rusage __user *ru)
+{
+	if (options & ~(WNOHANG|WUNTRACED|__WNOTHREAD|__WCLONE|__WALL))
+		return -EINVAL;
+	return do_wait(pid, options | WEXITED, NULL, stat_addr, ru);
+}
+
 #ifdef __ARCH_WANT_SYS_WAITPID
 
 /*
diff --git a/kernel/fork.c b/kernel/fork.c
index fed7a0772660..78db8811c834 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -559,8 +559,7 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
 	int retval;
 
 	tsk->min_flt = tsk->maj_flt = 0;
-	tsk->cmin_flt = tsk->cmaj_flt = 0;
-	tsk->nvcsw = tsk->nivcsw = tsk->cnvcsw = tsk->cnivcsw = 0;
+	tsk->nvcsw = tsk->nivcsw = 0;
 
 	tsk->mm = NULL;
 	tsk->active_mm = NULL;
@@ -867,6 +866,10 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
 	sig->leader = 0;	/* session leadership doesn't inherit */
 	sig->tty_old_pgrp = 0;
 
+	sig->utime = sig->stime = sig->cutime = sig->cstime = 0;
+	sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
+	sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
+
 	return 0;
 }
 
@@ -986,7 +989,6 @@ static task_t *copy_process(unsigned long clone_flags,
 	p->real_timer.data = (unsigned long) p;
 
 	p->utime = p->stime = 0;
-	p->cutime = p->cstime = 0;
 	p->lock_depth = -1;		/* -1 = no lock */
 	p->start_time = get_jiffies_64();
 	p->security = NULL;
@@ -1118,18 +1120,17 @@ static task_t *copy_process(unsigned long clone_flags,
 	}
 
 	SET_LINKS(p);
-	if (p->ptrace & PT_PTRACED)
+	if (unlikely(p->ptrace & PT_PTRACED))
 		__ptrace_link(p, current->parent);
 
 	attach_pid(p, PIDTYPE_PID, p->pid);
+	attach_pid(p, PIDTYPE_TGID, p->tgid);
 	if (thread_group_leader(p)) {
-		attach_pid(p, PIDTYPE_TGID, p->tgid);
 		attach_pid(p, PIDTYPE_PGID, process_group(p));
 		attach_pid(p, PIDTYPE_SID, p->signal->session);
 		if (p->pid)
 			__get_cpu_var(process_counts)++;
-	} else
-		link_pid(p, p->pids + PIDTYPE_TGID, &p->group_leader->pids[PIDTYPE_TGID].pid);
+	}
 
 	nr_threads++;
 	write_unlock_irq(&tasklist_lock);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
new file mode 100644
index 000000000000..01436a31c690
--- /dev/null
+++ b/kernel/kprobes.c
@@ -0,0 +1,146 @@
+/*
+ *  Kernel Probes (KProbes)
+ *  kernel/kprobes.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2002, 2004
+ *
+ * 2002-Oct	Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
+ *		Probes initial implementation (includes suggestions from
+ *		Rusty Russell).
+ * 2004-Aug	Updated by Prasanna S Panchamukhi <prasanna@in.ibm.com> with
+ *		hlists and exceptions notifier as suggested by Andi Kleen.
+ * 2004-July	Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
+ *		interface to access function arguments.
+ */
+#include <linux/kprobes.h>
+#include <linux/spinlock.h>
+#include <linux/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <asm/cacheflush.h>
+#include <asm/errno.h>
+#include <asm/kdebug.h>
+
+#define KPROBE_HASH_BITS 6
+#define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS)
+
+static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
+
+unsigned int kprobe_cpu = NR_CPUS;
+static spinlock_t kprobe_lock = SPIN_LOCK_UNLOCKED;
+
+/* Takes kprobe_lock and records the owning CPU; irqs must be disabled */
+void lock_kprobes(void)
+{
+	spin_lock(&kprobe_lock);
+	kprobe_cpu = smp_processor_id();
+}
+
+void unlock_kprobes(void)
+{
+	kprobe_cpu = NR_CPUS;
+	spin_unlock(&kprobe_lock);
+}
+
+/* You have to be holding the kprobe_lock */
+struct kprobe *get_kprobe(void *addr)
+{
+	struct hlist_head *head;
+	struct hlist_node *node;
+
+	head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)];
+	hlist_for_each(node, head) {
+		struct kprobe *p = hlist_entry(node, struct kprobe, hlist);
+		if (p->addr == addr)
+			return p;
+	}
+	return NULL;
+}
+
+int register_kprobe(struct kprobe *p)
+{
+	int ret = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kprobe_lock, flags);
+	INIT_HLIST_NODE(&p->hlist);
+	if (get_kprobe(p->addr)) {
+		ret = -EEXIST;
+		goto out;
+	}
+	hlist_add_head(&p->hlist,
+		       &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);
+
+	arch_prepare_kprobe(p);
+	p->opcode = *p->addr;
+	*p->addr = BREAKPOINT_INSTRUCTION;
+	flush_icache_range((unsigned long) p->addr,
+			   (unsigned long) p->addr + sizeof(kprobe_opcode_t));
+      out:
+	spin_unlock_irqrestore(&kprobe_lock, flags);
+	return ret;
+}
+
+void unregister_kprobe(struct kprobe *p)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&kprobe_lock, flags);
+	*p->addr = p->opcode;
+	hlist_del(&p->hlist);
+	flush_icache_range((unsigned long) p->addr,
+			   (unsigned long) p->addr + sizeof(kprobe_opcode_t));
+	spin_unlock_irqrestore(&kprobe_lock, flags);
+}
+
+static struct notifier_block kprobe_exceptions_nb = {
+	.notifier_call = kprobe_exceptions_notify,
+};
+
+int register_jprobe(struct jprobe *jp)
+{
+	/* Todo: Verify probepoint is a function entry point */
+	jp->kp.pre_handler = setjmp_pre_handler;
+	jp->kp.break_handler = longjmp_break_handler;
+
+	return register_kprobe(&jp->kp);
+}
+
+void unregister_jprobe(struct jprobe *jp)
+{
+	unregister_kprobe(&jp->kp);
+}
+
+static int __init init_kprobes(void)
+{
+	int i, err = 0;
+
+	/* FIXME allocate the probe table, currently defined statically */
+	/* initialize all list heads */
+	for (i = 0; i < KPROBE_TABLE_SIZE; i++)
+		INIT_HLIST_HEAD(&kprobe_table[i]);
+
+	err = register_die_notifier(&kprobe_exceptions_nb);
+	return err;
+}
+
+__initcall(init_kprobes);
+
+EXPORT_SYMBOL_GPL(register_kprobe);
+EXPORT_SYMBOL_GPL(unregister_kprobe);
+EXPORT_SYMBOL_GPL(register_jprobe);
+EXPORT_SYMBOL_GPL(unregister_jprobe);
+EXPORT_SYMBOL_GPL(jprobe_return);
diff --git a/kernel/module.c b/kernel/module.c
index aa91a98ffb0b..8c17a4ce0707 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1538,9 +1538,6 @@ static struct module *load_module(void __user *umod,
 	secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
 	sechdrs[0].sh_addr = 0;
 
-	/* And these should exist, but gcc whinges if we don't init them */
-	symindex = strindex = 0;
-
 	for (i = 1; i < hdr->e_shnum; i++) {
 		if (sechdrs[i].sh_type != SHT_NOBITS
 		    && len < sechdrs[i].sh_offset + sechdrs[i].sh_size)
@@ -1572,6 +1569,13 @@ static struct module *load_module(void __user *umod,
 	}
 	mod = (void *)sechdrs[modindex].sh_addr;
 
+	if (symindex == 0) {
+		printk(KERN_WARNING "%s: module has no symbols (stripped?)\n",
+		       mod->name);
+		err = -ENOEXEC;
+		goto free_hdr;
+	}
+
 	/* Optional sections */
 	exportindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab");
 	gplindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_gpl");
diff --git a/kernel/pid.c b/kernel/pid.c
index 6ed44f56ca45..57527f0cda5e 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -27,7 +27,7 @@
 #include <linux/hash.h>
 
 #define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift)
-static struct list_head *pid_hash[PIDTYPE_MAX];
+static struct hlist_head *pid_hash[PIDTYPE_MAX];
 static int pidhash_shift;
 
 int pid_max = PID_MAX_DEFAULT;
@@ -146,73 +146,66 @@ failure:
 	return -1;
 }
 
-fastcall struct pid *find_pid(enum pid_type type, int nr)
+struct pid * fastcall find_pid(enum pid_type type, int nr)
 {
-	struct list_head *elem, *bucket = &pid_hash[type][pid_hashfn(nr)];
+	struct hlist_node *elem;
 	struct pid *pid;
 
-	__list_for_each(elem, bucket) {
-		pid = list_entry(elem, struct pid, hash_chain);
+	hlist_for_each_entry(pid, elem,
+			&pid_hash[type][pid_hashfn(nr)], pid_chain) {
 		if (pid->nr == nr)
 			return pid;
 	}
 	return NULL;
 }
 
-void fastcall link_pid(task_t *task, struct pid_link *link, struct pid *pid)
-{
-	atomic_inc(&pid->count);
-	list_add_tail(&link->pid_chain, &pid->task_list);
-	link->pidptr = pid;
-}
-
 int fastcall attach_pid(task_t *task, enum pid_type type, int nr)
 {
-	struct pid *pid = find_pid(type, nr);
-
-	if (pid)
-		atomic_inc(&pid->count);
-	else {
-		pid = &task->pids[type].pid;
-		pid->nr = nr;
-		atomic_set(&pid->count, 1);
-		INIT_LIST_HEAD(&pid->task_list);
-		pid->task = task;
-		get_task_struct(task);
-		list_add(&pid->hash_chain, &pid_hash[type][pid_hashfn(nr)]);
+	struct pid *pid, *task_pid;
+
+	task_pid = &task->pids[type];
+	pid = find_pid(type, nr);
+	if (pid == NULL) {
+		hlist_add_head(&task_pid->pid_chain,
+				&pid_hash[type][pid_hashfn(nr)]);
+		INIT_LIST_HEAD(&task_pid->pid_list);
+	} else {
+		INIT_HLIST_NODE(&task_pid->pid_chain);
+		list_add_tail(&task_pid->pid_list, &pid->pid_list);
 	}
-	list_add_tail(&task->pids[type].pid_chain, &pid->task_list);
-	task->pids[type].pidptr = pid;
+	task_pid->nr = nr;
 
 	return 0;
 }
 
 static inline int __detach_pid(task_t *task, enum pid_type type)
 {
-	struct pid_link *link = task->pids + type;
-	struct pid *pid = link->pidptr;
+	struct pid *pid, *pid_next;
 	int nr;
 
-	list_del(&link->pid_chain);
-	if (!atomic_dec_and_test(&pid->count))
-		return 0;
-
+	pid = &task->pids[type];
+	if (!hlist_unhashed(&pid->pid_chain)) {
+		hlist_del(&pid->pid_chain);
+		if (!list_empty(&pid->pid_list)) {
+			pid_next = list_entry(pid->pid_list.next,
+						struct pid, pid_list);
+			/* insert next pid from pid_list to hash */
+			hlist_add_head(&pid_next->pid_chain,
+				&pid_hash[type][pid_hashfn(pid_next->nr)]);
+		}
+	}
+	list_del(&pid->pid_list);
 	nr = pid->nr;
-	list_del(&pid->hash_chain);
-	put_task_struct(pid->task);
+	pid->nr = 0;
 
 	return nr;
 }
 
-static void _detach_pid(task_t *task, enum pid_type type)
-{
-	__detach_pid(task, type);
-}
-
 void fastcall detach_pid(task_t *task, enum pid_type type)
 {
-	int nr = __detach_pid(task, type);
+	int nr;
 
+	nr = __detach_pid(task, type);
 	if (!nr)
 		return;
 
@@ -222,16 +215,18 @@ void fastcall detach_pid(task_t *task, enum pid_type type)
 	free_pidmap(nr);
 }
 
-task_t *find_task_by_pid(int nr)
+task_t *find_task_by_pid_type(int type, int nr)
 {
-	struct pid *pid = find_pid(PIDTYPE_PID, nr);
+	struct pid *pid;
 
+	pid = find_pid(type, nr);
 	if (!pid)
 		return NULL;
-	return pid_task(pid->task_list.next, PIDTYPE_PID);
+
+	return pid_task(&pid->pid_list, type);
 }
 
-EXPORT_SYMBOL(find_task_by_pid);
+EXPORT_SYMBOL(find_task_by_pid_type);
 
 /*
  * This function switches the PIDs if a non-leader thread calls
@@ -240,13 +235,13 @@ EXPORT_SYMBOL(find_task_by_pid);
  */
 void switch_exec_pids(task_t *leader, task_t *thread)
 {
-	_detach_pid(leader, PIDTYPE_PID);
-	_detach_pid(leader, PIDTYPE_TGID);
-	_detach_pid(leader, PIDTYPE_PGID);
-	_detach_pid(leader, PIDTYPE_SID);
+	__detach_pid(leader, PIDTYPE_PID);
+	__detach_pid(leader, PIDTYPE_TGID);
+	__detach_pid(leader, PIDTYPE_PGID);
+	__detach_pid(leader, PIDTYPE_SID);
 
-	_detach_pid(thread, PIDTYPE_PID);
-	_detach_pid(thread, PIDTYPE_TGID);
+	__detach_pid(thread, PIDTYPE_PID);
+	__detach_pid(thread, PIDTYPE_TGID);
 
 	leader->pid = leader->tgid = thread->pid;
 	thread->pid = thread->tgid;
@@ -271,15 +266,15 @@ void switch_exec_pids(task_t *leader, task_t *thread)
 void __init pidhash_init(void)
 {
 	int i, j, pidhash_size;
-	unsigned long megabytes = max_pfn >> (20 - PAGE_SHIFT);
+	unsigned long megabytes = nr_kernel_pages >> (20 - PAGE_SHIFT);
 
 	pidhash_shift = max(4, fls(megabytes * 4));
 	pidhash_shift = min(12, pidhash_shift);
 	pidhash_size = 1 << pidhash_shift;
 
-	printk("PID hash table entries: %d (order %d: %Zd bytes)\n",
+	printk("PID hash table entries: %d (order: %d, %Zd bytes)\n",
 		pidhash_size, pidhash_shift,
-		pidhash_size * sizeof(struct list_head));
+		PIDTYPE_MAX * pidhash_size * sizeof(struct hlist_head));
 
 	for (i = 0; i < PIDTYPE_MAX; i++) {
 		pid_hash[i] = alloc_bootmem(pidhash_size *
@@ -287,7 +282,7 @@ void __init pidhash_init(void)
 		if (!pid_hash[i])
 			panic("Could not alloc pidhash!\n");
 		for (j = 0; j < pidhash_size; j++)
-			INIT_LIST_HEAD(&pid_hash[i][j]);
+			INIT_HLIST_HEAD(&pid_hash[i][j]);
 	}
 }
 
diff --git a/kernel/signal.c b/kernel/signal.c
index e5d6cbc50c1e..8b05f0b8c2dc 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -26,6 +26,8 @@
 #include <asm/unistd.h>
 #include <asm/siginfo.h>
 
+extern void k_getrusage(struct task_struct *, int, struct rusage *);
+
 /*
  * SLAB caches for signal bits.
  */
@@ -367,6 +369,22 @@ void __exit_signal(struct task_struct *tsk)
 		if (tsk == sig->curr_target)
 			sig->curr_target = next_thread(tsk);
 		tsk->signal = NULL;
+		/*
+		 * Accumulate here the counters for all threads but the
+		 * group leader as they die, so they can be added into
+		 * the process-wide totals when those are taken.
+		 * The group leader stays around as a zombie as long
+		 * as there are other threads.  When it gets reaped,
+		 * the exit.c code will add its counts into these totals.
+		 * We won't ever get here for the group leader, since it
+		 * will have been the last reference on the signal_struct.
+		 */
+		sig->utime += tsk->utime;
+		sig->stime += tsk->stime;
+		sig->min_flt += tsk->min_flt;
+		sig->maj_flt += tsk->maj_flt;
+		sig->nvcsw += tsk->nvcsw;
+		sig->nivcsw += tsk->nivcsw;
 		spin_unlock(&sighand->siglock);
 		sig = NULL;	/* Marker for below.  */
 	}
@@ -660,12 +678,15 @@ static void handle_stop_signal(int sig, struct task_struct *p)
 			 * the SIGCHLD was pending on entry to this kill.
 			 */
 			p->signal->group_stop_count = 0;
+			p->signal->stop_state = 1;
+			spin_unlock(&p->sighand->siglock);
 			if (p->ptrace & PT_PTRACED)
 				do_notify_parent_cldstop(p, p->parent);
 			else
 				do_notify_parent_cldstop(
 					p->group_leader,
 					p->group_leader->real_parent);
+			spin_lock(&p->sighand->siglock);
 		}
 		rm_from_queue(SIG_KERNEL_STOP_MASK, &p->signal->shared_pending);
 		t = p;
@@ -696,6 +717,23 @@ static void handle_stop_signal(int sig, struct task_struct *p)
 
 			t = next_thread(t);
 		} while (t != p);
+
+		if (p->signal->stop_state > 0) {
+			/*
+			 * We were in fact stopped, and are now continued.
+			 * Notify the parent with CLD_CONTINUED.
+			 */
+			p->signal->stop_state = -1;
+			p->signal->group_exit_code = 0;
+			spin_unlock(&p->sighand->siglock);
+			if (p->ptrace & PT_PTRACED)
+				do_notify_parent_cldstop(p, p->parent);
+			else
+				do_notify_parent_cldstop(
+					p->group_leader,
+					p->group_leader->real_parent);
+			spin_lock(&p->sighand->siglock);
+		}
 	}
 }
 
@@ -1072,8 +1110,6 @@ int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
 int __kill_pg_info(int sig, struct siginfo *info, pid_t pgrp)
 {
 	struct task_struct *p;
-	struct list_head *l;
-	struct pid *pid;
 	int retval, success;
 
 	if (pgrp <= 0)
@@ -1081,11 +1117,11 @@ int __kill_pg_info(int sig, struct siginfo *info, pid_t pgrp)
 
 	success = 0;
 	retval = -ESRCH;
-	for_each_task_pid(pgrp, PIDTYPE_PGID, p, l, pid) {
+	do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
 		int err = group_send_sig_info(sig, info, p);
 		success |= !err;
 		retval = err;
-	}
+	} while_each_task_pid(pgrp, PIDTYPE_PGID, p);
 	return success ? 0 : retval;
 }
 
@@ -1112,8 +1148,6 @@ int
 kill_sl_info(int sig, struct siginfo *info, pid_t sid)
 {
 	int err, retval = -EINVAL;
-	struct pid *pid;
-	struct list_head *l;
 	struct task_struct *p;
 
 	if (sid <= 0)
@@ -1121,13 +1155,13 @@ kill_sl_info(int sig, struct siginfo *info, pid_t sid)
 
 	retval = -ESRCH;
 	read_lock(&tasklist_lock);
-	for_each_task_pid(sid, PIDTYPE_SID, p, l, pid) {
+	do_each_task_pid(sid, PIDTYPE_SID, p) {
 		if (!p->signal->leader)
 			continue;
 		err = group_send_sig_info(sig, info, p);
 		if (retval)
 			retval = err;
-	}
+	} while_each_task_pid(sid, PIDTYPE_SID, p);
 	read_unlock(&tasklist_lock);
 out:
 	return retval;
@@ -1455,8 +1489,8 @@ void do_notify_parent(struct task_struct *tsk, int sig)
 	if (sig == -1)
 		BUG();
 
-	BUG_ON(tsk->group_leader != tsk && tsk->group_leader->state != TASK_ZOMBIE && !tsk->ptrace);
-	BUG_ON(tsk->group_leader == tsk && !thread_group_empty(tsk) && !tsk->ptrace);
+	BUG_ON(!tsk->ptrace &&
+	       (tsk->group_leader != tsk || !thread_group_empty(tsk)));
 
 	info.si_signo = sig;
 	info.si_errno = 0;
@@ -1464,8 +1498,9 @@ void do_notify_parent(struct task_struct *tsk, int sig)
 	info.si_uid = tsk->uid;
 
 	/* FIXME: find out whether or not this is supposed to be c*time. */
-	info.si_utime = tsk->utime;
-	info.si_stime = tsk->stime;
+	info.si_utime = tsk->utime + tsk->signal->utime;
+	info.si_stime = tsk->stime + tsk->signal->stime;
+	k_getrusage(tsk, RUSAGE_BOTH, &info.si_rusage);
 
 	status = tsk->exit_code & 0x7f;
 	why = SI_KERNEL;	/* shouldn't happen */
@@ -1555,9 +1590,16 @@ do_notify_parent_cldstop(struct task_struct *tsk, struct task_struct *parent)
 	/* FIXME: find out whether or not this is supposed to be c*time. */
 	info.si_utime = tsk->utime;
 	info.si_stime = tsk->stime;
+	k_getrusage(tsk, RUSAGE_BOTH, &info.si_rusage);
 
-	info.si_status = tsk->exit_code & 0x7f;
-	info.si_code = CLD_STOPPED;
+	info.si_status = (tsk->signal ? tsk->signal->group_exit_code :
+			  tsk->exit_code) & 0x7f;
+	if (info.si_status == 0) {
+		info.si_status = SIGCONT;
+		info.si_code = CLD_CONTINUED;
+	} else {
+		info.si_code = CLD_STOPPED;
+	}
 
 	sighand = parent->sighand;
 	spin_lock_irqsave(&sighand->siglock, flags);
@@ -1623,14 +1665,17 @@ do_signal_stop(int signr)
 		stop_count = --sig->group_stop_count;
 		current->exit_code = signr;
 		set_current_state(TASK_STOPPED);
+		if (stop_count == 0)
+			sig->stop_state = 1;
 		spin_unlock_irq(&sighand->siglock);
 	}
 	else if (thread_group_empty(current)) {
 		/*
 		 * Lock must be held through transition to stopped state.
 		 */
-		current->exit_code = signr;
+		current->exit_code = current->signal->group_exit_code = signr;
 		set_current_state(TASK_STOPPED);
+		sig->stop_state = 1;
 		spin_unlock_irq(&sighand->siglock);
 	}
 	else {
@@ -1696,6 +1741,8 @@ do_signal_stop(int signr)
 
 		current->exit_code = signr;
 		set_current_state(TASK_STOPPED);
+		if (stop_count == 0)
+			sig->stop_state = 1;
 
 		spin_unlock_irq(&sighand->siglock);
 		read_unlock(&tasklist_lock);
@@ -1736,6 +1783,8 @@ static inline int handle_group_stop(void)
 	 * without any associated signal being in our queue.
 	 */
 	stop_count = --current->signal->group_stop_count;
+	if (stop_count == 0)
+		current->signal->stop_state = 1;
 	current->exit_code = current->signal->group_exit_code;
 	set_current_state(TASK_STOPPED);
 	spin_unlock_irq(&current->sighand->siglock);
@@ -2098,6 +2147,8 @@ int copy_siginfo_to_user(siginfo_t __user *to, siginfo_t *from)
 		err |= __put_user(from->si_status, &to->si_status);
 		err |= __put_user(from->si_utime, &to->si_utime);
 		err |= __put_user(from->si_stime, &to->si_stime);
+		err |= __copy_to_user(&to->si_rusage, &from->si_rusage,
+				      sizeof(to->si_rusage));
 		break;
 	case __SI_RT: /* This is not generated by the kernel as of now. */
 	case __SI_MESGQ: /* But this is */
diff --git a/kernel/sys.c b/kernel/sys.c
index 1bbc66a60b8c..a4b29df201f6 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -310,8 +310,6 @@ asmlinkage long sys_setpriority(int which, int who, int niceval)
 {
 	struct task_struct *g, *p;
 	struct user_struct *user;
-	struct pid *pid;
-	struct list_head *l;
 	int error = -EINVAL;
 
 	if (which > 2 || which < 0)
@@ -336,8 +334,9 @@ asmlinkage long sys_setpriority(int which, int who, int niceval)
 		case PRIO_PGRP:
 			if (!who)
 				who = process_group(current);
-			for_each_task_pid(who, PIDTYPE_PGID, p, l, pid)
+			do_each_task_pid(who, PIDTYPE_PGID, p) {
 				error = set_one_prio(p, niceval, error);
+			} while_each_task_pid(who, PIDTYPE_PGID, p);
 			break;
 		case PRIO_USER:
 			if (!who)
@@ -371,8 +370,6 @@ out:
 asmlinkage long sys_getpriority(int which, int who)
 {
 	struct task_struct *g, *p;
-	struct list_head *l;
-	struct pid *pid;
 	struct user_struct *user;
 	long niceval, retval = -ESRCH;
 
@@ -394,11 +391,11 @@ asmlinkage long sys_getpriority(int which, int who)
 		case PRIO_PGRP:
 			if (!who)
 				who = process_group(current);
-			for_each_task_pid(who, PIDTYPE_PGID, p, l, pid) {
+			do_each_task_pid(who, PIDTYPE_PGID, p) {
 				niceval = 20 - task_nice(p);
 				if (niceval > retval)
 					retval = niceval;
-			}
+			} while_each_task_pid(who, PIDTYPE_PGID, p);
 			break;
 		case PRIO_USER:
 			if (!who)
@@ -947,10 +944,39 @@ asmlinkage long sys_times(struct tms __user * tbuf)
 	 */
 	if (tbuf) {
 		struct tms tmp;
-		tmp.tms_utime = jiffies_to_clock_t(current->utime);
-		tmp.tms_stime = jiffies_to_clock_t(current->stime);
-		tmp.tms_cutime = jiffies_to_clock_t(current->cutime);
-		tmp.tms_cstime = jiffies_to_clock_t(current->cstime);
+		struct task_struct *tsk = current;
+		struct task_struct *t;
+		unsigned long utime, stime, cutime, cstime;
+
+		read_lock(&tasklist_lock);
+		utime = tsk->signal->utime;
+		stime = tsk->signal->stime;
+		t = tsk;
+		do {
+			utime += t->utime;
+			stime += t->stime;
+			t = next_thread(t);
+		} while (t != tsk);
+
+		/*
+		 * While we have tasklist_lock read-locked, no dying thread
+		 * can be updating current->signal->[us]time.  Instead,
+		 * we got their counts included in the live thread loop.
+		 * However, another thread can come in right now and
+		 * do a wait call that updates current->signal->c[us]time.
+		 * To make sure we always see that pair updated atomically,
+		 * we take the siglock around fetching them.
+		 */
+		spin_lock_irq(&tsk->sighand->siglock);
+		cutime = tsk->signal->cutime;
+		cstime = tsk->signal->cstime;
+		spin_unlock_irq(&tsk->sighand->siglock);
+		read_unlock(&tasklist_lock);
+
+		tmp.tms_utime = jiffies_to_clock_t(utime);
+		tmp.tms_stime = jiffies_to_clock_t(stime);
+		tmp.tms_cutime = jiffies_to_clock_t(cutime);
+		tmp.tms_cstime = jiffies_to_clock_t(cstime);
 		if (copy_to_user(tbuf, &tmp, sizeof(struct tms)))
 			return -EFAULT;
 	}
@@ -1015,12 +1041,11 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
 
 	if (pgid != pid) {
 		struct task_struct *p;
-		struct pid *pid;
-		struct list_head *l;
 
-		for_each_task_pid(pgid, PIDTYPE_PGID, p, l, pid)
+		do_each_task_pid(pgid, PIDTYPE_PGID, p) {
 			if (p->signal->session == current->signal->session)
 				goto ok_pgid;
+		} while_each_task_pid(pgid, PIDTYPE_PGID, p);
 		goto out;
 	}
 
@@ -1533,50 +1558,101 @@ asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
  * a lot simpler!  (Which we're not doing right now because we're not
  * measuring them yet).
  *
- * This is SMP safe.  Either we are called from sys_getrusage on ourselves
- * below (we know we aren't going to exit/disappear and only we change our
- * rusage counters), or we are called from wait4() on a process which is
- * either stopped or zombied.  In the zombied case the task won't get
- * reaped till shortly after the call to getrusage(), in both cases the
- * task being examined is in a frozen state so the counters won't change.
+ * This expects to be called with tasklist_lock read-locked or better,
+ * and the siglock not locked.  It may momentarily take the siglock.
+ *
+ * When sampling multiple threads for RUSAGE_GROUP, under SMP we might have
+ * races with threads incrementing their own counters.  But since word
+ * reads are atomic, we either get new values or old values and we don't
+ * care which for the sums.  We always take the siglock to protect reading
+ * the c* fields from p->signal from races with exit.c updating those
+ * fields when reaping, so a sample either gets all the additions of a
+ * given child after it's reaped, or none so this sample is before reaping.
  */
-int getrusage(struct task_struct *p, int who, struct rusage __user *ru)
+
+void k_getrusage(struct task_struct *p, int who, struct rusage *r)
 {
-	struct rusage r;
+	struct task_struct *t;
+	unsigned long flags;
+	unsigned long utime, stime;
+
+	memset((char *) r, 0, sizeof *r);
+
+	if (unlikely(!p->signal))
+		return;
 
-	memset((char *) &r, 0, sizeof(r));
 	switch (who) {
 		case RUSAGE_SELF:
-			jiffies_to_timeval(p->utime, &r.ru_utime);
-			jiffies_to_timeval(p->stime, &r.ru_stime);
-			r.ru_nvcsw = p->nvcsw;
-			r.ru_nivcsw = p->nivcsw;
-			r.ru_minflt = p->min_flt;
-			r.ru_majflt = p->maj_flt;
+			jiffies_to_timeval(p->utime, &r->ru_utime);
+			jiffies_to_timeval(p->stime, &r->ru_stime);
+			r->ru_nvcsw = p->nvcsw;
+			r->ru_nivcsw = p->nivcsw;
+			r->ru_minflt = p->min_flt;
+			r->ru_majflt = p->maj_flt;
 			break;
 		case RUSAGE_CHILDREN:
-			jiffies_to_timeval(p->cutime, &r.ru_utime);
-			jiffies_to_timeval(p->cstime, &r.ru_stime);
-			r.ru_nvcsw = p->cnvcsw;
-			r.ru_nivcsw = p->cnivcsw;
-			r.ru_minflt = p->cmin_flt;
-			r.ru_majflt = p->cmaj_flt;
+			spin_lock_irqsave(&p->sighand->siglock, flags);
+			utime = p->signal->cutime;
+			stime = p->signal->cstime;
+			r->ru_nvcsw = p->signal->cnvcsw;
+			r->ru_nivcsw = p->signal->cnivcsw;
+			r->ru_minflt = p->signal->cmin_flt;
+			r->ru_majflt = p->signal->cmaj_flt;
+			spin_unlock_irqrestore(&p->sighand->siglock, flags);
+			jiffies_to_timeval(utime, &r->ru_utime);
+			jiffies_to_timeval(stime, &r->ru_stime);
 			break;
-		default:
-			jiffies_to_timeval(p->utime + p->cutime, &r.ru_utime);
-			jiffies_to_timeval(p->stime + p->cstime, &r.ru_stime);
-			r.ru_nvcsw = p->nvcsw + p->cnvcsw;
-			r.ru_nivcsw = p->nivcsw + p->cnivcsw;
-			r.ru_minflt = p->min_flt + p->cmin_flt;
-			r.ru_majflt = p->maj_flt + p->cmaj_flt;
+		case RUSAGE_GROUP:
+			spin_lock_irqsave(&p->sighand->siglock, flags);
+			utime = stime = 0;
+			goto sum_group;
+		case RUSAGE_BOTH:
+			spin_lock_irqsave(&p->sighand->siglock, flags);
+			utime = p->signal->cutime;
+			stime = p->signal->cstime;
+			r->ru_nvcsw = p->signal->cnvcsw;
+			r->ru_nivcsw = p->signal->cnivcsw;
+			r->ru_minflt = p->signal->cmin_flt;
+			r->ru_majflt = p->signal->cmaj_flt;
+		sum_group:
+			utime += p->signal->utime;
+			stime += p->signal->stime;
+			r->ru_nvcsw += p->signal->nvcsw;
+			r->ru_nivcsw += p->signal->nivcsw;
+			r->ru_minflt += p->signal->min_flt;
+			r->ru_majflt += p->signal->maj_flt;
+			t = p;
+			do {
+				utime += t->utime;
+				stime += t->stime;
+				r->ru_nvcsw += t->nvcsw;
+				r->ru_nivcsw += t->nivcsw;
+				r->ru_minflt += t->min_flt;
+				r->ru_majflt += t->maj_flt;
+				t = next_thread(t);
+			} while (t != p);
+			spin_unlock_irqrestore(&p->sighand->siglock, flags);
+			jiffies_to_timeval(utime, &r->ru_utime);
+			jiffies_to_timeval(stime, &r->ru_stime);
 			break;
+		default:
+			BUG();
 	}
+}
+
+int getrusage(struct task_struct *p, int who, struct rusage __user *ru)
+{
+	struct rusage r;
+	read_lock(&tasklist_lock);
+	k_getrusage(p, who, &r);
+	read_unlock(&tasklist_lock);
 	return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
 }
 
 asmlinkage long sys_getrusage(int who, struct rusage __user *ru)
 {
-	if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN)
+	if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN
+	    && who != RUSAGE_GROUP)
 		return -EINVAL;
 	return getrusage(current, who, ru);
 }
author	Patrick Mochel <mochel@digitalimplant.org>	2004-09-02 15:40:36 -0700
committer	Patrick Mochel <mochel@digitalimplant.org>	2004-09-02 15:40:36 -0700
commit	80e1f7fa6e60e7a32409f121775ec510aad60df2 (patch)
tree	e47bbaebb1e649d3d40f386ab675504b647c5a7a /kernel
parent	30611d8282d0cd850132b5db013fedf24d6e07b9 (diff)
parent	3411df4ee64e032426f09392526ca74179aceee5 (diff)