summary | refs | log | tree | commit | diff
path: root/kernel
diff options
context:
space:
mode:
author Patrick Mochel <mochel@digitalimplant.org> 2004-09-02 15:40:36 -0700
committer Patrick Mochel <mochel@digitalimplant.org> 2004-09-02 15:40:36 -0700
commit 80e1f7fa6e60e7a32409f121775ec510aad60df2 (patch)
tree e47bbaebb1e649d3d40f386ab675504b647c5a7a /kernel
parent 30611d8282d0cd850132b5db013fedf24d6e07b9 (diff)
parent 3411df4ee64e032426f09392526ca74179aceee5 (diff)
Merge digitalimplant.org:/home/mochel/src/linux-2.6-virgin
into digitalimplant.org:/home/mochel/src/linux-2.6-power
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/Makefile 1
-rw-r--r-- kernel/capability.c 6
-rw-r--r-- kernel/compat.c 43
-rw-r--r-- kernel/exit.c 305
-rw-r--r-- kernel/fork.c 15
-rw-r--r-- kernel/kprobes.c 146
-rw-r--r-- kernel/module.c 10
-rw-r--r-- kernel/pid.c 103
-rw-r--r-- kernel/signal.c 81
-rw-r--r-- kernel/sys.c 162
10 files changed, 690 insertions, 182 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 47f98594e9e5..a032595fd58c 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_IKCONFIG_PROC) += configs.o
obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
obj-$(CONFIG_AUDIT) += audit.o
obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
+obj-$(CONFIG_KPROBES) += kprobes.o
ifneq ($(CONFIG_IA64),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/capability.c b/kernel/capability.c
index 1c5c35718450..7e864e2ccf6a 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -89,14 +89,12 @@ static inline void cap_set_pg(int pgrp, kernel_cap_t *effective,
kernel_cap_t *permitted)
{
task_t *g, *target;
- struct list_head *l;
- struct pid *pid;
- for_each_task_pid(pgrp, PIDTYPE_PGID, g, l, pid) {
+ do_each_task_pid(pgrp, PIDTYPE_PGID, g) {
target = g;
while_each_thread(g, target)
security_capset_set(target, effective, inheritable, permitted);
- }
+ } while_each_task_pid(pgrp, PIDTYPE_PGID, g);
}
/*
diff --git a/kernel/compat.c b/kernel/compat.c
index 481ac0d4bb98..8bfae57e7d66 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -160,10 +160,39 @@ asmlinkage long compat_sys_times(struct compat_tms __user *tbuf)
*/
if (tbuf) {
struct compat_tms tmp;
- tmp.tms_utime = compat_jiffies_to_clock_t(current->utime);
- tmp.tms_stime = compat_jiffies_to_clock_t(current->stime);
- tmp.tms_cutime = compat_jiffies_to_clock_t(current->cutime);
- tmp.tms_cstime = compat_jiffies_to_clock_t(current->cstime);
+ struct task_struct *tsk = current;
+ struct task_struct *t;
+ unsigned long utime, stime, cutime, cstime;
+
+ read_lock(&tasklist_lock);
+ utime = tsk->signal->utime;
+ stime = tsk->signal->stime;
+ t = tsk;
+ do {
+ utime += t->utime;
+ stime += t->stime;
+ t = next_thread(t);
+ } while (t != tsk);
+
+ /*
+ * While we have tasklist_lock read-locked, no dying thread
+ * can be updating current->signal->[us]time. Instead,
+ * we got their counts included in the live thread loop.
+ * However, another thread can come in right now and
+ * do a wait call that updates current->signal->c[us]time.
+ * To make sure we always see that pair updated atomically,
+ * we take the siglock around fetching them.
+ */
+ spin_lock_irq(&tsk->sighand->siglock);
+ cutime = tsk->signal->cutime;
+ cstime = tsk->signal->cstime;
+ spin_unlock_irq(&tsk->sighand->siglock);
+ read_unlock(&tasklist_lock);
+
+ tmp.tms_utime = compat_jiffies_to_clock_t(utime);
+ tmp.tms_stime = compat_jiffies_to_clock_t(stime);
+ tmp.tms_cutime = compat_jiffies_to_clock_t(cutime);
+ tmp.tms_cstime = compat_jiffies_to_clock_t(cstime);
if (copy_to_user(tbuf, &tmp, sizeof(tmp)))
return -EFAULT;
}
@@ -310,7 +339,7 @@ asmlinkage long compat_sys_getrlimit (unsigned int resource,
return ret;
}
-static long put_compat_rusage(struct compat_rusage __user *ru, struct rusage *r)
+int put_compat_rusage(const struct rusage *r, struct compat_rusage __user *ru)
{
if (!access_ok(VERIFY_WRITE, ru, sizeof(*ru)) ||
__put_user(r->ru_utime.tv_sec, &ru->ru_utime.tv_sec) ||
@@ -348,7 +377,7 @@ asmlinkage long compat_sys_getrusage(int who, struct compat_rusage __user *ru)
if (ret)
return ret;
- if (put_compat_rusage(ru, &r))
+ if (put_compat_rusage(&r, ru))
return -EFAULT;
return 0;
@@ -374,7 +403,7 @@ compat_sys_wait4(compat_pid_t pid, compat_uint_t __user *stat_addr, int options,
set_fs (old_fs);
if (ret > 0) {
- if (put_compat_rusage(ru, &r))
+ if (put_compat_rusage(&r, ru))
return -EFAULT;
if (stat_addr && put_user(status, stat_addr))
return -EFAULT;
diff --git a/kernel/exit.c b/kernel/exit.c
index c6ceaaee8a2e..731b9ccd236b 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -89,12 +89,6 @@ repeat:
zap_leader = (leader->exit_signal == -1);
}
- p->parent->cutime += p->utime + p->cutime;
- p->parent->cstime += p->stime + p->cstime;
- p->parent->cmin_flt += p->min_flt + p->cmin_flt;
- p->parent->cmaj_flt += p->maj_flt + p->cmaj_flt;
- p->parent->cnvcsw += p->nvcsw + p->cnvcsw;
- p->parent->cnivcsw += p->nivcsw + p->cnivcsw;
sched_exit(p);
write_unlock_irq(&tasklist_lock);
spin_unlock(&p->proc_lock);
@@ -130,16 +124,15 @@ void unhash_process(struct task_struct *p)
int session_of_pgrp(int pgrp)
{
struct task_struct *p;
- struct list_head *l;
- struct pid *pid;
int sid = -1;
read_lock(&tasklist_lock);
- for_each_task_pid(pgrp, PIDTYPE_PGID, p, l, pid)
+ do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
if (p->signal->session > 0) {
sid = p->signal->session;
goto out;
}
+ } while_each_task_pid(pgrp, PIDTYPE_PGID, p);
p = find_task_by_pid(pgrp);
if (p)
sid = p->signal->session;
@@ -160,11 +153,9 @@ out:
static int will_become_orphaned_pgrp(int pgrp, task_t *ignored_task)
{
struct task_struct *p;
- struct list_head *l;
- struct pid *pid;
int ret = 1;
- for_each_task_pid(pgrp, PIDTYPE_PGID, p, l, pid) {
+ do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
if (p == ignored_task
|| p->state >= TASK_ZOMBIE
|| p->real_parent->pid == 1)
@@ -174,7 +165,7 @@ static int will_become_orphaned_pgrp(int pgrp, task_t *ignored_task)
ret = 0;
break;
}
- }
+ } while_each_task_pid(pgrp, PIDTYPE_PGID, p);
return ret; /* (sighing) "Often!" */
}
@@ -193,10 +184,8 @@ static inline int has_stopped_jobs(int pgrp)
{
int retval = 0;
struct task_struct *p;
- struct list_head *l;
- struct pid *pid;
- for_each_task_pid(pgrp, PIDTYPE_PGID, p, l, pid) {
+ do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
if (p->state != TASK_STOPPED)
continue;
@@ -212,7 +201,7 @@ static inline int has_stopped_jobs(int pgrp)
retval = 1;
break;
- }
+ } while_each_task_pid(pgrp, PIDTYPE_PGID, p);
return retval;
}
@@ -855,9 +844,6 @@ asmlinkage long sys_exit(int error_code)
task_t fastcall *next_thread(const task_t *p)
{
- const struct pid_link *link = p->pids + PIDTYPE_TGID;
- const struct list_head *tmp, *head = &link->pidptr->task_list;
-
#ifdef CONFIG_SMP
if (!p->sighand)
BUG();
@@ -865,11 +851,7 @@ task_t fastcall *next_thread(const task_t *p)
!rwlock_is_locked(&tasklist_lock))
BUG();
#endif
- tmp = link->pid_chain.next;
- if (tmp == head)
- tmp = head->next;
-
- return pid_task(tmp, PIDTYPE_TGID);
+ return pid_task(p->pids[PIDTYPE_TGID].pid_list.next, PIDTYPE_TGID);
}
EXPORT_SYMBOL(next_thread);
@@ -957,16 +939,64 @@ static int eligible_child(pid_t pid, int options, task_t *p)
return 1;
}
+static int wait_noreap_copyout(task_t *p, pid_t pid, uid_t uid,
+ int why, int status,
+ struct siginfo __user *infop)
+{
+ int retval = getrusage(p, RUSAGE_BOTH, &infop->si_rusage);
+ put_task_struct(p);
+ if (!retval)
+ retval = put_user(SIGCHLD, &infop->si_signo);
+ if (!retval)
+ retval = put_user(0, &infop->si_errno);
+ if (!retval)
+ retval = put_user((short)why, &infop->si_code);
+ if (!retval)
+ retval = put_user(pid, &infop->si_pid);
+ if (!retval)
+ retval = put_user(uid, &infop->si_uid);
+ if (!retval)
+ retval = put_user(status, &infop->si_status);
+ if (!retval)
+ retval = pid;
+ return retval;
+}
+
/*
* Handle sys_wait4 work for one task in state TASK_ZOMBIE. We hold
* read_lock(&tasklist_lock) on entry. If we return zero, we still hold
* the lock and this task is uninteresting. If we return nonzero, we have
* released the lock and the system call should return.
*/
-static int wait_task_zombie(task_t *p, unsigned int __user *stat_addr, struct rusage __user *ru)
+static int wait_task_zombie(task_t *p, int noreap,
+ struct siginfo __user *infop,
+ int __user *stat_addr, struct rusage __user *ru)
{
unsigned long state;
int retval;
+ int status;
+
+ if (unlikely(noreap)) {
+ pid_t pid = p->pid;
+ uid_t uid = p->uid;
+ int exit_code = p->exit_code;
+ int why, status;
+
+ if (unlikely(p->state != TASK_ZOMBIE))
+ return 0;
+ if (unlikely(p->exit_signal == -1 && p->ptrace == 0))
+ return 0;
+ get_task_struct(p);
+ read_unlock(&tasklist_lock);
+ if ((exit_code & 0x7f) == 0) {
+ why = CLD_EXITED;
+ status = exit_code >> 8;
+ } else {
+ why = (exit_code & 0x80) ? CLD_DUMPED : CLD_KILLED;
+ status = exit_code & 0x7f;
+ }
+ return wait_noreap_copyout(p, pid, uid, why, status, infop);
+ }
/*
* Try to move the task's state to DEAD
@@ -977,12 +1007,45 @@ static int wait_task_zombie(task_t *p, unsigned int __user *stat_addr, struct ru
BUG_ON(state != TASK_DEAD);
return 0;
}
- if (unlikely(p->exit_signal == -1 && p->ptrace == 0))
+ if (unlikely(p->exit_signal == -1 && p->ptrace == 0)) {
/*
* This can only happen in a race with a ptraced thread
* dying on another processor.
*/
return 0;
+ }
+
+ if (likely(p->real_parent == p->parent) && likely(p->signal)) {
+ /*
+ * The resource counters for the group leader are in its
+ * own task_struct. Those for dead threads in the group
+ * are in its signal_struct, as are those for the child
+ * processes it has previously reaped. All these
+ * accumulate in the parent's signal_struct c* fields.
+ *
+ * We don't bother to take a lock here to protect these
+ * p->signal fields, because they are only touched by
+ * __exit_signal, which runs with tasklist_lock
+ * write-locked anyway, and so is excluded here. We do
+ * need to protect the access to p->parent->signal fields,
+ * as other threads in the parent group can be right
+ * here reaping other children at the same time.
+ */
+ spin_lock_irq(&p->parent->sighand->siglock);
+ p->parent->signal->cutime +=
+ p->utime + p->signal->utime + p->signal->cutime;
+ p->parent->signal->cstime +=
+ p->stime + p->signal->stime + p->signal->cstime;
+ p->parent->signal->cmin_flt +=
+ p->min_flt + p->signal->min_flt + p->signal->cmin_flt;
+ p->parent->signal->cmaj_flt +=
+ p->maj_flt + p->signal->maj_flt + p->signal->cmaj_flt;
+ p->parent->signal->cnvcsw +=
+ p->nvcsw + p->signal->nvcsw + p->signal->cnvcsw;
+ p->parent->signal->cnivcsw +=
+ p->nivcsw + p->signal->nivcsw + p->signal->cnivcsw;
+ spin_unlock_irq(&p->parent->sighand->siglock);
+ }
/*
* Now we are sure this task is interesting, and no other
@@ -991,12 +1054,32 @@ static int wait_task_zombie(task_t *p, unsigned int __user *stat_addr, struct ru
read_unlock(&tasklist_lock);
retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
- if (!retval && stat_addr) {
- if (p->signal->group_exit)
- retval = put_user(p->signal->group_exit_code, stat_addr);
- else
- retval = put_user(p->exit_code, stat_addr);
+ status = p->signal->group_exit
+ ? p->signal->group_exit_code : p->exit_code;
+ if (!retval && stat_addr)
+ retval = put_user(status, stat_addr);
+ if (!retval && infop)
+ retval = put_user(SIGCHLD, &infop->si_signo);
+ if (!retval && infop)
+ retval = put_user(0, &infop->si_errno);
+ if (!retval && infop) {
+ int why;
+
+ if ((status & 0x7f) == 0) {
+ why = CLD_EXITED;
+ status >>= 8;
+ } else {
+ why = (status & 0x80) ? CLD_DUMPED : CLD_KILLED;
+ status &= 0x7f;
+ }
+ retval = put_user((short)why, &infop->si_code);
+ if (!retval)
+ retval = put_user(status, &infop->si_status);
}
+ if (!retval && infop)
+ retval = put_user(p->pid, &infop->si_pid);
+ if (!retval && infop)
+ retval = put_user(p->uid, &infop->si_uid);
if (retval) {
p->state = TASK_ZOMBIE;
return retval;
@@ -1009,8 +1092,9 @@ static int wait_task_zombie(task_t *p, unsigned int __user *stat_addr, struct ru
__ptrace_unlink(p);
p->state = TASK_ZOMBIE;
/*
- * If this is not a detached task, notify the parent. If it's
- * still not detached after that, don't release it now.
+ * If this is not a detached task, notify the parent.
+ * If it's still not detached after that, don't release
+ * it now.
*/
if (p->exit_signal != -1) {
do_notify_parent(p, p->exit_signal);
@@ -1032,9 +1116,9 @@ static int wait_task_zombie(task_t *p, unsigned int __user *stat_addr, struct ru
* the lock and this task is uninteresting. If we return nonzero, we have
* released the lock and the system call should return.
*/
-static int wait_task_stopped(task_t *p, int delayed_group_leader,
- unsigned int __user *stat_addr,
- struct rusage __user *ru)
+static int wait_task_stopped(task_t *p, int delayed_group_leader, int noreap,
+ struct siginfo __user *infop,
+ int __user *stat_addr, struct rusage __user *ru)
{
int retval, exit_code;
@@ -1057,6 +1141,21 @@ static int wait_task_stopped(task_t *p, int delayed_group_leader,
*/
get_task_struct(p);
read_unlock(&tasklist_lock);
+
+ if (unlikely(noreap)) {
+ pid_t pid = p->pid;
+ uid_t uid = p->uid;
+ int why = (p->ptrace & PT_PTRACED) ? CLD_TRAPPED : CLD_STOPPED;
+
+ exit_code = p->exit_code;
+ if (unlikely(!exit_code) ||
+ unlikely(p->state > TASK_STOPPED))
+ goto bail_ref;
+ return wait_noreap_copyout(p, pid, uid,
+ why, (exit_code << 8) | 0x7f,
+ infop);
+ }
+
write_lock_irq(&tasklist_lock);
/*
@@ -1082,6 +1181,7 @@ static int wait_task_stopped(task_t *p, int delayed_group_leader,
* resumed, or it resumed and then died.
*/
write_unlock_irq(&tasklist_lock);
+bail_ref:
put_task_struct(p);
read_lock(&tasklist_lock);
return 0;
@@ -1096,6 +1196,20 @@ static int wait_task_stopped(task_t *p, int delayed_group_leader,
retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
if (!retval && stat_addr)
retval = put_user((exit_code << 8) | 0x7f, stat_addr);
+ if (!retval && infop)
+ retval = put_user(SIGCHLD, &infop->si_signo);
+ if (!retval && infop)
+ retval = put_user(0, &infop->si_errno);
+ if (!retval && infop)
+ retval = put_user((short)((p->ptrace & PT_PTRACED)
+ ? CLD_TRAPPED : CLD_STOPPED),
+ &infop->si_code);
+ if (!retval && infop)
+ retval = put_user(exit_code, &infop->si_status);
+ if (!retval && infop)
+ retval = put_user(p->pid, &infop->si_pid);
+ if (!retval && infop)
+ retval = put_user(p->uid, &infop->si_uid);
if (!retval)
retval = p->pid;
put_task_struct(p);
@@ -1104,15 +1218,13 @@ static int wait_task_stopped(task_t *p, int delayed_group_leader,
return retval;
}
-asmlinkage long sys_wait4(pid_t pid,unsigned int __user *stat_addr, int options, struct rusage __user *ru)
+static long do_wait(pid_t pid, int options, struct siginfo __user *infop,
+ int __user *stat_addr, struct rusage __user *ru)
{
DECLARE_WAITQUEUE(wait, current);
struct task_struct *tsk;
int flag, retval;
- if (options & ~(WNOHANG|WUNTRACED|__WNOTHREAD|__WCLONE|__WALL))
- return -EINVAL;
-
add_wait_queue(&current->wait_chldexit,&wait);
repeat:
flag = 0;
@@ -1138,25 +1250,60 @@ repeat:
!(p->ptrace & PT_PTRACED))
continue;
retval = wait_task_stopped(p, ret == 2,
+ (options & WNOWAIT),
+ infop,
stat_addr, ru);
if (retval != 0) /* He released the lock. */
- goto end_wait4;
+ goto end;
break;
case TASK_ZOMBIE:
/*
* Eligible but we cannot release it yet:
*/
if (ret == 2)
+ goto check_continued;
+ if (!likely(options & WEXITED))
continue;
- retval = wait_task_zombie(p, stat_addr, ru);
+ retval = wait_task_zombie(
+ p, (options & WNOWAIT),
+ infop, stat_addr, ru);
if (retval != 0) /* He released the lock. */
- goto end_wait4;
+ goto end;
+ break;
+ case TASK_DEAD:
+ continue;
+ default:
+check_continued:
+ if (!unlikely(options & WCONTINUED))
+ continue;
+ if (unlikely(!p->signal))
+ continue;
+ spin_lock_irq(&p->sighand->siglock);
+ if (p->signal->stop_state < 0) {
+ pid_t pid;
+ uid_t uid;
+
+ if (!(options & WNOWAIT))
+ p->signal->stop_state = 0;
+ spin_unlock_irq(&p->sighand->siglock);
+ pid = p->pid;
+ uid = p->uid;
+ get_task_struct(p);
+ read_unlock(&tasklist_lock);
+ retval = wait_noreap_copyout(p, pid,
+ uid, CLD_CONTINUED,
+ SIGCONT, infop);
+ BUG_ON(retval == 0);
+ goto end;
+ }
+ spin_unlock_irq(&p->sighand->siglock);
break;
}
}
if (!flag) {
- list_for_each (_p,&tsk->ptrace_children) {
- p = list_entry(_p,struct task_struct,ptrace_list);
+ list_for_each(_p, &tsk->ptrace_children) {
+ p = list_entry(_p, struct task_struct,
+ ptrace_list);
if (!eligible_child(pid, options, p))
continue;
flag = 1;
@@ -1169,24 +1316,84 @@ repeat:
if (tsk->signal != current->signal)
BUG();
} while (tsk != current);
+
read_unlock(&tasklist_lock);
if (flag) {
retval = 0;
if (options & WNOHANG)
- goto end_wait4;
+ goto end;
retval = -ERESTARTSYS;
if (signal_pending(current))
- goto end_wait4;
+ goto end;
schedule();
goto repeat;
}
retval = -ECHILD;
-end_wait4:
+end:
current->state = TASK_RUNNING;
remove_wait_queue(&current->wait_chldexit,&wait);
+ if (infop) {
+ if (retval > 0)
+ retval = 0;
+ else {
+ /*
+ * For a WNOHANG return, clear out all the fields
+ * we would set so the user can easily tell the
+ * difference.
+ */
+ if (!retval)
+ retval = put_user(0, &infop->si_signo);
+ if (!retval)
+ retval = put_user(0, &infop->si_errno);
+ if (!retval)
+ retval = put_user(0, &infop->si_code);
+ if (!retval)
+ retval = put_user(0, &infop->si_pid);
+ if (!retval)
+ retval = put_user(0, &infop->si_uid);
+ if (!retval)
+ retval = put_user(0, &infop->si_status);
+ }
+ }
return retval;
}
+asmlinkage long sys_waitid(int which, pid_t pid,
+ struct siginfo __user *infop, int options)
+{
+ if (options & ~(WNOHANG|WNOWAIT|WEXITED|WSTOPPED|WCONTINUED))
+ return -EINVAL;
+ if (!(options & (WEXITED|WSTOPPED|WCONTINUED)))
+ return -EINVAL;
+
+ switch (which) {
+ case P_ALL:
+ pid = -1;
+ break;
+ case P_PID:
+ if (pid <= 0)
+ return -EINVAL;
+ break;
+ case P_PGID:
+ if (pid <= 0)
+ return -EINVAL;
+ pid = -pid;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return do_wait(pid, options, infop, NULL, &infop->si_rusage);
+}
+
+asmlinkage long sys_wait4(pid_t pid, unsigned int __user *stat_addr,
+ int options, struct rusage __user *ru)
+{
+ if (options & ~(WNOHANG|WUNTRACED|__WNOTHREAD|__WCLONE|__WALL))
+ return -EINVAL;
+ return do_wait(pid, options | WEXITED, NULL, stat_addr, ru);
+}
+
#ifdef __ARCH_WANT_SYS_WAITPID
/*
diff --git a/kernel/fork.c b/kernel/fork.c
index fed7a0772660..78db8811c834 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -559,8 +559,7 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
int retval;
tsk->min_flt = tsk->maj_flt = 0;
- tsk->cmin_flt = tsk->cmaj_flt = 0;
- tsk->nvcsw = tsk->nivcsw = tsk->cnvcsw = tsk->cnivcsw = 0;
+ tsk->nvcsw = tsk->nivcsw = 0;
tsk->mm = NULL;
tsk->active_mm = NULL;
@@ -867,6 +866,10 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
sig->leader = 0; /* session leadership doesn't inherit */
sig->tty_old_pgrp = 0;
+ sig->utime = sig->stime = sig->cutime = sig->cstime = 0;
+ sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
+ sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
+
return 0;
}
@@ -986,7 +989,6 @@ static task_t *copy_process(unsigned long clone_flags,
p->real_timer.data = (unsigned long) p;
p->utime = p->stime = 0;
- p->cutime = p->cstime = 0;
p->lock_depth = -1; /* -1 = no lock */
p->start_time = get_jiffies_64();
p->security = NULL;
@@ -1118,18 +1120,17 @@ static task_t *copy_process(unsigned long clone_flags,
}
SET_LINKS(p);
- if (p->ptrace & PT_PTRACED)
+ if (unlikely(p->ptrace & PT_PTRACED))
__ptrace_link(p, current->parent);
attach_pid(p, PIDTYPE_PID, p->pid);
+ attach_pid(p, PIDTYPE_TGID, p->tgid);
if (thread_group_leader(p)) {
- attach_pid(p, PIDTYPE_TGID, p->tgid);
attach_pid(p, PIDTYPE_PGID, process_group(p));
attach_pid(p, PIDTYPE_SID, p->signal->session);
if (p->pid)
__get_cpu_var(process_counts)++;
- } else
- link_pid(p, p->pids + PIDTYPE_TGID, &p->group_leader->pids[PIDTYPE_TGID].pid);
+ }
nr_threads++;
write_unlock_irq(&tasklist_lock);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
new file mode 100644
index 000000000000..01436a31c690
--- /dev/null
+++ b/kernel/kprobes.c
@@ -0,0 +1,146 @@
+/*
+ * Kernel Probes (KProbes)
+ * kernel/kprobes.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2002, 2004
+ *
+ * 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
+ * Probes initial implementation (includes suggestions from
+ * Rusty Russell).
+ * 2004-Aug Updated by Prasanna S Panchamukhi <prasanna@in.ibm.com> with
+ * hlists and exceptions notifier as suggested by Andi Kleen.
+ * 2004-July Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
+ * interface to access function arguments.
+ */
+#include <linux/kprobes.h>
+#include <linux/spinlock.h>
+#include <linux/hash.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <asm/cacheflush.h>
+#include <asm/errno.h>
+#include <asm/kdebug.h>
+
+#define KPROBE_HASH_BITS 6
+#define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS)
+
+static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
+
+unsigned int kprobe_cpu = NR_CPUS;
+static spinlock_t kprobe_lock = SPIN_LOCK_UNLOCKED;
+
+/* Locks kprobe: irqs must be disabled */
+void lock_kprobes(void)
+{
+ spin_lock(&kprobe_lock);
+ kprobe_cpu = smp_processor_id();
+}
+
+void unlock_kprobes(void)
+{
+ kprobe_cpu = NR_CPUS;
+ spin_unlock(&kprobe_lock);
+}
+
+/* You have to be holding the kprobe_lock */
+struct kprobe *get_kprobe(void *addr)
+{
+ struct hlist_head *head;
+ struct hlist_node *node;
+
+ head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)];
+ hlist_for_each(node, head) {
+ struct kprobe *p = hlist_entry(node, struct kprobe, hlist);
+ if (p->addr == addr)
+ return p;
+ }
+ return NULL;
+}
+
+int register_kprobe(struct kprobe *p)
+{
+ int ret = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&kprobe_lock, flags);
+ INIT_HLIST_NODE(&p->hlist);
+ if (get_kprobe(p->addr)) {
+ ret = -EEXIST;
+ goto out;
+ }
+ hlist_add_head(&p->hlist,
+ &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);
+
+ arch_prepare_kprobe(p);
+ p->opcode = *p->addr;
+ *p->addr = BREAKPOINT_INSTRUCTION;
+ flush_icache_range((unsigned long) p->addr,
+ (unsigned long) p->addr + sizeof(kprobe_opcode_t));
+ out:
+ spin_unlock_irqrestore(&kprobe_lock, flags);
+ return ret;
+}
+
+void unregister_kprobe(struct kprobe *p)
+{
+ unsigned long flags;
+ spin_lock_irqsave(&kprobe_lock, flags);
+ *p->addr = p->opcode;
+ hlist_del(&p->hlist);
+ flush_icache_range((unsigned long) p->addr,
+ (unsigned long) p->addr + sizeof(kprobe_opcode_t));
+ spin_unlock_irqrestore(&kprobe_lock, flags);
+}
+
+static struct notifier_block kprobe_exceptions_nb = {
+ .notifier_call = kprobe_exceptions_notify,
+};
+
+int register_jprobe(struct jprobe *jp)
+{
+ /* Todo: Verify probepoint is a function entry point */
+ jp->kp.pre_handler = setjmp_pre_handler;
+ jp->kp.break_handler = longjmp_break_handler;
+
+ return register_kprobe(&jp->kp);
+}
+
+void unregister_jprobe(struct jprobe *jp)
+{
+ unregister_kprobe(&jp->kp);
+}
+
+static int __init init_kprobes(void)
+{
+ int i, err = 0;
+
+ /* FIXME allocate the probe table, currently defined statically */
+ /* initialize all list heads */
+ for (i = 0; i < KPROBE_TABLE_SIZE; i++)
+ INIT_HLIST_HEAD(&kprobe_table[i]);
+
+ err = register_die_notifier(&kprobe_exceptions_nb);
+ return err;
+}
+
+__initcall(init_kprobes);
+
+EXPORT_SYMBOL_GPL(register_kprobe);
+EXPORT_SYMBOL_GPL(unregister_kprobe);
+EXPORT_SYMBOL_GPL(register_jprobe);
+EXPORT_SYMBOL_GPL(unregister_jprobe);
+EXPORT_SYMBOL_GPL(jprobe_return);
diff --git a/kernel/module.c b/kernel/module.c
index aa91a98ffb0b..8c17a4ce0707 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1538,9 +1538,6 @@ static struct module *load_module(void __user *umod,
secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
sechdrs[0].sh_addr = 0;
- /* And these should exist, but gcc whinges if we don't init them */
- symindex = strindex = 0;
-
for (i = 1; i < hdr->e_shnum; i++) {
if (sechdrs[i].sh_type != SHT_NOBITS
&& len < sechdrs[i].sh_offset + sechdrs[i].sh_size)
@@ -1572,6 +1569,13 @@ static struct module *load_module(void __user *umod,
}
mod = (void *)sechdrs[modindex].sh_addr;
+ if (symindex == 0) {
+ printk(KERN_WARNING "%s: module has no symbols (stripped?)\n",
+ mod->name);
+ err = -ENOEXEC;
+ goto free_hdr;
+ }
+
/* Optional sections */
exportindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab");
gplindex = find_sec(hdr, sechdrs, secstrings, "__ksymtab_gpl");
diff --git a/kernel/pid.c b/kernel/pid.c
index 6ed44f56ca45..57527f0cda5e 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -27,7 +27,7 @@
#include <linux/hash.h>
#define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift)
-static struct list_head *pid_hash[PIDTYPE_MAX];
+static struct hlist_head *pid_hash[PIDTYPE_MAX];
static int pidhash_shift;
int pid_max = PID_MAX_DEFAULT;
@@ -146,73 +146,66 @@ failure:
return -1;
}
-fastcall struct pid *find_pid(enum pid_type type, int nr)
+struct pid * fastcall find_pid(enum pid_type type, int nr)
{
- struct list_head *elem, *bucket = &pid_hash[type][pid_hashfn(nr)];
+ struct hlist_node *elem;
struct pid *pid;
- __list_for_each(elem, bucket) {
- pid = list_entry(elem, struct pid, hash_chain);
+ hlist_for_each_entry(pid, elem,
+ &pid_hash[type][pid_hashfn(nr)], pid_chain) {
if (pid->nr == nr)
return pid;
}
return NULL;
}
-void fastcall link_pid(task_t *task, struct pid_link *link, struct pid *pid)
-{
- atomic_inc(&pid->count);
- list_add_tail(&link->pid_chain, &pid->task_list);
- link->pidptr = pid;
-}
-
int fastcall attach_pid(task_t *task, enum pid_type type, int nr)
{
- struct pid *pid = find_pid(type, nr);
-
- if (pid)
- atomic_inc(&pid->count);
- else {
- pid = &task->pids[type].pid;
- pid->nr = nr;
- atomic_set(&pid->count, 1);
- INIT_LIST_HEAD(&pid->task_list);
- pid->task = task;
- get_task_struct(task);
- list_add(&pid->hash_chain, &pid_hash[type][pid_hashfn(nr)]);
+ struct pid *pid, *task_pid;
+
+ task_pid = &task->pids[type];
+ pid = find_pid(type, nr);
+ if (pid == NULL) {
+ hlist_add_head(&task_pid->pid_chain,
+ &pid_hash[type][pid_hashfn(nr)]);
+ INIT_LIST_HEAD(&task_pid->pid_list);
+ } else {
+ INIT_HLIST_NODE(&task_pid->pid_chain);
+ list_add_tail(&task_pid->pid_list, &pid->pid_list);
}
- list_add_tail(&task->pids[type].pid_chain, &pid->task_list);
- task->pids[type].pidptr = pid;
+ task_pid->nr = nr;
return 0;
}
static inline int __detach_pid(task_t *task, enum pid_type type)
{
- struct pid_link *link = task->pids + type;
- struct pid *pid = link->pidptr;
+ struct pid *pid, *pid_next;
int nr;
- list_del(&link->pid_chain);
- if (!atomic_dec_and_test(&pid->count))
- return 0;
-
+ pid = &task->pids[type];
+ if (!hlist_unhashed(&pid->pid_chain)) {
+ hlist_del(&pid->pid_chain);
+ if (!list_empty(&pid->pid_list)) {
+ pid_next = list_entry(pid->pid_list.next,
+ struct pid, pid_list);
+ /* insert next pid from pid_list to hash */
+ hlist_add_head(&pid_next->pid_chain,
+ &pid_hash[type][pid_hashfn(pid_next->nr)]);
+ }
+ }
+ list_del(&pid->pid_list);
nr = pid->nr;
- list_del(&pid->hash_chain);
- put_task_struct(pid->task);
+ pid->nr = 0;
return nr;
}
-static void _detach_pid(task_t *task, enum pid_type type)
-{
- __detach_pid(task, type);
-}
-
void fastcall detach_pid(task_t *task, enum pid_type type)
{
- int nr = __detach_pid(task, type);
+ int nr;
+ nr = __detach_pid(task, type);
if (!nr)
return;
@@ -222,16 +215,18 @@ void fastcall detach_pid(task_t *task, enum pid_type type)
free_pidmap(nr);
}
-task_t *find_task_by_pid(int nr)
+task_t *find_task_by_pid_type(int type, int nr)
{
- struct pid *pid = find_pid(PIDTYPE_PID, nr);
+ struct pid *pid;
+ pid = find_pid(type, nr);
if (!pid)
return NULL;
- return pid_task(pid->task_list.next, PIDTYPE_PID);
+
+ return pid_task(&pid->pid_list, type);
}
-EXPORT_SYMBOL(find_task_by_pid);
+EXPORT_SYMBOL(find_task_by_pid_type);
/*
* This function switches the PIDs if a non-leader thread calls
@@ -240,13 +235,13 @@ EXPORT_SYMBOL(find_task_by_pid);
*/
void switch_exec_pids(task_t *leader, task_t *thread)
{
- _detach_pid(leader, PIDTYPE_PID);
- _detach_pid(leader, PIDTYPE_TGID);
- _detach_pid(leader, PIDTYPE_PGID);
- _detach_pid(leader, PIDTYPE_SID);
+ __detach_pid(leader, PIDTYPE_PID);
+ __detach_pid(leader, PIDTYPE_TGID);
+ __detach_pid(leader, PIDTYPE_PGID);
+ __detach_pid(leader, PIDTYPE_SID);
- _detach_pid(thread, PIDTYPE_PID);
- _detach_pid(thread, PIDTYPE_TGID);
+ __detach_pid(thread, PIDTYPE_PID);
+ __detach_pid(thread, PIDTYPE_TGID);
leader->pid = leader->tgid = thread->pid;
thread->pid = thread->tgid;
@@ -271,15 +266,15 @@ void switch_exec_pids(task_t *leader, task_t *thread)
void __init pidhash_init(void)
{
int i, j, pidhash_size;
- unsigned long megabytes = max_pfn >> (20 - PAGE_SHIFT);
+ unsigned long megabytes = nr_kernel_pages >> (20 - PAGE_SHIFT);
pidhash_shift = max(4, fls(megabytes * 4));
pidhash_shift = min(12, pidhash_shift);
pidhash_size = 1 << pidhash_shift;
- printk("PID hash table entries: %d (order %d: %Zd bytes)\n",
+ printk("PID hash table entries: %d (order: %d, %Zd bytes)\n",
pidhash_size, pidhash_shift,
- pidhash_size * sizeof(struct list_head));
+ PIDTYPE_MAX * pidhash_size * sizeof(struct hlist_head));
for (i = 0; i < PIDTYPE_MAX; i++) {
pid_hash[i] = alloc_bootmem(pidhash_size *
@@ -287,7 +282,7 @@ void __init pidhash_init(void)
if (!pid_hash[i])
panic("Could not alloc pidhash!\n");
for (j = 0; j < pidhash_size; j++)
- INIT_LIST_HEAD(&pid_hash[i][j]);
+ INIT_HLIST_HEAD(&pid_hash[i][j]);
}
}
diff --git a/kernel/signal.c b/kernel/signal.c
index e5d6cbc50c1e..8b05f0b8c2dc 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -26,6 +26,8 @@
#include <asm/unistd.h>
#include <asm/siginfo.h>
+extern void k_getrusage(struct task_struct *, int, struct rusage *);
+
/*
* SLAB caches for signal bits.
*/
@@ -367,6 +369,22 @@ void __exit_signal(struct task_struct *tsk)
if (tsk == sig->curr_target)
sig->curr_target = next_thread(tsk);
tsk->signal = NULL;
+ /*
+ * Accumulate here the counters for all threads but the
+ * group leader as they die, so they can be added into
+ * the process-wide totals when those are taken.
+ * The group leader stays around as a zombie as long
+ * as there are other threads. When it gets reaped,
+ * the exit.c code will add its counts into these totals.
+ * We won't ever get here for the group leader, since it
+ * will have been the last reference on the signal_struct.
+ */
+ sig->utime += tsk->utime;
+ sig->stime += tsk->stime;
+ sig->min_flt += tsk->min_flt;
+ sig->maj_flt += tsk->maj_flt;
+ sig->nvcsw += tsk->nvcsw;
+ sig->nivcsw += tsk->nivcsw;
spin_unlock(&sighand->siglock);
sig = NULL; /* Marker for below. */
}
@@ -660,12 +678,15 @@ static void handle_stop_signal(int sig, struct task_struct *p)
* the SIGCHLD was pending on entry to this kill.
*/
p->signal->group_stop_count = 0;
+ p->signal->stop_state = 1;
+ spin_unlock(&p->sighand->siglock);
if (p->ptrace & PT_PTRACED)
do_notify_parent_cldstop(p, p->parent);
else
do_notify_parent_cldstop(
p->group_leader,
p->group_leader->real_parent);
+ spin_lock(&p->sighand->siglock);
}
rm_from_queue(SIG_KERNEL_STOP_MASK, &p->signal->shared_pending);
t = p;
@@ -696,6 +717,23 @@ static void handle_stop_signal(int sig, struct task_struct *p)
t = next_thread(t);
} while (t != p);
+
+ if (p->signal->stop_state > 0) {
+ /*
+ * We were in fact stopped, and are now continued.
+ * Notify the parent with CLD_CONTINUED.
+ */
+ p->signal->stop_state = -1;
+ p->signal->group_exit_code = 0;
+ spin_unlock(&p->sighand->siglock);
+ if (p->ptrace & PT_PTRACED)
+ do_notify_parent_cldstop(p, p->parent);
+ else
+ do_notify_parent_cldstop(
+ p->group_leader,
+ p->group_leader->real_parent);
+ spin_lock(&p->sighand->siglock);
+ }
}
}
@@ -1072,8 +1110,6 @@ int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
int __kill_pg_info(int sig, struct siginfo *info, pid_t pgrp)
{
struct task_struct *p;
- struct list_head *l;
- struct pid *pid;
int retval, success;
if (pgrp <= 0)
@@ -1081,11 +1117,11 @@ int __kill_pg_info(int sig, struct siginfo *info, pid_t pgrp)
success = 0;
retval = -ESRCH;
- for_each_task_pid(pgrp, PIDTYPE_PGID, p, l, pid) {
+ do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
int err = group_send_sig_info(sig, info, p);
success |= !err;
retval = err;
- }
+ } while_each_task_pid(pgrp, PIDTYPE_PGID, p);
return success ? 0 : retval;
}
@@ -1112,8 +1148,6 @@ int
kill_sl_info(int sig, struct siginfo *info, pid_t sid)
{
int err, retval = -EINVAL;
- struct pid *pid;
- struct list_head *l;
struct task_struct *p;
if (sid <= 0)
@@ -1121,13 +1155,13 @@ kill_sl_info(int sig, struct siginfo *info, pid_t sid)
retval = -ESRCH;
read_lock(&tasklist_lock);
- for_each_task_pid(sid, PIDTYPE_SID, p, l, pid) {
+ do_each_task_pid(sid, PIDTYPE_SID, p) {
if (!p->signal->leader)
continue;
err = group_send_sig_info(sig, info, p);
if (retval)
retval = err;
- }
+ } while_each_task_pid(sid, PIDTYPE_SID, p);
read_unlock(&tasklist_lock);
out:
return retval;
@@ -1455,8 +1489,8 @@ void do_notify_parent(struct task_struct *tsk, int sig)
if (sig == -1)
BUG();
- BUG_ON(tsk->group_leader != tsk && tsk->group_leader->state != TASK_ZOMBIE && !tsk->ptrace);
- BUG_ON(tsk->group_leader == tsk && !thread_group_empty(tsk) && !tsk->ptrace);
+ BUG_ON(!tsk->ptrace &&
+ (tsk->group_leader != tsk || !thread_group_empty(tsk)));
info.si_signo = sig;
info.si_errno = 0;
@@ -1464,8 +1498,9 @@ void do_notify_parent(struct task_struct *tsk, int sig)
info.si_uid = tsk->uid;
/* FIXME: find out whether or not this is supposed to be c*time. */
- info.si_utime = tsk->utime;
- info.si_stime = tsk->stime;
+ info.si_utime = tsk->utime + tsk->signal->utime;
+ info.si_stime = tsk->stime + tsk->signal->stime;
+ k_getrusage(tsk, RUSAGE_BOTH, &info.si_rusage);
status = tsk->exit_code & 0x7f;
why = SI_KERNEL; /* shouldn't happen */
@@ -1555,9 +1590,16 @@ do_notify_parent_cldstop(struct task_struct *tsk, struct task_struct *parent)
/* FIXME: find out whether or not this is supposed to be c*time. */
info.si_utime = tsk->utime;
info.si_stime = tsk->stime;
+ k_getrusage(tsk, RUSAGE_BOTH, &info.si_rusage);
- info.si_status = tsk->exit_code & 0x7f;
- info.si_code = CLD_STOPPED;
+ info.si_status = (tsk->signal ? tsk->signal->group_exit_code :
+ tsk->exit_code) & 0x7f;
+ if (info.si_status == 0) {
+ info.si_status = SIGCONT;
+ info.si_code = CLD_CONTINUED;
+ } else {
+ info.si_code = CLD_STOPPED;
+ }
sighand = parent->sighand;
spin_lock_irqsave(&sighand->siglock, flags);
@@ -1623,14 +1665,17 @@ do_signal_stop(int signr)
stop_count = --sig->group_stop_count;
current->exit_code = signr;
set_current_state(TASK_STOPPED);
+ if (stop_count == 0)
+ sig->stop_state = 1;
spin_unlock_irq(&sighand->siglock);
}
else if (thread_group_empty(current)) {
/*
* Lock must be held through transition to stopped state.
*/
- current->exit_code = signr;
+ current->exit_code = current->signal->group_exit_code = signr;
set_current_state(TASK_STOPPED);
+ sig->stop_state = 1;
spin_unlock_irq(&sighand->siglock);
}
else {
@@ -1696,6 +1741,8 @@ do_signal_stop(int signr)
current->exit_code = signr;
set_current_state(TASK_STOPPED);
+ if (stop_count == 0)
+ sig->stop_state = 1;
spin_unlock_irq(&sighand->siglock);
read_unlock(&tasklist_lock);
@@ -1736,6 +1783,8 @@ static inline int handle_group_stop(void)
* without any associated signal being in our queue.
*/
stop_count = --current->signal->group_stop_count;
+ if (stop_count == 0)
+ current->signal->stop_state = 1;
current->exit_code = current->signal->group_exit_code;
set_current_state(TASK_STOPPED);
spin_unlock_irq(&current->sighand->siglock);
@@ -2098,6 +2147,8 @@ int copy_siginfo_to_user(siginfo_t __user *to, siginfo_t *from)
err |= __put_user(from->si_status, &to->si_status);
err |= __put_user(from->si_utime, &to->si_utime);
err |= __put_user(from->si_stime, &to->si_stime);
+ err |= __copy_to_user(&to->si_rusage, &from->si_rusage,
+ sizeof(to->si_rusage));
break;
case __SI_RT: /* This is not generated by the kernel as of now. */
case __SI_MESGQ: /* But this is */
diff --git a/kernel/sys.c b/kernel/sys.c
index 1bbc66a60b8c..a4b29df201f6 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -310,8 +310,6 @@ asmlinkage long sys_setpriority(int which, int who, int niceval)
{
struct task_struct *g, *p;
struct user_struct *user;
- struct pid *pid;
- struct list_head *l;
int error = -EINVAL;
if (which > 2 || which < 0)
@@ -336,8 +334,9 @@ asmlinkage long sys_setpriority(int which, int who, int niceval)
case PRIO_PGRP:
if (!who)
who = process_group(current);
- for_each_task_pid(who, PIDTYPE_PGID, p, l, pid)
+ do_each_task_pid(who, PIDTYPE_PGID, p) {
error = set_one_prio(p, niceval, error);
+ } while_each_task_pid(who, PIDTYPE_PGID, p);
break;
case PRIO_USER:
if (!who)
@@ -371,8 +370,6 @@ out:
asmlinkage long sys_getpriority(int which, int who)
{
struct task_struct *g, *p;
- struct list_head *l;
- struct pid *pid;
struct user_struct *user;
long niceval, retval = -ESRCH;
@@ -394,11 +391,11 @@ asmlinkage long sys_getpriority(int which, int who)
case PRIO_PGRP:
if (!who)
who = process_group(current);
- for_each_task_pid(who, PIDTYPE_PGID, p, l, pid) {
+ do_each_task_pid(who, PIDTYPE_PGID, p) {
niceval = 20 - task_nice(p);
if (niceval > retval)
retval = niceval;
- }
+ } while_each_task_pid(who, PIDTYPE_PGID, p);
break;
case PRIO_USER:
if (!who)
@@ -947,10 +944,39 @@ asmlinkage long sys_times(struct tms __user * tbuf)
*/
if (tbuf) {
struct tms tmp;
- tmp.tms_utime = jiffies_to_clock_t(current->utime);
- tmp.tms_stime = jiffies_to_clock_t(current->stime);
- tmp.tms_cutime = jiffies_to_clock_t(current->cutime);
- tmp.tms_cstime = jiffies_to_clock_t(current->cstime);
+ struct task_struct *tsk = current;
+ struct task_struct *t;
+ unsigned long utime, stime, cutime, cstime;
+
+ read_lock(&tasklist_lock);
+ utime = tsk->signal->utime;
+ stime = tsk->signal->stime;
+ t = tsk;
+ do {
+ utime += t->utime;
+ stime += t->stime;
+ t = next_thread(t);
+ } while (t != tsk);
+
+ /*
+ * While we have tasklist_lock read-locked, no dying thread
+ * can be updating current->signal->[us]time. Instead,
+ * their counts were picked up by the live-thread loop above.
+ * However, another thread can come in right now and
+ * do a wait call that updates current->signal->c[us]time.
+ * To make sure we always see that pair updated atomically,
+ * we take the siglock around fetching them.
+ */
+ spin_lock_irq(&tsk->sighand->siglock);
+ cutime = tsk->signal->cutime;
+ cstime = tsk->signal->cstime;
+ spin_unlock_irq(&tsk->sighand->siglock);
+ read_unlock(&tasklist_lock);
+
+ tmp.tms_utime = jiffies_to_clock_t(utime);
+ tmp.tms_stime = jiffies_to_clock_t(stime);
+ tmp.tms_cutime = jiffies_to_clock_t(cutime);
+ tmp.tms_cstime = jiffies_to_clock_t(cstime);
if (copy_to_user(tbuf, &tmp, sizeof(struct tms)))
return -EFAULT;
}
@@ -1015,12 +1041,11 @@ asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
if (pgid != pid) {
struct task_struct *p;
- struct pid *pid;
- struct list_head *l;
- for_each_task_pid(pgid, PIDTYPE_PGID, p, l, pid)
+ do_each_task_pid(pgid, PIDTYPE_PGID, p) {
if (p->signal->session == current->signal->session)
goto ok_pgid;
+ } while_each_task_pid(pgid, PIDTYPE_PGID, p);
goto out;
}
@@ -1533,50 +1558,101 @@ asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
* a lot simpler! (Which we're not doing right now because we're not
* measuring them yet).
*
- * This is SMP safe. Either we are called from sys_getrusage on ourselves
- * below (we know we aren't going to exit/disappear and only we change our
- * rusage counters), or we are called from wait4() on a process which is
- * either stopped or zombied. In the zombied case the task won't get
- * reaped till shortly after the call to getrusage(), in both cases the
- * task being examined is in a frozen state so the counters won't change.
+ * This expects to be called with tasklist_lock read-locked or better,
+ * and the siglock not locked. It may momentarily take the siglock.
+ *
+ * When sampling multiple threads for RUSAGE_GROUP, under SMP we might have
+ * races with threads incrementing their own counters. But since word
+ * reads are atomic, we either get new values or old values and we don't
+ * care which for the sums. We always take the siglock to protect reading
+ * the c* fields from p->signal from races with exit.c updating those
+ * fields when reaping, so a sample either gets all the additions of a
+ * given child (sampled after it's reaped) or none (sampled before reaping).
*/
-int getrusage(struct task_struct *p, int who, struct rusage __user *ru)
+
+void k_getrusage(struct task_struct *p, int who, struct rusage *r)
{
- struct rusage r;
+ struct task_struct *t;
+ unsigned long flags;
+ unsigned long utime, stime;
+
+ memset((char *) r, 0, sizeof *r);
+
+ if (unlikely(!p->signal))
+ return;
- memset((char *) &r, 0, sizeof(r));
switch (who) {
case RUSAGE_SELF:
- jiffies_to_timeval(p->utime, &r.ru_utime);
- jiffies_to_timeval(p->stime, &r.ru_stime);
- r.ru_nvcsw = p->nvcsw;
- r.ru_nivcsw = p->nivcsw;
- r.ru_minflt = p->min_flt;
- r.ru_majflt = p->maj_flt;
+ jiffies_to_timeval(p->utime, &r->ru_utime);
+ jiffies_to_timeval(p->stime, &r->ru_stime);
+ r->ru_nvcsw = p->nvcsw;
+ r->ru_nivcsw = p->nivcsw;
+ r->ru_minflt = p->min_flt;
+ r->ru_majflt = p->maj_flt;
break;
case RUSAGE_CHILDREN:
- jiffies_to_timeval(p->cutime, &r.ru_utime);
- jiffies_to_timeval(p->cstime, &r.ru_stime);
- r.ru_nvcsw = p->cnvcsw;
- r.ru_nivcsw = p->cnivcsw;
- r.ru_minflt = p->cmin_flt;
- r.ru_majflt = p->cmaj_flt;
+ spin_lock_irqsave(&p->sighand->siglock, flags);
+ utime = p->signal->cutime;
+ stime = p->signal->cstime;
+ r->ru_nvcsw = p->signal->cnvcsw;
+ r->ru_nivcsw = p->signal->cnivcsw;
+ r->ru_minflt = p->signal->cmin_flt;
+ r->ru_majflt = p->signal->cmaj_flt;
+ spin_unlock_irqrestore(&p->sighand->siglock, flags);
+ jiffies_to_timeval(utime, &r->ru_utime);
+ jiffies_to_timeval(stime, &r->ru_stime);
break;
- default:
- jiffies_to_timeval(p->utime + p->cutime, &r.ru_utime);
- jiffies_to_timeval(p->stime + p->cstime, &r.ru_stime);
- r.ru_nvcsw = p->nvcsw + p->cnvcsw;
- r.ru_nivcsw = p->nivcsw + p->cnivcsw;
- r.ru_minflt = p->min_flt + p->cmin_flt;
- r.ru_majflt = p->maj_flt + p->cmaj_flt;
+ case RUSAGE_GROUP:
+ spin_lock_irqsave(&p->sighand->siglock, flags);
+ utime = stime = 0;
+ goto sum_group;
+ case RUSAGE_BOTH:
+ spin_lock_irqsave(&p->sighand->siglock, flags);
+ utime = p->signal->cutime;
+ stime = p->signal->cstime;
+ r->ru_nvcsw = p->signal->cnvcsw;
+ r->ru_nivcsw = p->signal->cnivcsw;
+ r->ru_minflt = p->signal->cmin_flt;
+ r->ru_majflt = p->signal->cmaj_flt;
+ sum_group:
+ utime += p->signal->utime;
+ stime += p->signal->stime;
+ r->ru_nvcsw += p->signal->nvcsw;
+ r->ru_nivcsw += p->signal->nivcsw;
+ r->ru_minflt += p->signal->min_flt;
+ r->ru_majflt += p->signal->maj_flt;
+ t = p;
+ do {
+ utime += t->utime;
+ stime += t->stime;
+ r->ru_nvcsw += t->nvcsw;
+ r->ru_nivcsw += t->nivcsw;
+ r->ru_minflt += t->min_flt;
+ r->ru_majflt += t->maj_flt;
+ t = next_thread(t);
+ } while (t != p);
+ spin_unlock_irqrestore(&p->sighand->siglock, flags);
+ jiffies_to_timeval(utime, &r->ru_utime);
+ jiffies_to_timeval(stime, &r->ru_stime);
break;
+ default:
+ BUG();
}
+}
+
+int getrusage(struct task_struct *p, int who, struct rusage __user *ru)
+{
+ struct rusage r;
+ read_lock(&tasklist_lock);
+ k_getrusage(p, who, &r);
+ read_unlock(&tasklist_lock);
return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
}
asmlinkage long sys_getrusage(int who, struct rusage __user *ru)
{
- if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN)
+ if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN
+ && who != RUSAGE_GROUP)
return -EINVAL;
return getrusage(current, who, ru);
}