author	Tony Luck <tony.luck@intel.com>	2005-01-17 20:28:07 -0800
committer	Tony Luck <tony.luck@intel.com>	2005-01-17 20:28:07 -0800
commit	9790ee6f6a30db6b9fd4f27e437b87fa8e511cdd (patch)
tree	5283a41fba9fe6a85c0ed186ad0469dd0cc04323 /kernel
parent	bcb39c06eefc908121323c23ead2fdd246585ad3 (diff)
parent	fdefff6242ce95e350570a7f65e1ff49bc3a66b6 (diff)
Merge ia64 test tree back into release tree.
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/acct.c             33
-rw-r--r--  kernel/audit.c            10
-rw-r--r--  kernel/auditsc.c           2
-rw-r--r--  kernel/capability.c       67
-rw-r--r--  kernel/compat.c          140
-rw-r--r--  kernel/cpu.c               7
-rw-r--r--  kernel/dma.c               2
-rw-r--r--  kernel/exec_domain.c       2
-rw-r--r--  kernel/exit.c             77
-rw-r--r--  kernel/fork.c             47
-rw-r--r--  kernel/intermodule.c       2
-rw-r--r--  kernel/irq/autoprobe.c     1
-rw-r--r--  kernel/irq/handle.c       11
-rw-r--r--  kernel/itimer.c           57
-rw-r--r--  kernel/kallsyms.c          7
-rw-r--r--  kernel/kprobes.c           2
-rw-r--r--  kernel/ksysfs.c            3
-rw-r--r--  kernel/kthread.c          25
-rw-r--r--  kernel/module.c          125
-rw-r--r--  kernel/params.c          206
-rw-r--r--  kernel/pid.c               2
-rw-r--r--  kernel/posix-timers.c      2
-rw-r--r--  kernel/power/Kconfig       2
-rw-r--r--  kernel/power/disk.c       14
-rw-r--r--  kernel/power/main.c        9
-rw-r--r--  kernel/power/swsusp.c     30
-rw-r--r--  kernel/printk.c           10
-rw-r--r--  kernel/profile.c          14
-rw-r--r--  kernel/ptrace.c           79
-rw-r--r--  kernel/rcupdate.c         45
-rw-r--r--  kernel/resource.c          2
-rw-r--r--  kernel/sched.c           854
-rw-r--r--  kernel/signal.c          219
-rw-r--r--  kernel/softirq.c          20
-rw-r--r--  kernel/spinlock.c        237
-rw-r--r--  kernel/stop_machine.c      4
-rw-r--r--  kernel/sys.c              48
-rw-r--r--  kernel/sysctl.c            5
-rw-r--r--  kernel/time.c             53
-rw-r--r--  kernel/timer.c            78
-rw-r--r--  kernel/user.c              2
-rw-r--r--  kernel/workqueue.c         6
42 files changed, 1529 insertions(+), 1032 deletions(-)
diff --git a/kernel/acct.c b/kernel/acct.c
index 4a3bd224a836..32e39accbb86 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -528,3 +528,36 @@ void acct_process(long exitcode)
do_acct_process(exitcode, file);
fput(file);
}
+
+
+/*
+ * acct_update_integrals
+ * - update mm integral fields in task_struct
+ */
+void acct_update_integrals(void)
+{
+ struct task_struct *tsk = current;
+
+ if (likely(tsk->mm)) {
+ long delta = tsk->stime - tsk->acct_stimexpd;
+
+ if (delta == 0)
+ return;
+ tsk->acct_stimexpd = tsk->stime;
+ tsk->acct_rss_mem1 += delta * tsk->mm->rss;
+ tsk->acct_vm_mem1 += delta * tsk->mm->total_vm;
+ }
+}
+
+/*
+ * acct_clear_integrals
+ * - clear the mm integral fields in task_struct
+ */
+void acct_clear_integrals(struct task_struct *tsk)
+{
+ if (tsk) {
+ tsk->acct_stimexpd = 0;
+ tsk->acct_rss_mem1 = 0;
+ tsk->acct_vm_mem1 = 0;
+ }
+}
diff --git a/kernel/audit.c b/kernel/audit.c
index d813b7aa4b4c..e21f947bacf2 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -98,8 +98,8 @@ static struct sock *audit_sock;
* The second list is a list of pre-allocated audit buffers (if more
* than AUDIT_MAXFREE are in use, the audit buffer is freed instead of
* being placed on the freelist). */
-static spinlock_t audit_txlist_lock = SPIN_LOCK_UNLOCKED;
-static spinlock_t audit_freelist_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(audit_txlist_lock);
+static DEFINE_SPINLOCK(audit_freelist_lock);
static int audit_freelist_count = 0;
static LIST_HEAD(audit_txlist);
static LIST_HEAD(audit_freelist);
@@ -169,7 +169,7 @@ static inline int audit_rate_check(void)
{
static unsigned long last_check = 0;
static int messages = 0;
- static spinlock_t lock = SPIN_LOCK_UNLOCKED;
+ static DEFINE_SPINLOCK(lock);
unsigned long flags;
unsigned long now;
unsigned long elapsed;
@@ -199,7 +199,7 @@ static inline int audit_rate_check(void)
void audit_log_lost(const char *message)
{
static unsigned long last_msg = 0;
- static spinlock_t lock = SPIN_LOCK_UNLOCKED;
+ static DEFINE_SPINLOCK(lock);
unsigned long flags;
unsigned long now;
int print;
@@ -419,7 +419,7 @@ static int audit_receive_skb(struct sk_buff *skb)
if (rlen > skb->len)
rlen = skb->len;
if ((err = audit_receive_msg(skb, nlh))) {
- netlink_ack(skb, nlh, -err);
+ netlink_ack(skb, nlh, err);
} else if (nlh->nlmsg_flags & NLM_F_ACK)
netlink_ack(skb, nlh, 0);
skb_pull(skb, rlen);
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 0962944e8357..aa617bbe49b0 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -591,7 +591,7 @@ static void audit_log_exit(struct audit_context *context)
if (context->personality != PER_LINUX)
audit_log_format(ab, " per=%lx", context->personality);
if (context->return_valid)
- audit_log_format(ab, " exit=%u", context->return_code);
+ audit_log_format(ab, " exit=%d", context->return_code);
audit_log_format(ab,
" a0=%lx a1=%lx a2=%lx a3=%lx items=%d"
" pid=%d loginuid=%d uid=%d gid=%d"
diff --git a/kernel/capability.c b/kernel/capability.c
index 7800a5066c0f..b828d545a97b 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -23,7 +23,7 @@ EXPORT_SYMBOL(cap_bset);
* This global lock protects task->cap_* for all tasks including current.
* Locking rule: acquire this prior to tasklist_lock.
*/
-spinlock_t task_capability_lock = SPIN_LOCK_UNLOCKED;
+DEFINE_SPINLOCK(task_capability_lock);
/*
* For sys_getproccap() and sys_setproccap(), any of the three
@@ -85,34 +85,60 @@ out:
* cap_set_pg - set capabilities for all processes in a given process
* group. We call this holding task_capability_lock and tasklist_lock.
*/
-static inline void cap_set_pg(int pgrp, kernel_cap_t *effective,
+static inline int cap_set_pg(int pgrp, kernel_cap_t *effective,
kernel_cap_t *inheritable,
kernel_cap_t *permitted)
{
task_t *g, *target;
+ int ret = -EPERM;
+ int found = 0;
do_each_task_pid(pgrp, PIDTYPE_PGID, g) {
target = g;
- while_each_thread(g, target)
- security_capset_set(target, effective, inheritable, permitted);
+ while_each_thread(g, target) {
+ if (!security_capset_check(target, effective,
+ inheritable,
+ permitted)) {
+ security_capset_set(target, effective,
+ inheritable,
+ permitted);
+ ret = 0;
+ }
+ found = 1;
+ }
} while_each_task_pid(pgrp, PIDTYPE_PGID, g);
+
+ if (!found)
+ ret = 0;
+ return ret;
}
/*
* cap_set_all - set capabilities for all processes other than init
* and self. We call this holding task_capability_lock and tasklist_lock.
*/
-static inline void cap_set_all(kernel_cap_t *effective,
+static inline int cap_set_all(kernel_cap_t *effective,
kernel_cap_t *inheritable,
kernel_cap_t *permitted)
{
task_t *g, *target;
+ int ret = -EPERM;
+ int found = 0;
do_each_thread(g, target) {
if (target == current || target->pid == 1)
continue;
+ found = 1;
+ if (security_capset_check(target, effective, inheritable,
+ permitted))
+ continue;
+ ret = 0;
security_capset_set(target, effective, inheritable, permitted);
} while_each_thread(g, target);
+
+ if (!found)
+ ret = 0;
+ return ret;
}
/*
@@ -147,7 +173,7 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
if (get_user(pid, &header->pid))
return -EFAULT;
- if (pid && !capable(CAP_SETPCAP))
+ if (pid && pid != current->pid && !capable(CAP_SETPCAP))
return -EPERM;
if (copy_from_user(&effective, &data->effective, sizeof(effective)) ||
@@ -167,36 +193,23 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
} else
target = current;
- ret = -EPERM;
-
- if (security_capset_check(target, &effective, &inheritable, &permitted))
- goto out;
-
- if (!cap_issubset(inheritable, cap_combine(target->cap_inheritable,
- current->cap_permitted)))
- goto out;
-
- /* verify restrictions on target's new Permitted set */
- if (!cap_issubset(permitted, cap_combine(target->cap_permitted,
- current->cap_permitted)))
- goto out;
-
- /* verify the _new_Effective_ is a subset of the _new_Permitted_ */
- if (!cap_issubset(effective, permitted))
- goto out;
-
ret = 0;
/* having verified that the proposed changes are legal,
we now put them into effect. */
if (pid < 0) {
if (pid == -1) /* all procs other than current and init */
- cap_set_all(&effective, &inheritable, &permitted);
+ ret = cap_set_all(&effective, &inheritable, &permitted);
else /* all procs in process group */
- cap_set_pg(-pid, &effective, &inheritable, &permitted);
+ ret = cap_set_pg(-pid, &effective, &inheritable,
+ &permitted);
} else {
- security_capset_set(target, &effective, &inheritable, &permitted);
+ ret = security_capset_check(target, &effective, &inheritable,
+ &permitted);
+ if (!ret)
+ security_capset_set(target, &effective, &inheritable,
+ &permitted);
}
out:
diff --git a/kernel/compat.c b/kernel/compat.c
index 672310635347..f14fbde52bb3 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -20,6 +20,7 @@
#include <linux/futex.h> /* for FUTEX_WAIT */
#include <linux/syscalls.h>
#include <linux/unistd.h>
+#include <linux/security.h>
#include <asm/uaccess.h>
@@ -162,15 +163,15 @@ asmlinkage long compat_sys_times(struct compat_tms __user *tbuf)
struct compat_tms tmp;
struct task_struct *tsk = current;
struct task_struct *t;
- unsigned long utime, stime, cutime, cstime;
+ cputime_t utime, stime, cutime, cstime;
read_lock(&tasklist_lock);
utime = tsk->signal->utime;
stime = tsk->signal->stime;
t = tsk;
do {
- utime += t->utime;
- stime += t->stime;
+ utime = cputime_add(utime, t->utime);
+ stime = cputime_add(stime, t->stime);
t = next_thread(t);
} while (t != tsk);
@@ -189,10 +190,10 @@ asmlinkage long compat_sys_times(struct compat_tms __user *tbuf)
spin_unlock_irq(&tsk->sighand->siglock);
read_unlock(&tasklist_lock);
- tmp.tms_utime = compat_jiffies_to_clock_t(utime);
- tmp.tms_stime = compat_jiffies_to_clock_t(stime);
- tmp.tms_cutime = compat_jiffies_to_clock_t(cutime);
- tmp.tms_cstime = compat_jiffies_to_clock_t(cstime);
+ tmp.tms_utime = compat_jiffies_to_clock_t(cputime_to_jiffies(utime));
+ tmp.tms_stime = compat_jiffies_to_clock_t(cputime_to_jiffies(stime));
+ tmp.tms_cutime = compat_jiffies_to_clock_t(cputime_to_jiffies(cutime));
+ tmp.tms_cstime = compat_jiffies_to_clock_t(cputime_to_jiffies(cstime));
if (copy_to_user(tbuf, &tmp, sizeof(tmp)))
return -EFAULT;
}
@@ -680,3 +681,128 @@ long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask,
return 0;
}
+
+void
+sigset_from_compat (sigset_t *set, compat_sigset_t *compat)
+{
+ switch (_NSIG_WORDS) {
+#if defined (__COMPAT_ENDIAN_SWAP__)
+ case 4: set->sig[3] = compat->sig[7] | (((long)compat->sig[6]) << 32 );
+ case 3: set->sig[2] = compat->sig[5] | (((long)compat->sig[4]) << 32 );
+ case 2: set->sig[1] = compat->sig[3] | (((long)compat->sig[2]) << 32 );
+ case 1: set->sig[0] = compat->sig[1] | (((long)compat->sig[0]) << 32 );
+#else
+ case 4: set->sig[3] = compat->sig[6] | (((long)compat->sig[7]) << 32 );
+ case 3: set->sig[2] = compat->sig[4] | (((long)compat->sig[5]) << 32 );
+ case 2: set->sig[1] = compat->sig[2] | (((long)compat->sig[3]) << 32 );
+ case 1: set->sig[0] = compat->sig[0] | (((long)compat->sig[1]) << 32 );
+#endif
+ }
+}
+
+asmlinkage long
+compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese,
+ struct compat_siginfo __user *uinfo,
+ struct compat_timespec __user *uts, compat_size_t sigsetsize)
+{
+ compat_sigset_t s32;
+ sigset_t s;
+ int sig;
+ struct timespec t;
+ siginfo_t info;
+ long ret, timeout = 0;
+
+ if (sigsetsize != sizeof(sigset_t))
+ return -EINVAL;
+
+ if (copy_from_user(&s32, uthese, sizeof(compat_sigset_t)))
+ return -EFAULT;
+ sigset_from_compat(&s, &s32);
+ sigdelsetmask(&s,sigmask(SIGKILL)|sigmask(SIGSTOP));
+ signotset(&s);
+
+ if (uts) {
+ if (get_compat_timespec (&t, uts))
+ return -EFAULT;
+ if (t.tv_nsec >= 1000000000L || t.tv_nsec < 0
+ || t.tv_sec < 0)
+ return -EINVAL;
+ }
+
+ spin_lock_irq(&current->sighand->siglock);
+ sig = dequeue_signal(current, &s, &info);
+ if (!sig) {
+ timeout = MAX_SCHEDULE_TIMEOUT;
+ if (uts)
+ timeout = timespec_to_jiffies(&t)
+ +(t.tv_sec || t.tv_nsec);
+ if (timeout) {
+ current->real_blocked = current->blocked;
+ sigandsets(&current->blocked, &current->blocked, &s);
+
+ recalc_sigpending();
+ spin_unlock_irq(&current->sighand->siglock);
+
+ current->state = TASK_INTERRUPTIBLE;
+ timeout = schedule_timeout(timeout);
+
+ spin_lock_irq(&current->sighand->siglock);
+ sig = dequeue_signal(current, &s, &info);
+ current->blocked = current->real_blocked;
+ siginitset(&current->real_blocked, 0);
+ recalc_sigpending();
+ }
+ }
+ spin_unlock_irq(&current->sighand->siglock);
+
+ if (sig) {
+ ret = sig;
+ if (uinfo) {
+ if (copy_siginfo_to_user32(uinfo, &info))
+ ret = -EFAULT;
+ }
+ }else {
+ ret = timeout?-EINTR:-EAGAIN;
+ }
+ return ret;
+
+}
+
+#ifdef __ARCH_WANT_COMPAT_SYS_TIME
+
+/* compat_time_t is a 32 bit "long" and needs to get converted. */
+
+asmlinkage long compat_sys_time(compat_time_t __user * tloc)
+{
+ compat_time_t i;
+ struct timeval tv;
+
+ do_gettimeofday(&tv);
+ i = tv.tv_sec;
+
+ if (tloc) {
+ if (put_user(i,tloc))
+ i = -EFAULT;
+ }
+ return i;
+}
+
+asmlinkage long compat_sys_stime(compat_time_t __user *tptr)
+{
+ struct timespec tv;
+ int err;
+
+ if (get_user(tv.tv_sec, tptr))
+ return -EFAULT;
+
+ tv.tv_nsec = 0;
+
+ err = security_settime(&tv, NULL);
+ if (err)
+ return err;
+
+ do_settimeofday(&tv);
+ return 0;
+}
+
+#endif /* __ARCH_WANT_COMPAT_SYS_TIME */
diff --git a/kernel/cpu.c b/kernel/cpu.c
index ebaba873ebad..628f4ccda127 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -48,7 +48,9 @@ static inline void check_for_tasks(int cpu)
write_lock_irq(&tasklist_lock);
for_each_process(p) {
- if (task_cpu(p) == cpu && (p->utime != 0 || p->stime != 0))
+ if (task_cpu(p) == cpu &&
+ (!cputime_eq(p->utime, cputime_zero) ||
+ !cputime_eq(p->stime, cputime_zero)))
printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d\
(state = %ld, flags = %lx) \n",
p->comm, p->pid, cpu, p->state, p->flags);
@@ -132,7 +134,8 @@ int cpu_down(unsigned int cpu)
__cpu_die(cpu);
/* Move it here so it can run. */
- kthread_bind(p, smp_processor_id());
+ kthread_bind(p, get_cpu());
+ put_cpu();
/* CPU is completely dead: tell everyone. Too late to complain. */
if (notifier_call_chain(&cpu_chain, CPU_DEAD, (void *)(long)cpu)
diff --git a/kernel/dma.c b/kernel/dma.c
index 940d02c50879..aef0a45b7893 100644
--- a/kernel/dma.c
+++ b/kernel/dma.c
@@ -38,7 +38,7 @@
*/
-spinlock_t dma_spin_lock = SPIN_LOCK_UNLOCKED;
+DEFINE_SPINLOCK(dma_spin_lock);
/*
* If our port doesn't define this it has no PC like DMA
diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c
index ad3e5d54e119..867d6dbeb574 100644
--- a/kernel/exec_domain.c
+++ b/kernel/exec_domain.c
@@ -22,7 +22,7 @@
static void default_handler(int, struct pt_regs *);
static struct exec_domain *exec_domains = &default_exec_domain;
-static rwlock_t exec_domains_lock = RW_LOCK_UNLOCKED;
+static DEFINE_RWLOCK(exec_domains_lock);
static u_long ident_map[32] = {
diff --git a/kernel/exit.c b/kernel/exit.c
index 64bc9502cd1c..3171228f25c3 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -159,7 +159,7 @@ static int will_become_orphaned_pgrp(int pgrp, task_t *ignored_task)
do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
if (p == ignored_task
- || p->exit_state >= EXIT_ZOMBIE
+ || p->exit_state
|| p->real_parent->pid == 1)
continue;
if (process_group(p->real_parent) != pgrp
@@ -332,7 +332,9 @@ void daemonize(const char *name, ...)
exit_mm(current);
set_special_pids(1, 1);
+ down(&tty_sem);
current->signal->tty = NULL;
+ up(&tty_sem);
/* Block and flush all signals */
sigfillset(&blocked);
@@ -470,7 +472,7 @@ EXPORT_SYMBOL_GPL(exit_fs);
* Turn us into a lazy TLB process if we
* aren't already..
*/
-static inline void __exit_mm(struct task_struct * tsk)
+void exit_mm(struct task_struct * tsk)
{
struct mm_struct *mm = tsk->mm;
@@ -506,18 +508,13 @@ static inline void __exit_mm(struct task_struct * tsk)
mmput(mm);
}
-void exit_mm(struct task_struct *tsk)
-{
- __exit_mm(tsk);
-}
-
static inline void choose_new_parent(task_t *p, task_t *reaper, task_t *child_reaper)
{
/*
* Make sure we're not reparenting to ourselves and that
* the parent is not a zombie.
*/
- BUG_ON(p == reaper || reaper->state >= EXIT_ZOMBIE || reaper->exit_state >= EXIT_ZOMBIE);
+ BUG_ON(p == reaper || reaper->exit_state >= EXIT_ZOMBIE);
p->real_parent = reaper;
if (p->parent == p->real_parent)
BUG();
@@ -560,7 +557,7 @@ static inline void reparent_thread(task_t *p, task_t *father, int traced)
* a normal stop since it's no longer being
* traced.
*/
- p->state = TASK_STOPPED;
+ ptrace_untrace(p);
}
}
@@ -599,7 +596,7 @@ static inline void forget_original_parent(struct task_struct * father,
reaper = child_reaper;
break;
}
- } while (reaper->exit_state >= EXIT_ZOMBIE);
+ } while (reaper->exit_state);
/*
* There are only two places where our children can be:
@@ -656,7 +653,7 @@ static void exit_notify(struct task_struct *tsk)
struct task_struct *t;
struct list_head ptrace_dead, *_p, *_n;
- if (signal_pending(tsk) && !tsk->signal->group_exit
+ if (signal_pending(tsk) && !(tsk->signal->flags & SIGNAL_GROUP_EXIT)
&& !thread_group_empty(tsk)) {
/*
* This occurs when there was a race between our exit
@@ -750,7 +747,9 @@ static void exit_notify(struct task_struct *tsk)
}
state = EXIT_ZOMBIE;
- if (tsk->exit_signal == -1 && tsk->ptrace == 0)
+ if (tsk->exit_signal == -1 &&
+ (likely(tsk->ptrace == 0) ||
+ unlikely(tsk->parent->signal->flags & SIGNAL_GROUP_EXIT)))
state = EXIT_DEAD;
tsk->exit_state = state;
@@ -758,8 +757,8 @@ static void exit_notify(struct task_struct *tsk)
* Clear these here so that update_process_times() won't try to deliver
* itimer, profile or rlimit signals to this task while it is in late exit.
*/
- tsk->it_virt_value = 0;
- tsk->it_prof_value = 0;
+ tsk->it_virt_value = cputime_zero;
+ tsk->it_prof_value = cputime_zero;
write_unlock_irq(&tasklist_lock);
@@ -793,6 +792,12 @@ fastcall NORET_TYPE void do_exit(long code)
panic("Attempted to kill init!");
if (tsk->io_context)
exit_io_context();
+
+ if (unlikely(current->ptrace & PT_TRACE_EXIT)) {
+ current->ptrace_message = code;
+ ptrace_notify((PTRACE_EVENT_EXIT << 8) | SIGTRAP);
+ }
+
tsk->flags |= PF_EXITING;
del_timer_sync(&tsk->real_timer);
@@ -801,15 +806,12 @@ fastcall NORET_TYPE void do_exit(long code)
current->comm, current->pid,
preempt_count());
- if (unlikely(current->ptrace & PT_TRACE_EXIT)) {
- current->ptrace_message = code;
- ptrace_notify((PTRACE_EVENT_EXIT << 8) | SIGTRAP);
- }
-
+ acct_update_integrals();
+ update_mem_hiwater();
group_dead = atomic_dec_and_test(&tsk->signal->live);
if (group_dead)
acct_process(code);
- __exit_mm(tsk);
+ exit_mm(tsk);
exit_sem(tsk);
__exit_files(tsk);
@@ -877,18 +879,18 @@ do_group_exit(int exit_code)
{
BUG_ON(exit_code & 0x80); /* core dumps don't get here */
- if (current->signal->group_exit)
+ if (current->signal->flags & SIGNAL_GROUP_EXIT)
exit_code = current->signal->group_exit_code;
else if (!thread_group_empty(current)) {
struct signal_struct *const sig = current->signal;
struct sighand_struct *const sighand = current->sighand;
read_lock(&tasklist_lock);
spin_lock_irq(&sighand->siglock);
- if (sig->group_exit)
+ if (sig->flags & SIGNAL_GROUP_EXIT)
/* Another thread got here before we took the lock. */
exit_code = sig->group_exit_code;
else {
- sig->group_exit = 1;
+ sig->flags = SIGNAL_GROUP_EXIT;
sig->group_exit_code = exit_code;
zap_other_threads(current);
}
@@ -1046,10 +1048,16 @@ static int wait_task_zombie(task_t *p, int noreap,
* here reaping other children at the same time.
*/
spin_lock_irq(&p->parent->sighand->siglock);
- p->parent->signal->cutime +=
- p->utime + p->signal->utime + p->signal->cutime;
- p->parent->signal->cstime +=
- p->stime + p->signal->stime + p->signal->cstime;
+ p->parent->signal->cutime =
+ cputime_add(p->parent->signal->cutime,
+ cputime_add(p->utime,
+ cputime_add(p->signal->utime,
+ p->signal->cutime)));
+ p->parent->signal->cstime =
+ cputime_add(p->parent->signal->cstime,
+ cputime_add(p->stime,
+ cputime_add(p->signal->stime,
+ p->signal->cstime)));
p->parent->signal->cmin_flt +=
p->min_flt + p->signal->min_flt + p->signal->cmin_flt;
p->parent->signal->cmaj_flt +=
@@ -1068,7 +1076,7 @@ static int wait_task_zombie(task_t *p, int noreap,
read_unlock(&tasklist_lock);
retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
- status = p->signal->group_exit
+ status = (p->signal->flags & SIGNAL_GROUP_EXIT)
? p->signal->group_exit_code : p->exit_code;
if (!retval && stat_addr)
retval = put_user(status, stat_addr);
@@ -1180,7 +1188,7 @@ static int wait_task_stopped(task_t *p, int delayed_group_leader, int noreap,
* race with the EXIT_ZOMBIE case.
*/
exit_code = xchg(&p->exit_code, 0);
- if (unlikely(p->exit_state >= EXIT_ZOMBIE)) {
+ if (unlikely(p->exit_state)) {
/*
* The task resumed and then died. Let the next iteration
* catch it in EXIT_ZOMBIE. Note that exit_code might
@@ -1258,16 +1266,17 @@ static int wait_task_continued(task_t *p, int noreap,
if (unlikely(!p->signal))
return 0;
- if (p->signal->stop_state >= 0)
+ if (!(p->signal->flags & SIGNAL_STOP_CONTINUED))
return 0;
spin_lock_irq(&p->sighand->siglock);
- if (p->signal->stop_state >= 0) { /* Re-check with the lock held. */
+ /* Re-check with the lock held. */
+ if (!(p->signal->flags & SIGNAL_STOP_CONTINUED)) {
spin_unlock_irq(&p->sighand->siglock);
return 0;
}
if (!noreap)
- p->signal->stop_state = 0;
+ p->signal->flags &= ~SIGNAL_STOP_CONTINUED;
spin_unlock_irq(&p->sighand->siglock);
pid = p->pid;
@@ -1316,7 +1325,7 @@ static long do_wait(pid_t pid, int options, struct siginfo __user *infop,
struct task_struct *tsk;
int flag, retval;
- add_wait_queue(&current->wait_chldexit,&wait);
+ add_wait_queue(&current->signal->wait_chldexit,&wait);
repeat:
/*
* We will set this flag if we see any child that might later
@@ -1430,7 +1439,7 @@ check_continued:
retval = -ECHILD;
end:
current->state = TASK_RUNNING;
- remove_wait_queue(&current->wait_chldexit,&wait);
+ remove_wait_queue(&current->signal->wait_chldexit,&wait);
if (infop) {
if (retval > 0)
retval = 0;
diff --git a/kernel/fork.c b/kernel/fork.c
index 84252e055db4..be1ff8ddbb9c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -39,6 +39,7 @@
#include <linux/audit.h>
#include <linux/profile.h>
#include <linux/rmap.h>
+#include <linux/acct.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -47,17 +48,17 @@
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
-/* The idle threads do not count..
- * Protected by write_lock_irq(&tasklist_lock)
+/*
+ * Protected counters by write_lock_irq(&tasklist_lock)
*/
-int nr_threads;
-
-int max_threads;
unsigned long total_forks; /* Handle normal Linux uptimes. */
+int nr_threads; /* The idle threads do not count.. */
+
+int max_threads; /* tunable limit on nr_threads */
DEFINE_PER_CPU(unsigned long, process_counts) = 0;
-rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED; /* outer */
+ __cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */
EXPORT_SYMBOL(tasklist_lock);
@@ -218,6 +219,7 @@ static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm)
/* insert tmp into the share list, just after mpnt */
spin_lock(&file->f_mapping->i_mmap_lock);
+ tmp->vm_truncate_count = mpnt->vm_truncate_count;
flush_dcache_mmap_lock(file->f_mapping);
vma_prio_tree_add(tmp, mpnt);
flush_dcache_mmap_unlock(file->f_mapping);
@@ -279,7 +281,7 @@ static inline void mm_free_pgd(struct mm_struct * mm)
#define mm_free_pgd(mm)
#endif /* CONFIG_MMU */
-spinlock_t mmlist_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
+ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock);
#define allocate_mm() (kmem_cache_alloc(mm_cachep, SLAB_KERNEL))
#define free_mm(mm) (kmem_cache_free(mm_cachep, (mm)))
@@ -469,6 +471,9 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
if (retval)
goto free_pt;
+ mm->hiwater_rss = mm->rss;
+ mm->hiwater_vm = mm->total_vm;
+
good_mm:
tsk->mm = mm;
tsk->active_mm = mm;
@@ -729,11 +734,11 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
return -ENOMEM;
atomic_set(&sig->count, 1);
atomic_set(&sig->live, 1);
- sig->group_exit = 0;
+ init_waitqueue_head(&sig->wait_chldexit);
+ sig->flags = 0;
sig->group_exit_code = 0;
sig->group_exit_task = NULL;
sig->group_stop_count = 0;
- sig->stop_state = 0;
sig->curr_target = NULL;
init_sigpending(&sig->shared_pending);
INIT_LIST_HEAD(&sig->posix_timers);
@@ -744,7 +749,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
sig->leader = 0; /* session leadership doesn't inherit */
sig->tty_old_pgrp = 0;
- sig->utime = sig->stime = sig->cutime = sig->cstime = 0;
+ sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
@@ -857,7 +862,6 @@ static task_t *copy_process(unsigned long clone_flags,
INIT_LIST_HEAD(&p->children);
INIT_LIST_HEAD(&p->sibling);
- init_waitqueue_head(&p->wait_chldexit);
p->vfork_done = NULL;
spin_lock_init(&p->alloc_lock);
spin_lock_init(&p->proc_lock);
@@ -865,12 +869,23 @@ static task_t *copy_process(unsigned long clone_flags,
clear_tsk_thread_flag(p, TIF_SIGPENDING);
init_sigpending(&p->pending);
- p->it_real_value = p->it_virt_value = p->it_prof_value = 0;
- p->it_real_incr = p->it_virt_incr = p->it_prof_incr = 0;
+ p->it_real_value = 0;
+ p->it_real_incr = 0;
+ p->it_virt_value = cputime_zero;
+ p->it_virt_incr = cputime_zero;
+ p->it_prof_value = cputime_zero;
+ p->it_prof_incr = cputime_zero;
init_timer(&p->real_timer);
p->real_timer.data = (unsigned long) p;
- p->utime = p->stime = 0;
+ p->utime = cputime_zero;
+ p->stime = cputime_zero;
+ p->rchar = 0; /* I/O counter: bytes read */
+ p->wchar = 0; /* I/O counter: bytes written */
+ p->syscr = 0; /* I/O counter: read syscalls */
+ p->syscw = 0; /* I/O counter: write syscalls */
+ acct_clear_integrals(p);
+
p->lock_depth = -1; /* -1 = no lock */
do_posix_clock_monotonic_gettime(&p->start_time);
p->security = NULL;
@@ -985,7 +1000,7 @@ static task_t *copy_process(unsigned long clone_flags,
* do not create this new thread - the whole thread
* group is supposed to exit anyway.
*/
- if (current->signal->group_exit) {
+ if (current->signal->flags & SIGNAL_GROUP_EXIT) {
spin_unlock(&current->sighand->siglock);
write_unlock_irq(&tasklist_lock);
retval = -EAGAIN;
@@ -1020,6 +1035,7 @@ static task_t *copy_process(unsigned long clone_flags,
}
nr_threads++;
+ total_forks++;
write_unlock_irq(&tasklist_lock);
retval = 0;
@@ -1152,7 +1168,6 @@ long do_fork(unsigned long clone_flags,
wake_up_new_task(p, clone_flags);
else
p->state = TASK_STOPPED;
- ++total_forks;
if (unlikely (trace)) {
current->ptrace_message = pid;
diff --git a/kernel/intermodule.c b/kernel/intermodule.c
index 09f556507f57..9078649382cf 100644
--- a/kernel/intermodule.c
+++ b/kernel/intermodule.c
@@ -14,7 +14,7 @@
*/
static struct list_head ime_list = LIST_HEAD_INIT(ime_list);
-static spinlock_t ime_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(ime_lock);
static int kmalloc_failed;
struct inter_module_entry {
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index 16818726cd21..98d62d8efeaf 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -137,6 +137,7 @@ unsigned int probe_irq_mask(unsigned long val)
return mask & val;
}
+EXPORT_SYMBOL(probe_irq_mask);
/**
* probe_irq_off - end an interrupt autodetect
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index ebc25823b73d..2fb0e46e11f3 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -73,17 +73,6 @@ irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs)
}
/*
- * Exit an interrupt context. Process softirqs if needed and possible:
- */
-void irq_exit(void)
-{
- preempt_count() -= IRQ_EXIT_OFFSET;
- if (!in_interrupt() && local_softirq_pending())
- do_softirq();
- preempt_enable_no_resched();
-}
-
-/*
* Have got an event to handle:
*/
fastcall int handle_IRQ_event(unsigned int irq, struct pt_regs *regs,
diff --git a/kernel/itimer.c b/kernel/itimer.c
index 95fbf1c6becf..e1743c563206 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -16,11 +16,10 @@
int do_getitimer(int which, struct itimerval *value)
{
- register unsigned long val, interval;
+ register unsigned long val;
switch (which) {
case ITIMER_REAL:
- interval = current->it_real_incr;
val = 0;
/*
* FIXME! This needs to be atomic, in case the kernel timer happens!
@@ -32,20 +31,20 @@ int do_getitimer(int which, struct itimerval *value)
if ((long) val <= 0)
val = 1;
}
+ jiffies_to_timeval(val, &value->it_value);
+ jiffies_to_timeval(current->it_real_incr, &value->it_interval);
break;
case ITIMER_VIRTUAL:
- val = current->it_virt_value;
- interval = current->it_virt_incr;
+ cputime_to_timeval(current->it_virt_value, &value->it_value);
+ cputime_to_timeval(current->it_virt_incr, &value->it_interval);
break;
case ITIMER_PROF:
- val = current->it_prof_value;
- interval = current->it_prof_incr;
+ cputime_to_timeval(current->it_prof_value, &value->it_value);
+ cputime_to_timeval(current->it_prof_incr, &value->it_interval);
break;
default:
return(-EINVAL);
}
- jiffies_to_timeval(val, &value->it_value);
- jiffies_to_timeval(interval, &value->it_interval);
return 0;
}
@@ -81,37 +80,43 @@ void it_real_fn(unsigned long __data)
int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
{
- register unsigned long i, j;
+ unsigned long expire;
+ cputime_t cputime;
int k;
- i = timeval_to_jiffies(&value->it_interval);
- j = timeval_to_jiffies(&value->it_value);
if (ovalue && (k = do_getitimer(which, ovalue)) < 0)
return k;
switch (which) {
case ITIMER_REAL:
del_timer_sync(&current->real_timer);
- current->it_real_value = j;
- current->it_real_incr = i;
- if (!j)
+ expire = timeval_to_jiffies(&value->it_value);
+ current->it_real_value = expire;
+ current->it_real_incr =
+ timeval_to_jiffies(&value->it_interval);
+ if (!expire)
break;
- if (j > (unsigned long) LONG_MAX)
- j = LONG_MAX;
- i = j + jiffies;
- current->real_timer.expires = i;
+ if (expire > (unsigned long) LONG_MAX)
+ expire = LONG_MAX;
+ current->real_timer.expires = jiffies + expire;
add_timer(&current->real_timer);
break;
case ITIMER_VIRTUAL:
- if (j)
- j++;
- current->it_virt_value = j;
- current->it_virt_incr = i;
+ cputime = timeval_to_cputime(&value->it_value);
+ if (cputime_gt(cputime, cputime_zero))
+ cputime = cputime_add(cputime,
+ jiffies_to_cputime(1));
+ current->it_virt_value = cputime;
+ cputime = timeval_to_cputime(&value->it_interval);
+ current->it_virt_incr = cputime;
break;
case ITIMER_PROF:
- if (j)
- j++;
- current->it_prof_value = j;
- current->it_prof_incr = i;
+ cputime = timeval_to_cputime(&value->it_value);
+ if (cputime_gt(cputime, cputime_zero))
+ cputime = cputime_add(cputime,
+ jiffies_to_cputime(1));
+ current->it_prof_value = cputime;
+ cputime = timeval_to_cputime(&value->it_interval);
+ current->it_prof_incr = cputime;
break;
default:
return -EINVAL;
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 8db13f565ed9..315751c2b09b 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -20,6 +20,8 @@
#include <linux/proc_fs.h>
#include <linux/mm.h>
+#include <asm/sections.h>
+
#ifdef CONFIG_KALLSYMS_ALL
#define all_var 1
#else
@@ -28,7 +30,7 @@
/* These will be re-linked against their real values during the second link stage */
extern unsigned long kallsyms_addresses[] __attribute__((weak));
-extern unsigned long kallsyms_num_syms __attribute__((weak));
+extern unsigned long kallsyms_num_syms __attribute__((weak,section("data")));
extern u8 kallsyms_names[] __attribute__((weak));
extern u8 kallsyms_token_table[] __attribute__((weak));
@@ -36,9 +38,6 @@ extern u16 kallsyms_token_index[] __attribute__((weak));
extern unsigned long kallsyms_markers[] __attribute__((weak));
-/* Defined by the linker script. */
-extern char _stext[], _etext[], _sinittext[], _einittext[], _end[];
-
static inline int is_kernel_inittext(unsigned long addr)
{
if (addr >= (unsigned long)_sinittext
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index d3d1321b0e5c..cc6f72585f1e 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -43,7 +43,7 @@
static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
unsigned int kprobe_cpu = NR_CPUS;
-static spinlock_t kprobe_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(kprobe_lock);
/* Locks kprobe: irqs must be disabled */
void lock_kprobes(void)
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index 31f1a60df733..1f064a63f8cf 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -30,7 +30,8 @@ static ssize_t hotplug_seqnum_show(struct subsystem *subsys, char *page)
KERNEL_ATTR_RO(hotplug_seqnum);
#endif
-static decl_subsys(kernel, NULL, NULL);
+decl_subsys(kernel, NULL, NULL);
+EXPORT_SYMBOL_GPL(kernel_subsys);
static struct attribute * kernel_attrs[] = {
#ifdef CONFIG_HOTPLUG
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 5689ebb1a250..e377e2244103 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -14,6 +14,12 @@
#include <linux/module.h>
#include <asm/semaphore.h>
+/*
+ * We dont want to execute off keventd since it might
+ * hold a semaphore our callers hold too:
+ */
+static struct workqueue_struct *helper_wq;
+
struct kthread_create_info
{
/* Information passed to kthread() from keventd. */
@@ -126,12 +132,13 @@ struct task_struct *kthread_create(int (*threadfn)(void *data),
init_completion(&create.started);
init_completion(&create.done);
- /* If we're being called to start the first workqueue, we
- * can't use keventd. */
- if (!keventd_up())
+ /*
+ * The workqueue needs to start up first:
+ */
+ if (!helper_wq)
work.func(work.data);
else {
- schedule_work(&work);
+ queue_work(helper_wq, &work);
wait_for_completion(&create.done);
}
if (!IS_ERR(create.result)) {
@@ -183,3 +190,13 @@ int kthread_stop(struct task_struct *k)
return ret;
}
EXPORT_SYMBOL(kthread_stop);
+
+static __init int helper_init(void)
+{
+ helper_wq = create_singlethread_workqueue("kthread");
+ BUG_ON(!helper_wq);
+
+ return 0;
+}
+core_initcall(helper_init);
+
diff --git a/kernel/module.c b/kernel/module.c
index 0798443ce002..ce427b675b98 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -53,7 +53,7 @@
#define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1))
/* Protects module list */
-static spinlock_t modlist_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(modlist_lock);
/* List of modules, protected by module_mutex AND modlist_lock */
static DECLARE_MUTEX(module_mutex);
@@ -379,7 +379,7 @@ static void module_unload_init(struct module *mod)
for (i = 0; i < NR_CPUS; i++)
local_set(&mod->ref[i].count, 0);
/* Hold reference count during initialization. */
- local_set(&mod->ref[smp_processor_id()].count, 1);
+ local_set(&mod->ref[_smp_processor_id()].count, 1);
/* Backwards compatibility macros put refcount during init. */
mod->waiter = current;
}
@@ -651,7 +651,8 @@ void symbol_put_addr(void *addr)
}
EXPORT_SYMBOL_GPL(symbol_put_addr);
-static ssize_t show_refcnt(struct module *mod, char *buffer)
+static ssize_t show_refcnt(struct module_attribute *mattr,
+ struct module *mod, char *buffer)
{
/* sysfs holds a reference */
return sprintf(buffer, "%u\n", module_refcount(mod)-1);
@@ -681,13 +682,6 @@ static inline int use_module(struct module *a, struct module *b)
static inline void module_unload_init(struct module *mod)
{
}
-
-asmlinkage long
-sys_delete_module(const char __user *name_user, unsigned int flags)
-{
- return -ENOSYS;
-}
-
#endif /* CONFIG_MODULE_UNLOAD */
#ifdef CONFIG_OBSOLETE_MODPARM
@@ -936,79 +930,71 @@ static unsigned long resolve_symbol(Elf_Shdr *sechdrs,
* J. Corbet <corbet@lwn.net>
*/
#ifdef CONFIG_KALLSYMS
-static void module_sect_attrs_release(struct kobject *kobj)
-{
- kfree(container_of(kobj, struct module_sections, kobj));
-}
-
-static ssize_t module_sect_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
+static ssize_t module_sect_show(struct module_attribute *mattr,
+ struct module *mod, char *buf)
{
struct module_sect_attr *sattr =
- container_of(attr, struct module_sect_attr, attr);
+ container_of(mattr, struct module_sect_attr, mattr);
return sprintf(buf, "0x%lx\n", sattr->address);
}
-static struct sysfs_ops module_sect_ops = {
- .show = module_sect_show,
-};
-
-static struct kobj_type module_sect_ktype = {
- .sysfs_ops = &module_sect_ops,
- .release = module_sect_attrs_release,
-};
-
static void add_sect_attrs(struct module *mod, unsigned int nsect,
char *secstrings, Elf_Shdr *sechdrs)
{
- unsigned int nloaded = 0, i;
+ unsigned int nloaded = 0, i, size[2];
+ struct module_sect_attrs *sect_attrs;
struct module_sect_attr *sattr;
-
- if (!mod->mkobj)
- return;
+ struct attribute **gattr;
/* Count loaded sections and allocate structures */
for (i = 0; i < nsect; i++)
if (sechdrs[i].sh_flags & SHF_ALLOC)
nloaded++;
- mod->sect_attrs = kmalloc(sizeof(struct module_sections) +
- nloaded*sizeof(mod->sect_attrs->attrs[0]), GFP_KERNEL);
- if (! mod->sect_attrs)
+ size[0] = ALIGN(sizeof(*sect_attrs)
+ + nloaded * sizeof(sect_attrs->attrs[0]),
+ sizeof(sect_attrs->grp.attrs[0]));
+ size[1] = (nloaded + 1) * sizeof(sect_attrs->grp.attrs[0]);
+ if (! (sect_attrs = kmalloc(size[0] + size[1], GFP_KERNEL)))
return;
- /* sections entry setup */
- memset(mod->sect_attrs, 0, sizeof(struct module_sections));
- if (kobject_set_name(&mod->sect_attrs->kobj, "sections"))
- goto out;
- mod->sect_attrs->kobj.parent = &mod->mkobj->kobj;
- mod->sect_attrs->kobj.ktype = &module_sect_ktype;
- if (kobject_register(&mod->sect_attrs->kobj))
- goto out;
+ /* Setup section attributes. */
+ sect_attrs->grp.name = "sections";
+ sect_attrs->grp.attrs = (void *)sect_attrs + size[0];
- /* And the section attributes. */
- sattr = &mod->sect_attrs->attrs[0];
+ sattr = &sect_attrs->attrs[0];
+ gattr = &sect_attrs->grp.attrs[0];
for (i = 0; i < nsect; i++) {
if (! (sechdrs[i].sh_flags & SHF_ALLOC))
continue;
sattr->address = sechdrs[i].sh_addr;
strlcpy(sattr->name, secstrings + sechdrs[i].sh_name,
- MODULE_SECT_NAME_LEN);
- sattr->attr.name = sattr->name;
- sattr->attr.owner = mod;
- sattr->attr.mode = S_IRUGO;
- (void) sysfs_create_file(&mod->sect_attrs->kobj, &sattr->attr);
- sattr++;
+ MODULE_SECT_NAME_LEN);
+ sattr->mattr.show = module_sect_show;
+ sattr->mattr.store = NULL;
+ sattr->mattr.attr.name = sattr->name;
+ sattr->mattr.attr.owner = mod;
+ sattr->mattr.attr.mode = S_IRUGO;
+ *(gattr++) = &(sattr++)->mattr.attr;
}
+ *gattr = NULL;
+
+ if (sysfs_create_group(&mod->mkobj.kobj, &sect_attrs->grp))
+ goto out;
+
+ mod->sect_attrs = sect_attrs;
return;
out:
- kfree(mod->sect_attrs);
- mod->sect_attrs = NULL;
+ kfree(sect_attrs);
}
static void remove_sect_attrs(struct module *mod)
{
if (mod->sect_attrs) {
- kobject_unregister(&mod->sect_attrs->kobj);
+ sysfs_remove_group(&mod->mkobj.kobj,
+ &mod->sect_attrs->grp);
+ /* We are positive that no one is using any sect attrs
+ * at this point. Deallocate immediately. */
+ kfree(mod->sect_attrs);
mod->sect_attrs = NULL;
}
}
@@ -1029,11 +1015,11 @@ static inline void remove_sect_attrs(struct module *mod)
#ifdef CONFIG_MODULE_UNLOAD
static inline int module_add_refcnt_attr(struct module *mod)
{
- return sysfs_create_file(&mod->mkobj->kobj, &refcnt.attr);
+ return sysfs_create_file(&mod->mkobj.kobj, &refcnt.attr);
}
static void module_remove_refcnt_attr(struct module *mod)
{
- return sysfs_remove_file(&mod->mkobj->kobj, &refcnt.attr);
+ return sysfs_remove_file(&mod->mkobj.kobj, &refcnt.attr);
}
#else
static inline int module_add_refcnt_attr(struct module *mod)
@@ -1052,17 +1038,13 @@ static int mod_sysfs_setup(struct module *mod,
{
int err;
- mod->mkobj = kmalloc(sizeof(struct module_kobject), GFP_KERNEL);
- if (!mod->mkobj)
- return -ENOMEM;
-
- memset(&mod->mkobj->kobj, 0, sizeof(mod->mkobj->kobj));
- err = kobject_set_name(&mod->mkobj->kobj, "%s", mod->name);
+ memset(&mod->mkobj.kobj, 0, sizeof(mod->mkobj.kobj));
+ err = kobject_set_name(&mod->mkobj.kobj, "%s", mod->name);
if (err)
goto out;
- kobj_set_kset_s(mod->mkobj, module_subsys);
- mod->mkobj->mod = mod;
- err = kobject_register(&mod->mkobj->kobj);
+ kobj_set_kset_s(&mod->mkobj, module_subsys);
+ mod->mkobj.mod = mod;
+ err = kobject_register(&mod->mkobj.kobj);
if (err)
goto out;
@@ -1077,11 +1059,8 @@ static int mod_sysfs_setup(struct module *mod,
return 0;
out_unreg:
- /* Calls module_kobj_release */
- kobject_unregister(&mod->mkobj->kobj);
- return err;
+ kobject_unregister(&mod->mkobj.kobj);
out:
- kfree(mod->mkobj);
return err;
}
@@ -1090,8 +1069,7 @@ static void mod_kobject_remove(struct module *mod)
module_remove_refcnt_attr(mod);
module_param_sysfs_remove(mod);
- /* Calls module_kobj_release */
- kobject_unregister(&mod->mkobj->kobj);
+ kobject_unregister(&mod->mkobj.kobj);
}
/* Free a module, remove from lists, etc (must hold module mutex). */
@@ -1713,6 +1691,9 @@ static struct module *load_module(void __user *umod,
/ sizeof(struct kernel_param),
NULL);
}
+ if (err < 0)
+ goto arch_cleanup;
+
err = mod_sysfs_setup(mod,
(struct kernel_param *)
sechdrs[setupindex].sh_addr,
@@ -2089,11 +2070,9 @@ void module_add_driver(struct module *mod, struct device_driver *drv)
{
if (!mod || !drv)
return;
- if (!mod->mkobj)
- return;
/* Don't check return code; this call is idempotent */
- sysfs_create_link(&drv->kobj, &mod->mkobj->kobj, "module");
+ sysfs_create_link(&drv->kobj, &mod->mkobj.kobj, "module");
}
EXPORT_SYMBOL(module_add_driver);
diff --git a/kernel/params.c b/kernel/params.c
index 45dd451e17c1..ec3dbf68e253 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -77,10 +77,16 @@ static int parse_one(char *param,
static char *next_arg(char *args, char **param, char **val)
{
unsigned int i, equals = 0;
- int in_quote = 0;
+ int in_quote = 0, quoted = 0;
+ char *next;
/* Chew any extra spaces */
while (*args == ' ') args++;
+ if (*args == '"') {
+ args++;
+ in_quote = 1;
+ quoted = 1;
+ }
for (i = 0; args[i]; i++) {
if (args[i] == ' ' && !in_quote)
@@ -106,13 +112,16 @@ static char *next_arg(char *args, char **param, char **val)
if (args[i-1] == '"')
args[i-1] = '\0';
}
+ if (quoted && args[i-1] == '"')
+ args[i-1] = '\0';
}
if (args[i]) {
args[i] = '\0';
- return args + i + 1;
+ next = args + i + 1;
} else
- return args + i;
+ next = args + i;
+ return next;
}
/* Args looks like "foo=bar,bar2 baz=fuz wiz". */
@@ -357,26 +366,23 @@ extern struct kernel_param __start___param[], __stop___param[];
struct param_attribute
{
- struct attribute attr;
+ struct module_attribute mattr;
struct kernel_param *param;
};
-struct param_kobject
+struct module_param_attrs
{
- struct kobject kobj;
-
- unsigned int num_attributes;
- struct param_attribute attr[0];
+ struct attribute_group grp;
+ struct param_attribute attrs[0];
};
-#define to_param_attr(n) container_of(n, struct param_attribute, attr);
+#define to_param_attr(n) container_of(n, struct param_attribute, mattr);
-static ssize_t param_attr_show(struct kobject *kobj,
- struct attribute *attr,
- char *buf)
+static ssize_t param_attr_show(struct module_attribute *mattr,
+ struct module *mod, char *buf)
{
int count;
- struct param_attribute *attribute = to_param_attr(attr);
+ struct param_attribute *attribute = to_param_attr(mattr);
if (!attribute->param->get)
return -EPERM;
@@ -390,12 +396,12 @@ static ssize_t param_attr_show(struct kobject *kobj,
}
/* sysfs always hands a nul-terminated string in buf. We rely on that. */
-static ssize_t param_attr_store(struct kobject *kobj,
- struct attribute *attr,
+static ssize_t param_attr_store(struct module_attribute *mattr,
+ struct module *owner,
const char *buf, size_t len)
{
int err;
- struct param_attribute *attribute = to_param_attr(attr);
+ struct param_attribute *attribute = to_param_attr(mattr);
if (!attribute->param->set)
return -EPERM;
@@ -406,27 +412,6 @@ static ssize_t param_attr_store(struct kobject *kobj,
return err;
}
-
-static struct sysfs_ops param_sysfs_ops = {
- .show = param_attr_show,
- .store = param_attr_store,
-};
-
-static void param_kobj_release(struct kobject *kobj)
-{
- kfree(container_of(kobj, struct param_kobject, kobj));
-}
-
-static struct kobj_type param_ktype = {
- .sysfs_ops = &param_sysfs_ops,
- .release = &param_kobj_release,
-};
-
-static struct kset param_kset = {
- .subsys = &module_subsys,
- .ktype = &param_ktype,
-};
-
#ifdef CONFIG_MODULES
#define __modinit
#else
@@ -434,54 +419,6 @@ static struct kset param_kset = {
#endif
/*
- * param_add_attribute - actually adds an parameter to sysfs
- * @mod: owner of parameter
- * @pk: param_kobject the attribute shall be assigned to.
- * One per module, one per KBUILD_MODNAME.
- * @kp: kernel_param to be added
- * @skip: offset where the parameter name start in kp->name.
- * Needed for built-in modules
- *
- * Fill in data into appropriate &pk->attr[], and create sysfs file.
- */
-static __modinit int param_add_attribute(struct module *mod,
- struct param_kobject *pk,
- struct kernel_param *kp,
- unsigned int skip)
-{
- struct param_attribute *a;
- int err;
-
- a = &pk->attr[pk->num_attributes];
- a->attr.name = (char *) &kp->name[skip];
- a->attr.owner = mod;
- a->attr.mode = kp->perm;
- a->param = kp;
- err = sysfs_create_file(&pk->kobj, &a->attr);
- if (!err)
- pk->num_attributes++;
- return err;
-}
-
-/*
- * param_sysfs_remove - remove sysfs support for one module or KBUILD_MODNAME
- * @pk: struct param_kobject which is to be removed
- *
- * Called when an error in registration occurs or a module is removed
- * from the system.
- */
-static __modinit void param_sysfs_remove(struct param_kobject *pk)
-{
- unsigned int i;
- for (i = 0; i < pk->num_attributes; i++)
- sysfs_remove_file(&pk->kobj,&pk->attr[i].attr);
-
- /* Calls param_kobj_release */
- kobject_unregister(&pk->kobj);
-}
-
-
-/*
* param_sysfs_setup - setup sysfs support for one module or KBUILD_MODNAME
* @mk: struct module_kobject (contains parent kobject)
* @kparam: array of struct kernel_param, the actual parameter definitions
@@ -492,15 +429,17 @@ static __modinit void param_sysfs_remove(struct param_kobject *pk)
* in sysfs. A pointer to the param_kobject is returned on success,
* NULL if there's no parameter to export, or other ERR_PTR(err).
*/
-static __modinit struct param_kobject *
+static __modinit struct module_param_attrs *
param_sysfs_setup(struct module_kobject *mk,
struct kernel_param *kparam,
unsigned int num_params,
unsigned int name_skip)
{
- struct param_kobject *pk;
+ struct module_param_attrs *mp;
unsigned int valid_attrs = 0;
- unsigned int i;
+ unsigned int i, size[2];
+ struct param_attribute *pattr;
+ struct attribute **gattr;
int err;
for (i=0; i<num_params; i++) {
@@ -511,42 +450,39 @@ param_sysfs_setup(struct module_kobject *mk,
if (!valid_attrs)
return NULL;
- pk = kmalloc(sizeof(struct param_kobject)
- + sizeof(struct param_attribute) * valid_attrs,
- GFP_KERNEL);
- if (!pk)
- return ERR_PTR(-ENOMEM);
- memset(pk, 0, sizeof(struct param_kobject)
- + sizeof(struct param_attribute) * valid_attrs);
+ size[0] = ALIGN(sizeof(*mp) +
+ valid_attrs * sizeof(mp->attrs[0]),
+ sizeof(mp->grp.attrs[0]));
+ size[1] = (valid_attrs + 1) * sizeof(mp->grp.attrs[0]);
- err = kobject_set_name(&pk->kobj, "parameters");
- if (err)
- goto out;
+ mp = kmalloc(size[0] + size[1], GFP_KERNEL);
+ if (!mp)
+ return ERR_PTR(-ENOMEM);
- pk->kobj.kset = &param_kset;
- pk->kobj.parent = &mk->kobj;
- err = kobject_register(&pk->kobj);
- if (err)
- goto out;
+ mp->grp.name = "parameters";
+ mp->grp.attrs = (void *)mp + size[0];
+ pattr = &mp->attrs[0];
+ gattr = &mp->grp.attrs[0];
for (i = 0; i < num_params; i++) {
- if (kparam[i].perm) {
- err = param_add_attribute(mk->mod, pk,
- &kparam[i], name_skip);
- if (err)
- goto out_unreg;
+ struct kernel_param *kp = &kparam[i];
+ if (kp->perm) {
+ pattr->param = kp;
+ pattr->mattr.show = param_attr_show;
+ pattr->mattr.store = param_attr_store;
+ pattr->mattr.attr.name = (char *)&kp->name[name_skip];
+ pattr->mattr.attr.owner = mk->mod;
+ pattr->mattr.attr.mode = kp->perm;
+ *(gattr++) = &(pattr++)->mattr.attr;
}
}
+ *gattr = NULL;
- return pk;
-
-out_unreg:
- param_sysfs_remove(pk);
- return ERR_PTR(err);
-
-out:
- kfree(pk);
- return ERR_PTR(err);
+ if ((err = sysfs_create_group(&mk->kobj, &mp->grp))) {
+ kfree(mp);
+ return ERR_PTR(err);
+ }
+ return mp;
}
@@ -565,13 +501,13 @@ int module_param_sysfs_setup(struct module *mod,
struct kernel_param *kparam,
unsigned int num_params)
{
- struct param_kobject *pk;
+ struct module_param_attrs *mp;
- pk = param_sysfs_setup(mod->mkobj, kparam, num_params, 0);
- if (IS_ERR(pk))
- return PTR_ERR(pk);
+ mp = param_sysfs_setup(&mod->mkobj, kparam, num_params, 0);
+ if (IS_ERR(mp))
+ return PTR_ERR(mp);
- mod->params_kobject = pk;
+ mod->param_attrs = mp;
return 0;
}
@@ -584,9 +520,13 @@ int module_param_sysfs_setup(struct module *mod,
*/
void module_param_sysfs_remove(struct module *mod)
{
- if (mod->params_kobject) {
- param_sysfs_remove(mod->params_kobject);
- mod->params_kobject = NULL;
+ if (mod->param_attrs) {
+ sysfs_remove_group(&mod->mkobj.kobj,
+ &mod->param_attrs->grp);
+ /* We are positive that no one is using any param
+ * attrs at this point. Deallocate immediately. */
+ kfree(mod->param_attrs);
+ mod->param_attrs = NULL;
}
}
#endif
@@ -610,8 +550,10 @@ static void __init kernel_param_sysfs_setup(const char *name,
kobject_register(&mk->kobj);
/* no need to keep the kobject if no parameter is exported */
- if (!param_sysfs_setup(mk, kparam, num_params, name_skip))
+ if (!param_sysfs_setup(mk, kparam, num_params, name_skip)) {
kobject_unregister(&mk->kobj);
+ kfree(mk);
+ }
}
/*
@@ -691,7 +633,7 @@ static ssize_t module_attr_show(struct kobject *kobj,
if (!try_module_get(mk->mod))
return -ENODEV;
- ret = attribute->show(mk->mod, buf);
+ ret = attribute->show(attribute, mk->mod, buf);
module_put(mk->mod);
@@ -710,14 +652,8 @@ static struct sysfs_ops module_sysfs_ops = {
};
#endif
-static void module_kobj_release(struct kobject *kobj)
-{
- kfree(container_of(kobj, struct module_kobject, kobj));
-}
-
static struct kobj_type module_ktype = {
.sysfs_ops = &module_sysfs_ops,
- .release = &module_kobj_release,
};
decl_subsys(module, &module_ktype, NULL);
@@ -728,8 +664,6 @@ decl_subsys(module, &module_ktype, NULL);
static int __init param_sysfs_init(void)
{
subsystem_register(&module_subsys);
- kobject_set_name(&param_kset.kobj, "parameters");
- kset_init(&param_kset);
param_sysfs_builtin();
diff --git a/kernel/pid.c b/kernel/pid.c
index 185a8bee8168..edba31c681ac 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -60,7 +60,7 @@ typedef struct pidmap {
static pidmap_t pidmap_array[PIDMAP_ENTRIES] =
{ [ 0 ... PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } };
-static spinlock_t pidmap_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
+static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);
fastcall void free_pidmap(int pid)
{
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 33a67e7ad826..9e79eca513ca 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -85,7 +85,7 @@ static inline u64 mpy_l_X_l_ll(unsigned long mpy1,unsigned long mpy2)
*/
static kmem_cache_t *posix_timers_cache;
static struct idr posix_timers_id;
-static spinlock_t idr_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(idr_lock);
/*
* Just because the timer is not in the timer list does NOT mean it is
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index ed49ffd12bc6..696387ffe49c 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -48,7 +48,7 @@ config SOFTWARE_SUSPEND
involved in suspending. Also in this case there is a risk that buffers
on disk won't match with saved ones.
- For more information take a look at Documentation/power/swsusp.txt.
+ For more information take a look at <file:Documentation/power/swsusp.txt>.
config PM_STD_PARTITION
string "Default resume partition"
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index 0f5dc712ad70..b9b3f5881677 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -51,7 +51,7 @@ static void power_down(suspend_disk_method_t mode)
local_irq_save(flags);
switch(mode) {
case PM_DISK_PLATFORM:
- device_power_down(PM_SUSPEND_DISK);
+ device_power_down(PMSG_SUSPEND);
error = pm_ops->enter(PM_SUSPEND_DISK);
break;
case PM_DISK_SHUTDOWN:
@@ -144,8 +144,10 @@ static int prepare(void)
free_some_memory();
disable_nonboot_cpus();
- if ((error = device_suspend(PM_SUSPEND_DISK)))
+ if ((error = device_suspend(PMSG_FREEZE))) {
+ printk("Some devices failed to suspend\n");
goto Finish;
+ }
return 0;
Finish:
@@ -163,7 +165,7 @@ static int prepare(void)
*
* If we're going through the firmware, then get it over with quickly.
*
- * If not, then call swsusp to do it's thing, then figure out how
+ * If not, then call swsusp to do its thing, then figure out how
* to power down the system.
*/
@@ -201,7 +203,7 @@ int pm_suspend_disk(void)
* software_resume - Resume from a saved image.
*
* Called as a late_initcall (so all devices are discovered and
- * initialized), we call pmdisk to see if we have a saved image or not.
+ * initialized), we call swsusp to see if we have a saved image or not.
* If so, we quiesce devices, the restore the saved image. We will
* return above (in pm_suspend_disk() ) if everything goes well.
* Otherwise, we fail gracefully and return to the normally
@@ -221,7 +223,7 @@ static int software_resume(void)
return 0;
}
- pr_debug("PM: Reading pmdisk image.\n");
+ pr_debug("PM: Reading swsusp image.\n");
if ((error = swsusp_read()))
goto Done;
@@ -284,7 +286,7 @@ static char * pm_disk_modes[] = {
static ssize_t disk_show(struct subsystem * subsys, char * buf)
{
- return sprintf(buf,"%s\n",pm_disk_modes[pm_disk_mode]);
+ return sprintf(buf, "%s\n", pm_disk_modes[pm_disk_mode]);
}
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 0aefb03ede09..b7ef95c96230 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -65,7 +65,7 @@ static int suspend_prepare(suspend_state_t state)
goto Thaw;
}
- if ((error = device_suspend(state)))
+ if ((error = device_suspend(PMSG_SUSPEND)))
goto Finish;
return 0;
Finish:
@@ -78,13 +78,14 @@ static int suspend_prepare(suspend_state_t state)
}
-static int suspend_enter(u32 state)
+static int suspend_enter(suspend_state_t state)
{
int error = 0;
unsigned long flags;
local_irq_save(flags);
- if ((error = device_power_down(state)))
+
+ if ((error = device_power_down(PMSG_SUSPEND)))
goto Done;
error = pm_ops->enter(state);
device_power_up();
@@ -99,7 +100,7 @@ static int suspend_enter(u32 state)
* @state: State we're coming out of.
*
* Call platform code to clean up, restart processes, and free the
- * console that we've allocated.
+ * console that we've allocated. This is not called for suspend-to-disk.
*/
static void suspend_finish(suspend_state_t state)
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index 4bf7fe6d2feb..22cdaa42d922 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -67,12 +67,13 @@
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
#include <asm/io.h>
#include "power.h"
/* References to section boundaries */
-extern char __nosave_begin, __nosave_end;
+extern const void __nosave_begin, __nosave_end;
/* Variables to be preserved over suspend */
static int pagedir_order_check;
@@ -419,7 +420,7 @@ struct highmem_page {
struct highmem_page *next;
};
-struct highmem_page *highmem_copy = NULL;
+static struct highmem_page *highmem_copy;
static int save_highmem_zone(struct zone *zone)
{
@@ -752,11 +753,11 @@ static int swsusp_alloc(void)
return -ENOSPC;
if ((error = alloc_pagedir())) {
- pr_debug("suspend: Allocating pagedir failed.\n");
+ printk(KERN_ERR "suspend: Allocating pagedir failed.\n");
return error;
}
if ((error = alloc_image_pages())) {
- pr_debug("suspend: Allocating image pages failed.\n");
+ printk(KERN_ERR "suspend: Allocating image pages failed.\n");
swsusp_free();
return error;
}
@@ -766,7 +767,7 @@ static int swsusp_alloc(void)
return 0;
}
-int suspend_prepare_image(void)
+static int suspend_prepare_image(void)
{
int error;
@@ -842,11 +843,22 @@ int swsusp_suspend(void)
if ((error = arch_prepare_suspend()))
return error;
local_irq_disable();
+ /* At this point, device_suspend() has been called, but *not*
+ * device_power_down(). We *must* device_power_down() now.
+ * Otherwise, drivers for some devices (e.g. interrupt controllers)
+ * become desynchronized with the actual state of the hardware
+ * at resume time, and evil weirdness ensues.
+ */
+ if ((error = device_power_down(PMSG_FREEZE))) {
+ local_irq_enable();
+ return error;
+ }
save_processor_state();
error = swsusp_arch_suspend();
/* Restore control flow magically appears here */
restore_processor_state();
restore_highmem();
+ device_power_up();
local_irq_enable();
return error;
}
@@ -866,6 +878,7 @@ int swsusp_resume(void)
{
int error;
local_irq_disable();
+ device_power_down(PMSG_FREEZE);
/* We'll ignore saved state, but this gets preempt count (etc) right */
save_processor_state();
error = swsusp_arch_resume();
@@ -875,6 +888,7 @@ int swsusp_resume(void)
BUG_ON(!error);
restore_processor_state();
restore_highmem();
+ device_power_up();
local_irq_enable();
return error;
}
@@ -1036,12 +1050,12 @@ static int submit(int rw, pgoff_t page_off, void * page)
return error;
}
-int bio_read_page(pgoff_t page_off, void * page)
+static int bio_read_page(pgoff_t page_off, void * page)
{
return submit(READ, page_off, page);
}
-int bio_write_page(pgoff_t page_off, void * page)
+static int bio_write_page(pgoff_t page_off, void * page)
{
return submit(WRITE, page_off, page);
}
@@ -1158,7 +1172,7 @@ static int __init read_pagedir(void)
return -ENOMEM;
pagedir_nosave = (struct pbe *)addr;
- pr_debug("pmdisk: Reading pagedir (%d Pages)\n",n);
+ pr_debug("swsusp: Reading pagedir (%d Pages)\n",n);
for (i = 0; i < n && !error; i++, addr += PAGE_SIZE) {
unsigned long offset = swp_offset(swsusp_info.pagedir[i]);
diff --git a/kernel/printk.c b/kernel/printk.c
index 4e9fd492f30e..d914a90d6206 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -78,7 +78,7 @@ static int console_locked;
* It is also used in interesting ways to provide interlocking in
* release_console_sem().
*/
-static spinlock_t logbuf_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(logbuf_lock);
static char __log_buf[__LOG_BUF_LEN];
static char *log_buf = __log_buf;
@@ -284,6 +284,7 @@ int do_syslog(int type, char __user * buf, int len)
error = __put_user(c,buf);
buf++;
i++;
+ cond_resched();
spin_lock_irq(&logbuf_lock);
}
spin_unlock_irq(&logbuf_lock);
@@ -325,6 +326,7 @@ int do_syslog(int type, char __user * buf, int len)
c = LOG_BUF(j);
spin_unlock_irq(&logbuf_lock);
error = __put_user(c,&buf[count-1-i]);
+ cond_resched();
spin_lock_irq(&logbuf_lock);
}
spin_unlock_irq(&logbuf_lock);
@@ -340,6 +342,7 @@ int do_syslog(int type, char __user * buf, int len)
error = -EFAULT;
break;
}
+ cond_resched();
}
}
break;
@@ -642,8 +645,9 @@ void release_console_sem(void)
_con_start = con_start;
_log_end = log_end;
con_start = log_end; /* Flush */
- spin_unlock_irqrestore(&logbuf_lock, flags);
+ spin_unlock(&logbuf_lock);
call_console_drivers(_con_start, _log_end);
+ local_irq_restore(flags);
}
console_locked = 0;
console_may_schedule = 0;
@@ -871,7 +875,7 @@ void tty_write_message(struct tty_struct *tty, char *msg)
*/
int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst)
{
- static spinlock_t ratelimit_lock = SPIN_LOCK_UNLOCKED;
+ static DEFINE_SPINLOCK(ratelimit_lock);
static unsigned long toks = 10*5*HZ;
static unsigned long last_msg;
static int missed;
diff --git a/kernel/profile.c b/kernel/profile.c
index ff62fa98328a..a38fa70075fe 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -83,7 +83,7 @@ void __init profile_init(void)
#ifdef CONFIG_PROFILING
static DECLARE_RWSEM(profile_rwsem);
-static rwlock_t handoff_lock = RW_LOCK_UNLOCKED;
+static DEFINE_RWLOCK(handoff_lock);
static struct notifier_block * task_exit_notifier;
static struct notifier_block * task_free_notifier;
static struct notifier_block * munmap_notifier;
@@ -326,17 +326,15 @@ static int __devinit profile_cpu_callback(struct notifier_block *info,
node = cpu_to_node(cpu);
per_cpu(cpu_profile_flip, cpu) = 0;
if (!per_cpu(cpu_profile_hits, cpu)[1]) {
- page = alloc_pages_node(node, GFP_KERNEL, 0);
+ page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
if (!page)
return NOTIFY_BAD;
- clear_highpage(page);
per_cpu(cpu_profile_hits, cpu)[1] = page_address(page);
}
if (!per_cpu(cpu_profile_hits, cpu)[0]) {
- page = alloc_pages_node(node, GFP_KERNEL, 0);
+ page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
if (!page)
goto out_free;
- clear_highpage(page);
per_cpu(cpu_profile_hits, cpu)[0] = page_address(page);
}
break;
@@ -510,16 +508,14 @@ static int __init create_hash_tables(void)
int node = cpu_to_node(cpu);
struct page *page;
- page = alloc_pages_node(node, GFP_KERNEL, 0);
+ page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
if (!page)
goto out_cleanup;
- clear_highpage(page);
per_cpu(cpu_profile_hits, cpu)[1]
= (struct profile_hit *)page_address(page);
- page = alloc_pages_node(node, GFP_KERNEL, 0);
+ page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
if (!page)
goto out_cleanup;
- clear_highpage(page);
per_cpu(cpu_profile_hits, cpu)[0]
= (struct profile_hit *)page_address(page);
}
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 60801c692810..136a8feba91e 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -38,10 +38,24 @@ void __ptrace_link(task_t *child, task_t *new_parent)
SET_LINKS(child);
}
-static inline int pending_resume_signal(struct sigpending *pending)
+/*
+ * Turn a tracing stop into a normal stop now, since with no tracer there
+ * would be no way to wake it up with SIGCONT or SIGKILL. If there was a
+ * signal sent that would resume the child, but didn't because it was in
+ * TASK_TRACED, resume it now.
+ * Requires that irqs be disabled.
+ */
+void ptrace_untrace(task_t *child)
{
-#define M(sig) (1UL << ((sig)-1))
- return sigtestsetmask(&pending->signal, M(SIGCONT) | M(SIGKILL));
+ spin_lock(&child->sighand->siglock);
+ if (child->state == TASK_TRACED) {
+ if (child->signal->flags & SIGNAL_STOP_STOPPED) {
+ child->state = TASK_STOPPED;
+ } else {
+ signal_wake_up(child, 1);
+ }
+ }
+ spin_unlock(&child->sighand->siglock);
}
/*
@@ -55,29 +69,15 @@ void __ptrace_unlink(task_t *child)
if (!child->ptrace)
BUG();
child->ptrace = 0;
- if (list_empty(&child->ptrace_list))
- return;
- list_del_init(&child->ptrace_list);
- REMOVE_LINKS(child);
- child->parent = child->real_parent;
- SET_LINKS(child);
-
- if (child->state == TASK_TRACED) {
- /*
- * Turn a tracing stop into a normal stop now,
- * since with no tracer there would be no way
- * to wake it up with SIGCONT or SIGKILL.
- * If there was a signal sent that would resume the child,
- * but didn't because it was in TASK_TRACED, resume it now.
- */
- spin_lock(&child->sighand->siglock);
- child->state = TASK_STOPPED;
- if (pending_resume_signal(&child->pending) ||
- pending_resume_signal(&child->signal->shared_pending)) {
- signal_wake_up(child, 1);
- }
- spin_unlock(&child->sighand->siglock);
+ if (!list_empty(&child->ptrace_list)) {
+ list_del_init(&child->ptrace_list);
+ REMOVE_LINKS(child);
+ child->parent = child->real_parent;
+ SET_LINKS(child);
}
+
+ if (child->state == TASK_TRACED)
+ ptrace_untrace(child);
}
/*
@@ -319,18 +319,33 @@ static int ptrace_setoptions(struct task_struct *child, long data)
static int ptrace_getsiginfo(struct task_struct *child, siginfo_t __user * data)
{
- if (child->last_siginfo == NULL)
- return -EINVAL;
- return copy_siginfo_to_user(data, child->last_siginfo);
+ siginfo_t lastinfo;
+
+ spin_lock_irq(&child->sighand->siglock);
+ if (likely(child->last_siginfo != NULL)) {
+ memcpy(&lastinfo, child->last_siginfo, sizeof (siginfo_t));
+ spin_unlock_irq(&child->sighand->siglock);
+ return copy_siginfo_to_user(data, &lastinfo);
+ }
+ spin_unlock_irq(&child->sighand->siglock);
+ return -EINVAL;
}
static int ptrace_setsiginfo(struct task_struct *child, siginfo_t __user * data)
{
- if (child->last_siginfo == NULL)
- return -EINVAL;
- if (copy_from_user(child->last_siginfo, data, sizeof (siginfo_t)) != 0)
+ siginfo_t newinfo;
+
+ if (copy_from_user(&newinfo, data, sizeof (siginfo_t)) != 0)
return -EFAULT;
- return 0;
+
+ spin_lock_irq(&child->sighand->siglock);
+ if (likely(child->last_siginfo != NULL)) {
+ memcpy(child->last_siginfo, &newinfo, sizeof (siginfo_t));
+ spin_unlock_irq(&child->sighand->siglock);
+ return 0;
+ }
+ spin_unlock_irq(&child->sighand->siglock);
+ return -EINVAL;
}
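
The rewritten ptrace_getsiginfo() and ptrace_setsiginfo() follow a standard shape: child->last_siginfo is only stable while child->sighand->siglock is held, but copying to or from user space can fault and sleep, so it must not happen with that spinlock held and interrupts off. A minimal sketch of the pattern, with a hypothetical structure invented purely for illustration:

/* Hypothetical object; only the snapshot-under-lock ordering mirrors the patch. */
struct tracee_info {
	spinlock_t lock;
	siginfo_t info;
};

static int read_info(struct tracee_info *t, siginfo_t __user *uptr)
{
	siginfo_t snap;

	spin_lock_irq(&t->lock);
	memcpy(&snap, &t->info, sizeof(snap));	/* cheap copy under the lock */
	spin_unlock_irq(&t->lock);

	return copy_siginfo_to_user(uptr, &snap); /* may fault, lock already dropped */
}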
int ptrace_request(struct task_struct *child, long request,
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 80cac1cd0859..f0ae3c3c013e 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -49,9 +49,9 @@
/* Definition for rcupdate control block. */
struct rcu_ctrlblk rcu_ctrlblk =
- { .cur = -300, .completed = -300 , .lock = SEQCNT_ZERO };
+ { .cur = -300, .completed = -300 };
struct rcu_ctrlblk rcu_bh_ctrlblk =
- { .cur = -300, .completed = -300 , .lock = SEQCNT_ZERO };
+ { .cur = -300, .completed = -300 };
/* Bookkeeping of the progress of the grace period */
struct rcu_state {
@@ -60,9 +60,9 @@ struct rcu_state {
/* for current batch to proceed. */
};
-struct rcu_state rcu_state ____cacheline_maxaligned_in_smp =
+static struct rcu_state rcu_state ____cacheline_maxaligned_in_smp =
{.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE };
-struct rcu_state rcu_bh_state ____cacheline_maxaligned_in_smp =
+static struct rcu_state rcu_bh_state ____cacheline_maxaligned_in_smp =
{.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE };
DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L };
@@ -185,10 +185,13 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp, struct rcu_state *rsp,
rcp->completed == rcp->cur) {
/* Can't change, since spin lock held. */
cpus_andnot(rsp->cpumask, cpu_online_map, nohz_cpu_mask);
- write_seqcount_begin(&rcp->lock);
+
rcp->next_pending = 0;
+ /* next_pending == 0 must be visible in __rcu_process_callbacks()
+ * before it can see new value of cur.
+ */
+ smp_wmb();
rcp->cur++;
- write_seqcount_end(&rcp->lock);
}
}
@@ -216,9 +219,9 @@ static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
struct rcu_state *rsp, struct rcu_data *rdp)
{
if (rdp->quiescbatch != rcp->cur) {
- /* new grace period: record qsctr value. */
+ /* start new grace period: */
rdp->qs_pending = 1;
- rdp->last_qsctr = rdp->qsctr;
+ rdp->passed_quiesc = 0;
rdp->quiescbatch = rcp->cur;
return;
}
@@ -231,11 +234,10 @@ static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
return;
/*
- * Races with local timer interrupt - in the worst case
- * we may miss one quiescent state of that CPU. That is
- * tolerable. So no need to disable interrupts.
+ * Was there a quiescent state since the beginning of the grace
+ * period? If no, then exit and wait for the next call.
*/
- if (rdp->qsctr == rdp->last_qsctr)
+ if (!rdp->passed_quiesc)
return;
rdp->qs_pending = 0;
@@ -319,8 +321,6 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
local_irq_disable();
if (rdp->nxtlist && !rdp->curlist) {
- int next_pending, seq;
-
rdp->curlist = rdp->nxtlist;
rdp->curtail = rdp->nxttail;
rdp->nxtlist = NULL;
@@ -330,14 +330,15 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
/*
* start the next batch of callbacks
*/
- do {
- seq = read_seqcount_begin(&rcp->lock);
- /* determine batch number */
- rdp->batch = rcp->cur + 1;
- next_pending = rcp->next_pending;
- } while (read_seqcount_retry(&rcp->lock, seq));
-
- if (!next_pending) {
+
+ /* determine batch number */
+ rdp->batch = rcp->cur + 1;
+ /* see the comment and corresponding wmb() in
+ * the rcu_start_batch()
+ */
+ smp_rmb();
+
+ if (!rcp->next_pending) {
/* and start it/schedule start if it's a new batch */
spin_lock(&rsp->lock);
rcu_start_batch(rcp, rsp, 1);
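
With the seqcount dropped from struct rcu_ctrlblk, the ordering between rcu_start_batch() and __rcu_process_callbacks() now rests entirely on the smp_wmb()/smp_rmb() pair described in the two new comments. Stripped of the RCU specifics (the names below are invented; flag stands in for rcp->next_pending and generation for rcp->cur), the pairing looks like this:

static int flag;
static long generation;

static void writer(void)			/* rcu_start_batch() side */
{
	flag = 0;				/* (1) clear next_pending      */
	smp_wmb();				/* make (1) visible before (2) */
	generation++;				/* (2) publish the new batch   */
}

static void reader(long *gen, int *pending)	/* __rcu_process_callbacks() side */
{
	*gen = generation;			/* read cur first                */
	smp_rmb();				/* pairs with the writer's wmb() */
	*pending = flag;			/* a reader that saw the new gen
						 * is guaranteed to see flag == 0 */
}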
diff --git a/kernel/resource.c b/kernel/resource.c
index 5f013dc4f649..72596bc6fdaf 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -39,7 +39,7 @@ struct resource iomem_resource = {
EXPORT_SYMBOL(iomem_resource);
-static rwlock_t resource_lock = RW_LOCK_UNLOCKED;
+static DEFINE_RWLOCK(resource_lock);
#ifdef CONFIG_PROC_FS
diff --git a/kernel/sched.c b/kernel/sched.c
index 43bfde70a34c..099f946ed40c 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -36,6 +36,7 @@
#include <linux/blkdev.h>
#include <linux/delay.h>
#include <linux/smp.h>
+#include <linux/threads.h>
#include <linux/timer.h>
#include <linux/rcupdate.h>
#include <linux/cpu.h>
@@ -48,12 +49,6 @@
#include <asm/unistd.h>
-#ifdef CONFIG_NUMA
-#define cpu_to_node_mask(cpu) node_to_cpumask(cpu_to_node(cpu))
-#else
-#define cpu_to_node_mask(cpu) (cpu_online_map)
-#endif
-
/*
* Convert user-nice values [ -20 ... 0 ... 19 ]
* to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
@@ -97,7 +92,6 @@
#define MAX_SLEEP_AVG (DEF_TIMESLICE * MAX_BONUS)
#define STARVATION_LIMIT (MAX_SLEEP_AVG)
#define NS_MAX_SLEEP_AVG (JIFFIES_TO_NS(MAX_SLEEP_AVG))
-#define CREDIT_LIMIT 100
/*
* If a task is 'interactive' then we reinsert it in the active
@@ -131,12 +125,14 @@
(NS_TO_JIFFIES((p)->sleep_avg) * MAX_BONUS / \
MAX_SLEEP_AVG)
+#define GRANULARITY (10 * HZ / 1000 ? : 1)
+
#ifdef CONFIG_SMP
-#define TIMESLICE_GRANULARITY(p) (MIN_TIMESLICE * \
+#define TIMESLICE_GRANULARITY(p) (GRANULARITY * \
(1 << (((MAX_BONUS - CURRENT_BONUS(p)) ? : 1) - 1)) * \
num_online_cpus())
#else
-#define TIMESLICE_GRANULARITY(p) (MIN_TIMESLICE * \
+#define TIMESLICE_GRANULARITY(p) (GRANULARITY * \
(1 << (((MAX_BONUS - CURRENT_BONUS(p)) ? : 1) - 1)))
#endif
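
For concreteness (the HZ values here are assumed, not taken from the patch): with HZ = 1000 the new GRANULARITY macro works out to 10 * 1000 / 1000 = 10 ticks, roughly 10 ms; with HZ = 100 it is 10 * 100 / 1000 = 1 tick; and the trailing "? : 1" only matters when the integer division would otherwise give 0, i.e. for HZ below 100.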
@@ -153,12 +149,6 @@
(JIFFIES_TO_NS(MAX_SLEEP_AVG * \
(MAX_BONUS / 2 + DELTA((p)) + 1) / MAX_BONUS - 1))
-#define HIGH_CREDIT(p) \
- ((p)->interactive_credit > CREDIT_LIMIT)
-
-#define LOW_CREDIT(p) \
- ((p)->interactive_credit < -CREDIT_LIMIT)
-
#define TASK_PREEMPTS_CURR(p, rq) \
((p)->prio < (rq)->curr->prio)
@@ -448,11 +438,21 @@ static runqueue_t *this_rq_lock(void)
return rq;
}
-static inline void rq_unlock(runqueue_t *rq)
- __releases(rq->lock)
+#ifdef CONFIG_SCHED_SMT
+static int cpu_and_siblings_are_idle(int cpu)
{
- spin_unlock_irq(&rq->lock);
+ int sib;
+ for_each_cpu_mask(sib, cpu_sibling_map[cpu]) {
+ if (idle_cpu(sib))
+ continue;
+ return 0;
+ }
+
+ return 1;
}
+#else
+#define cpu_and_siblings_are_idle(A) idle_cpu(A)
+#endif
#ifdef CONFIG_SCHEDSTATS
/*
@@ -581,10 +581,14 @@ static void enqueue_task(struct task_struct *p, prio_array_t *array)
}
/*
- * Used by the migration code - we pull tasks from the head of the
- * remote queue so we want these tasks to show up at the head of the
- * local queue:
+ * Put task to the end of the run list without the overhead of dequeue
+ * followed by enqueue.
*/
+static void requeue_task(struct task_struct *p, prio_array_t *array)
+{
+ list_move_tail(&p->run_list, array->queue + p->prio);
+}
+
static inline void enqueue_task_head(struct task_struct *p, prio_array_t *array)
{
list_add(&p->run_list, array->queue + p->prio);
@@ -663,8 +667,6 @@ static void recalc_task_prio(task_t *p, unsigned long long now)
sleep_time > INTERACTIVE_SLEEP(p)) {
p->sleep_avg = JIFFIES_TO_NS(MAX_SLEEP_AVG -
DEF_TIMESLICE);
- if (!HIGH_CREDIT(p))
- p->interactive_credit++;
} else {
/*
* The lower the sleep avg a task has the more
@@ -673,19 +675,11 @@ static void recalc_task_prio(task_t *p, unsigned long long now)
sleep_time *= (MAX_BONUS - CURRENT_BONUS(p)) ? : 1;
/*
- * Tasks with low interactive_credit are limited to
- * one timeslice worth of sleep avg bonus.
+ * Tasks waking from uninterruptible sleep are
+ * limited in their sleep_avg rise as they
+ * are likely to be waiting on I/O
*/
- if (LOW_CREDIT(p) &&
- sleep_time > JIFFIES_TO_NS(task_timeslice(p)))
- sleep_time = JIFFIES_TO_NS(task_timeslice(p));
-
- /*
- * Non high_credit tasks waking from uninterruptible
- * sleep are limited in their sleep_avg rise as they
- * are likely to be cpu hogs waiting on I/O
- */
- if (p->activated == -1 && !HIGH_CREDIT(p) && p->mm) {
+ if (p->activated == -1 && p->mm) {
if (p->sleep_avg >= INTERACTIVE_SLEEP(p))
sleep_time = 0;
else if (p->sleep_avg + sleep_time >=
@@ -705,11 +699,8 @@ static void recalc_task_prio(task_t *p, unsigned long long now)
*/
p->sleep_avg += sleep_time;
- if (p->sleep_avg > NS_MAX_SLEEP_AVG) {
+ if (p->sleep_avg > NS_MAX_SLEEP_AVG)
p->sleep_avg = NS_MAX_SLEEP_AVG;
- if (!HIGH_CREDIT(p))
- p->interactive_credit++;
- }
}
}
@@ -934,9 +925,10 @@ static inline unsigned long target_load(int cpu)
#endif
/*
- * wake_idle() is useful especially on SMT architectures to wake a
- * task onto an idle sibling if we would otherwise wake it onto a
- * busy sibling.
+ * wake_idle() will wake a task on an idle cpu if task->cpu is
+ * not idle and an idle cpu is available. The span of cpus to
+ * search starts with cpus closest then further out as needed,
+ * so we always favor a closer, idle cpu.
*
* Returns the CPU we should wake onto.
*/
@@ -944,24 +936,23 @@ static inline unsigned long target_load(int cpu)
static int wake_idle(int cpu, task_t *p)
{
cpumask_t tmp;
- runqueue_t *rq = cpu_rq(cpu);
struct sched_domain *sd;
int i;
if (idle_cpu(cpu))
return cpu;
- sd = rq->sd;
- if (!(sd->flags & SD_WAKE_IDLE))
- return cpu;
-
- cpus_and(tmp, sd->span, p->cpus_allowed);
-
- for_each_cpu_mask(i, tmp) {
- if (idle_cpu(i))
- return i;
+ for_each_domain(cpu, sd) {
+ if (sd->flags & SD_WAKE_IDLE) {
+ cpus_and(tmp, sd->span, cpu_online_map);
+ cpus_and(tmp, tmp, p->cpus_allowed);
+ for_each_cpu_mask(i, tmp) {
+ if (idle_cpu(i))
+ return i;
+ }
+ }
+ else break;
}
-
return cpu;
}
#else
@@ -1073,7 +1064,7 @@ static int try_to_wake_up(task_t * p, unsigned int state, int sync)
out_set_cpu:
schedstat_inc(rq, ttwu_attempts);
new_cpu = wake_idle(new_cpu, p);
- if (new_cpu != cpu && cpu_isset(new_cpu, p->cpus_allowed)) {
+ if (new_cpu != cpu) {
schedstat_inc(rq, ttwu_moved);
set_task_cpu(p, new_cpu);
task_rq_unlock(rq, &flags);
@@ -1126,7 +1117,7 @@ out:
int fastcall wake_up_process(task_t * p)
{
return try_to_wake_up(p, TASK_STOPPED | TASK_TRACED |
- TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0);
+ TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE, 0);
}
EXPORT_SYMBOL(wake_up_process);
@@ -1191,7 +1182,7 @@ void fastcall sched_fork(task_t *p)
*/
current->time_slice = 1;
preempt_disable();
- scheduler_tick(0, 0);
+ scheduler_tick();
local_irq_enable();
preempt_enable();
} else
@@ -1227,8 +1218,6 @@ void fastcall wake_up_new_task(task_t * p, unsigned long clone_flags)
p->sleep_avg = JIFFIES_TO_NS(CURRENT_BONUS(p) *
CHILD_PENALTY / 100 * MAX_SLEEP_AVG / MAX_BONUS);
- p->interactive_credit = 0;
-
p->prio = effective_prio(p);
if (likely(cpu == this_cpu)) {
@@ -1667,13 +1656,18 @@ int can_migrate_task(task_t *p, runqueue_t *rq, int this_cpu,
if (!cpu_isset(this_cpu, p->cpus_allowed))
return 0;
- /* Aggressive migration if we've failed balancing */
- if (idle == NEWLY_IDLE ||
- sd->nr_balance_failed < sd->cache_nice_tries) {
- if (task_hot(p, rq->timestamp_last_tick, sd))
- return 0;
- }
+ /*
+ * Aggressive migration if:
+ * 1) the [whole] cpu is idle, or
+ * 2) too many balance attempts have failed.
+ */
+
+ if (cpu_and_siblings_are_idle(this_cpu) || \
+ sd->nr_balance_failed > sd->cache_nice_tries)
+ return 1;
+ if (task_hot(p, rq->timestamp_last_tick, sd))
+ return 0;
return 1;
}
@@ -2009,7 +2003,7 @@ static int load_balance(int this_cpu, runqueue_t *this_rq,
if (sd->balance_interval < sd->max_interval)
sd->balance_interval++;
} else {
- sd->nr_balance_failed = 0;
+ sd->nr_balance_failed = 0;
/* We were unbalanced, so reset the balancing interval */
sd->balance_interval = sd->min_interval;
@@ -2088,23 +2082,6 @@ static inline void idle_balance(int this_cpu, runqueue_t *this_rq)
}
}
-#ifdef CONFIG_SCHED_SMT
-static int cpu_and_siblings_are_idle(int cpu)
-{
- int sib;
- for_each_cpu_mask(sib, cpu_sibling_map[cpu]) {
- if (idle_cpu(sib))
- continue;
- return 0;
- }
-
- return 1;
-}
-#else
-#define cpu_and_siblings_are_idle(A) idle_cpu(A)
-#endif
-
-
/*
* active_load_balance is run by migration threads. It pushes running tasks
* off the busiest CPU onto idle CPUs. It requires at least 1 task to be
@@ -2117,7 +2094,9 @@ static void active_load_balance(runqueue_t *busiest_rq, int busiest_cpu)
{
struct sched_domain *sd;
struct sched_group *cpu_group;
+ runqueue_t *target_rq;
cpumask_t visited_cpus;
+ int cpu;
schedstat_inc(busiest_rq, alb_cnt);
/*
@@ -2126,46 +2105,43 @@ static void active_load_balance(runqueue_t *busiest_rq, int busiest_cpu)
*/
visited_cpus = CPU_MASK_NONE;
for_each_domain(busiest_cpu, sd) {
- if (!(sd->flags & SD_LOAD_BALANCE) || busiest_rq->nr_running <= 1)
- break; /* no more domains to search or no more tasks to move */
+ if (!(sd->flags & SD_LOAD_BALANCE))
+ /* no more domains to search */
+ break;
cpu_group = sd->groups;
- do { /* sched_groups should either use list_heads or be merged into the domains structure */
- int cpu, target_cpu = -1;
- runqueue_t *target_rq;
-
+ do {
for_each_cpu_mask(cpu, cpu_group->cpumask) {
- if (cpu_isset(cpu, visited_cpus) || cpu == busiest_cpu ||
- !cpu_and_siblings_are_idle(cpu)) {
- cpu_set(cpu, visited_cpus);
+ if (busiest_rq->nr_running <= 1)
+ /* no more tasks left to move */
+ return;
+ if (cpu_isset(cpu, visited_cpus))
+ continue;
+ cpu_set(cpu, visited_cpus);
+ if (!cpu_and_siblings_are_idle(cpu) || cpu == busiest_cpu)
continue;
- }
- target_cpu = cpu;
- break;
- }
- if (target_cpu == -1)
- goto next_group; /* failed to find a suitable target cpu in this domain */
-
- target_rq = cpu_rq(target_cpu);
- /*
- * This condition is "impossible", if it occurs we need to fix it
- * Reported by Bjorn Helgaas on a 128-cpu setup.
- */
- BUG_ON(busiest_rq == target_rq);
-
- /* move a task from busiest_rq to target_rq */
- double_lock_balance(busiest_rq, target_rq);
- if (move_tasks(target_rq, target_cpu, busiest_rq, 1, sd, SCHED_IDLE)) {
- schedstat_inc(busiest_rq, alb_lost);
- schedstat_inc(target_rq, alb_gained);
- } else {
- schedstat_inc(busiest_rq, alb_failed);
+ target_rq = cpu_rq(cpu);
+ /*
+ * This condition is "impossible", if it occurs
+ * we need to fix it. Originally reported by
+ * Bjorn Helgaas on a 128-cpu setup.
+ */
+ BUG_ON(busiest_rq == target_rq);
+
+ /* move a task from busiest_rq to target_rq */
+ double_lock_balance(busiest_rq, target_rq);
+ if (move_tasks(target_rq, cpu, busiest_rq,
+ 1, sd, SCHED_IDLE)) {
+ schedstat_inc(busiest_rq, alb_lost);
+ schedstat_inc(target_rq, alb_gained);
+ } else {
+ schedstat_inc(busiest_rq, alb_failed);
+ }
+ spin_unlock(&target_rq->lock);
}
- spin_unlock(&target_rq->lock);
-next_group:
cpu_group = cpu_group->next;
- } while (cpu_group != sd->groups && busiest_rq->nr_running > 1);
+ } while (cpu_group != sd->groups);
}
}
@@ -2275,48 +2251,172 @@ EXPORT_PER_CPU_SYMBOL(kstat);
((rq)->curr->static_prio > (rq)->best_expired_prio))
/*
+ * Do the virtual cpu time signal calculations.
+ * @p: the process that the cpu time gets accounted to
+ * @cputime: the cpu time spent in user space since the last update
+ */
+static inline void account_it_virt(struct task_struct * p, cputime_t cputime)
+{
+ cputime_t it_virt = p->it_virt_value;
+
+ if (cputime_gt(it_virt, cputime_zero) &&
+ cputime_gt(cputime, cputime_zero)) {
+ if (cputime_ge(cputime, it_virt)) {
+ it_virt = cputime_add(it_virt, p->it_virt_incr);
+ send_sig(SIGVTALRM, p, 1);
+ }
+ it_virt = cputime_sub(it_virt, cputime);
+ p->it_virt_value = it_virt;
+ }
+}
+
+/*
+ * Do the virtual profiling signal calculations.
+ * @p: the process that the cpu time gets accounted to
+ * @cputime: the cpu time spent in user and kernel space since the last update
+ */
+static void account_it_prof(struct task_struct *p, cputime_t cputime)
+{
+ cputime_t it_prof = p->it_prof_value;
+
+ if (cputime_gt(it_prof, cputime_zero) &&
+ cputime_gt(cputime, cputime_zero)) {
+ if (cputime_ge(cputime, it_prof)) {
+ it_prof = cputime_add(it_prof, p->it_prof_incr);
+ send_sig(SIGPROF, p, 1);
+ }
+ it_prof = cputime_sub(it_prof, cputime);
+ p->it_prof_value = it_prof;
+ }
+}
+
+/*
+ * Check if the process went over its cputime resource limit after
+ * some cpu time got added to utime/stime.
+ * @p: the process that the cpu time gets accounted to
+ * @cputime: the cpu time spent in user and kernel space since the last update
+ */
+static void check_rlimit(struct task_struct *p, cputime_t cputime)
+{
+ cputime_t total, tmp;
+
+ total = cputime_add(p->utime, p->stime);
+ tmp = jiffies_to_cputime(p->signal->rlim[RLIMIT_CPU].rlim_cur);
+ if (unlikely(cputime_gt(total, tmp))) {
+ /* Send SIGXCPU every second. */
+ tmp = cputime_sub(total, cputime);
+ if (cputime_to_secs(tmp) < cputime_to_secs(total))
+ send_sig(SIGXCPU, p, 1);
+ /* and SIGKILL when we go over max.. */
+ tmp = jiffies_to_cputime(p->signal->rlim[RLIMIT_CPU].rlim_max);
+ if (cputime_gt(total, tmp))
+ send_sig(SIGKILL, p, 1);
+ }
+}
+
+/*
+ * Account user cpu time to a process.
+ * @p: the process that the cpu time gets accounted to
+ * @cputime: the cpu time spent in user space since the last update
+ */
+void account_user_time(struct task_struct *p, cputime_t cputime)
+{
+ struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
+ cputime64_t tmp;
+
+ p->utime = cputime_add(p->utime, cputime);
+
+ /* Check for signals (SIGVTALRM, SIGPROF, SIGXCPU & SIGKILL). */
+ check_rlimit(p, cputime);
+ account_it_virt(p, cputime);
+ account_it_prof(p, cputime);
+
+ /* Add user time to cpustat. */
+ tmp = cputime_to_cputime64(cputime);
+ if (TASK_NICE(p) > 0)
+ cpustat->nice = cputime64_add(cpustat->nice, tmp);
+ else
+ cpustat->user = cputime64_add(cpustat->user, tmp);
+}
+
+/*
+ * Account system cpu time to a process.
+ * @p: the process that the cpu time gets accounted to
+ * @hardirq_offset: the offset to subtract from hardirq_count()
+ * @cputime: the cpu time spent in kernel space since the last update
+ */
+void account_system_time(struct task_struct *p, int hardirq_offset,
+ cputime_t cputime)
+{
+ struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
+ runqueue_t *rq = this_rq();
+ cputime64_t tmp;
+
+ p->stime = cputime_add(p->stime, cputime);
+
+ /* Check for signals (SIGPROF, SIGXCPU & SIGKILL). */
+ if (likely(p->signal && p->exit_state < EXIT_ZOMBIE)) {
+ check_rlimit(p, cputime);
+ account_it_prof(p, cputime);
+ }
+
+ /* Add system time to cpustat. */
+ tmp = cputime_to_cputime64(cputime);
+ if (hardirq_count() - hardirq_offset)
+ cpustat->irq = cputime64_add(cpustat->irq, tmp);
+ else if (softirq_count())
+ cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
+ else if (p != rq->idle)
+ cpustat->system = cputime64_add(cpustat->system, tmp);
+ else if (atomic_read(&rq->nr_iowait) > 0)
+ cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
+ else
+ cpustat->idle = cputime64_add(cpustat->idle, tmp);
+}
+
+/*
+ * Account for involuntary wait time.
+ * @p: the process from which the cpu time has been stolen
+ * @steal: the cpu time spent in involuntary wait
+ */
+void account_steal_time(struct task_struct *p, cputime_t steal)
+{
+ struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
+ cputime64_t tmp = cputime_to_cputime64(steal);
+ runqueue_t *rq = this_rq();
+
+ if (p == rq->idle) {
+ p->stime = cputime_add(p->stime, steal);
+ if (atomic_read(&rq->nr_iowait) > 0)
+ cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
+ else
+ cpustat->idle = cputime64_add(cpustat->idle, tmp);
+ } else
+ cpustat->steal = cputime64_add(cpustat->steal, tmp);
+}
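
With the cpustat bookkeeping pulled out of scheduler_tick() (see the hunk below), the per-tick path is expected to feed these helpers instead. A hypothetical caller, sketched only to show the intended division of labour (the real call sites live in the timer and architecture code, outside this hunk):

/* Hypothetical per-tick hook, for illustration only. */
static void tick_accounting_sketch(int user_tick)
{
	struct task_struct *p = current;
	cputime_t one_tick = jiffies_to_cputime(1);

	if (user_tick)
		account_user_time(p, one_tick);
	else
		account_system_time(p, HARDIRQ_OFFSET, one_tick);
	scheduler_tick();
}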
+
+/*
* This function gets called by the timer code, with HZ frequency.
* We call it with interrupts disabled.
*
* It also gets called by the fork code, when changing the parent's
* timeslices.
*/
-void scheduler_tick(int user_ticks, int sys_ticks)
+void scheduler_tick(void)
{
int cpu = smp_processor_id();
- struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
runqueue_t *rq = this_rq();
task_t *p = current;
rq->timestamp_last_tick = sched_clock();
- if (rcu_pending(cpu))
- rcu_check_callbacks(cpu, user_ticks);
-
- /* note: this timer irq context must be accounted for as well */
- if (hardirq_count() - HARDIRQ_OFFSET) {
- cpustat->irq += sys_ticks;
- sys_ticks = 0;
- } else if (softirq_count()) {
- cpustat->softirq += sys_ticks;
- sys_ticks = 0;
- }
-
if (p == rq->idle) {
- if (atomic_read(&rq->nr_iowait) > 0)
- cpustat->iowait += sys_ticks;
- else
- cpustat->idle += sys_ticks;
if (wake_priority_sleeper(rq))
goto out;
rebalance_tick(cpu, rq, SCHED_IDLE);
return;
}
- if (TASK_NICE(p) > 0)
- cpustat->nice += user_ticks;
- else
- cpustat->user += user_ticks;
- cpustat->system += sys_ticks;
/* Task might have expired already, but not scheduled off yet */
if (p->array != rq->active) {
@@ -2342,8 +2442,7 @@ void scheduler_tick(int user_ticks, int sys_ticks)
set_tsk_need_resched(p);
/* put it at the end of the queue: */
- dequeue_task(p, rq->active);
- enqueue_task(p, rq->active);
+ requeue_task(p, rq->active);
}
goto out_unlock;
}
@@ -2384,10 +2483,8 @@ void scheduler_tick(int user_ticks, int sys_ticks)
(p->time_slice >= TIMESLICE_GRANULARITY(p)) &&
(p->array == rq->active)) {
- dequeue_task(p, rq->active);
+ requeue_task(p, rq->active);
set_tsk_need_resched(p);
- p->prio = effective_prio(p);
- enqueue_task(p, rq->active);
}
}
out_unlock:
@@ -2521,6 +2618,38 @@ static inline int dependent_sleeper(int this_cpu, runqueue_t *this_rq)
}
#endif
+#if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT)
+
+void fastcall add_preempt_count(int val)
+{
+ /*
+ * Underflow?
+ */
+ BUG_ON(((int)preempt_count() < 0));
+ preempt_count() += val;
+ /*
+ * Spinlock count overflowing soon?
+ */
+ BUG_ON((preempt_count() & PREEMPT_MASK) >= PREEMPT_MASK-10);
+}
+EXPORT_SYMBOL(add_preempt_count);
+
+void fastcall sub_preempt_count(int val)
+{
+ /*
+ * Underflow?
+ */
+ BUG_ON(val > preempt_count());
+ /*
+ * Is the spinlock portion underflowing?
+ */
+ BUG_ON((val < PREEMPT_MASK) && !(preempt_count() & PREEMPT_MASK));
+ preempt_count() -= val;
+}
+EXPORT_SYMBOL(sub_preempt_count);
+
+#endif
+
/*
* schedule() is the main scheduler function.
*/
@@ -2540,7 +2669,7 @@ asmlinkage void __sched schedule(void)
* schedule() atomically, we ignore that path for now.
* Otherwise, whine if we are scheduling when we should not be.
*/
- if (likely(!(current->exit_state & (EXIT_DEAD | EXIT_ZOMBIE)))) {
+ if (likely(!current->exit_state)) {
if (unlikely(in_atomic())) {
printk(KERN_ERR "scheduling while atomic: "
"%s/0x%08x/%d\n",
@@ -2561,7 +2690,7 @@ need_resched_nonpreemptible:
* The idle thread is not allowed to schedule!
* Remove this check after it has been exercised a bit.
*/
- if (unlikely(current == rq->idle) && current->state != TASK_RUNNING) {
+ if (unlikely(prev == rq->idle) && prev->state != TASK_RUNNING) {
printk(KERN_ERR "bad: scheduling from the idle thread!\n");
dump_stack();
}
@@ -2574,21 +2703,16 @@ need_resched_nonpreemptible:
run_time = NS_MAX_SLEEP_AVG;
/*
- * Tasks with interactive credits get charged less run_time
- * at high sleep_avg to delay them losing their interactive
- * status
+	 * Tasks are charged proportionately less run_time at high sleep_avg to
+ * delay them losing their interactive status
*/
- if (HIGH_CREDIT(prev))
- run_time /= (CURRENT_BONUS(prev) ? : 1);
+ run_time /= (CURRENT_BONUS(prev) ? : 1);
spin_lock_irq(&rq->lock);
- if (unlikely(current->flags & PF_DEAD))
- current->state = EXIT_DEAD;
- /*
- * if entering off of a kernel preemption go straight
- * to picking the next task.
- */
+ if (unlikely(prev->flags & PF_DEAD))
+ prev->state = EXIT_DEAD;
+
switch_count = &prev->nivcsw;
if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
switch_count = &prev->nvcsw;
@@ -2670,11 +2794,8 @@ switch_tasks:
rcu_qsctr_inc(task_cpu(prev));
prev->sleep_avg -= run_time;
- if ((long)prev->sleep_avg <= 0) {
+ if ((long)prev->sleep_avg <= 0)
prev->sleep_avg = 0;
- if (!(HIGH_CREDIT(prev) || LOW_CREDIT(prev)))
- prev->interactive_credit--;
- }
prev->timestamp = prev->last_ran = now;
sched_info_switch(prev, next);
@@ -2711,7 +2832,10 @@ EXPORT_SYMBOL(schedule);
asmlinkage void __sched preempt_schedule(void)
{
struct thread_info *ti = current_thread_info();
-
+#ifdef CONFIG_PREEMPT_BKL
+ struct task_struct *task = current;
+ int saved_lock_depth;
+#endif
/*
* If there is a non-zero preempt_count or interrupts are disabled,
* we do not want to preempt the current task. Just return..
@@ -2720,9 +2844,21 @@ asmlinkage void __sched preempt_schedule(void)
return;
need_resched:
- ti->preempt_count = PREEMPT_ACTIVE;
+ add_preempt_count(PREEMPT_ACTIVE);
+ /*
+ * We keep the big kernel semaphore locked, but we
+	 * clear ->lock_depth so that schedule() doesn't
+ * auto-release the semaphore:
+ */
+#ifdef CONFIG_PREEMPT_BKL
+ saved_lock_depth = task->lock_depth;
+ task->lock_depth = -1;
+#endif
schedule();
- ti->preempt_count = 0;
+#ifdef CONFIG_PREEMPT_BKL
+ task->lock_depth = saved_lock_depth;
+#endif
+ sub_preempt_count(PREEMPT_ACTIVE);
/* we could miss a preemption opportunity between schedule and now */
barrier();
@@ -2955,7 +3091,7 @@ void set_user_nice(task_t *p, long nice)
*/
rq = task_rq_lock(p, &flags);
/*
- * The RT priorities are set via setscheduler(), but we still
+ * The RT priorities are set via sched_setscheduler(), but we still
* allow the 'normal' nice value to be set - but as expected
* it wont have any effect on scheduling until the task is
* not SCHED_NORMAL:
@@ -3055,6 +3191,15 @@ int task_nice(const task_t *p)
return TASK_NICE(p);
}
+/*
+ * The only users of task_nice are binfmt_elf and binfmt_elf32.
+ * binfmt_elf is no longer modular, but binfmt_elf32 still is.
+ * Therefore, task_nice is needed if there is a compat_mode.
+ */
+#ifdef CONFIG_COMPAT
+EXPORT_SYMBOL_GPL(task_nice);
+#endif
+
/**
* idle_cpu - is a given cpu idle currently?
* @cpu: the processor in question.
@@ -3096,67 +3241,48 @@ static void __setscheduler(struct task_struct *p, int policy, int prio)
p->prio = p->static_prio;
}
-/*
- * setscheduler - change the scheduling policy and/or RT priority of a thread.
+/**
+ * sched_setscheduler - change the scheduling policy and/or RT priority of
+ * a thread.
+ * @p: the task in question.
+ * @policy: new policy.
+ * @param: structure containing the new RT priority.
*/
-static int setscheduler(pid_t pid, int policy, struct sched_param __user *param)
+int sched_setscheduler(struct task_struct *p, int policy, struct sched_param *param)
{
- struct sched_param lp;
- int retval = -EINVAL;
+ int retval;
int oldprio, oldpolicy = -1;
prio_array_t *array;
unsigned long flags;
runqueue_t *rq;
- task_t *p;
-
- if (!param || pid < 0)
- goto out_nounlock;
- retval = -EFAULT;
- if (copy_from_user(&lp, param, sizeof(struct sched_param)))
- goto out_nounlock;
-
- /*
- * We play safe to avoid deadlocks.
- */
- read_lock_irq(&tasklist_lock);
-
- p = find_process_by_pid(pid);
-
- retval = -ESRCH;
- if (!p)
- goto out_unlock;
recheck:
/* double check policy once rq lock held */
if (policy < 0)
policy = oldpolicy = p->policy;
- else {
- retval = -EINVAL;
- if (policy != SCHED_FIFO && policy != SCHED_RR &&
+ else if (policy != SCHED_FIFO && policy != SCHED_RR &&
policy != SCHED_NORMAL)
- goto out_unlock;
- }
+ return -EINVAL;
/*
* Valid priorities for SCHED_FIFO and SCHED_RR are
* 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL is 0.
*/
- retval = -EINVAL;
- if (lp.sched_priority < 0 || lp.sched_priority > MAX_USER_RT_PRIO-1)
- goto out_unlock;
- if ((policy == SCHED_NORMAL) != (lp.sched_priority == 0))
- goto out_unlock;
+ if (param->sched_priority < 0 ||
+ param->sched_priority > MAX_USER_RT_PRIO-1)
+ return -EINVAL;
+ if ((policy == SCHED_NORMAL) != (param->sched_priority == 0))
+ return -EINVAL;
- retval = -EPERM;
if ((policy == SCHED_FIFO || policy == SCHED_RR) &&
!capable(CAP_SYS_NICE))
- goto out_unlock;
+ return -EPERM;
if ((current->euid != p->euid) && (current->euid != p->uid) &&
!capable(CAP_SYS_NICE))
- goto out_unlock;
+ return -EPERM;
- retval = security_task_setscheduler(p, policy, &lp);
+ retval = security_task_setscheduler(p, policy, param);
if (retval)
- goto out_unlock;
+ return retval;
/*
 	 * To be able to change p->policy safely, the appropriate
* runqueue lock must be held.
@@ -3170,12 +3296,11 @@ recheck:
}
array = p->array;
if (array)
- deactivate_task(p, task_rq(p));
- retval = 0;
+ deactivate_task(p, rq);
oldprio = p->prio;
- __setscheduler(p, policy, lp.sched_priority);
+ __setscheduler(p, policy, param->sched_priority);
if (array) {
- __activate_task(p, task_rq(p));
+ __activate_task(p, rq);
/*
* Reschedule if we are currently running on this runqueue and
* our priority decreased, or if we are not currently running on
@@ -3188,22 +3313,41 @@ recheck:
resched_task(rq->curr);
}
task_rq_unlock(rq, &flags);
-out_unlock:
+ return 0;
+}
+EXPORT_SYMBOL_GPL(sched_setscheduler);
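
Splitting the user-space plumbing into do_sched_setscheduler() below leaves sched_setscheduler() as a proper in-kernel interface, which is why it gains the export. A minimal sketch of a kernel-side caller (the function name and the priority value are invented for illustration):

/* Hypothetical in-kernel user of the newly exported interface. */
static int make_thread_fifo(struct task_struct *worker)
{
	struct sched_param param = { .sched_priority = 1 };

	return sched_setscheduler(worker, SCHED_FIFO, &param);
}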
+
+static int do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
+{
+ int retval;
+ struct sched_param lparam;
+ struct task_struct *p;
+
+ if (!param || pid < 0)
+ return -EINVAL;
+ if (copy_from_user(&lparam, param, sizeof(struct sched_param)))
+ return -EFAULT;
+ read_lock_irq(&tasklist_lock);
+ p = find_process_by_pid(pid);
+ if (!p) {
+ read_unlock_irq(&tasklist_lock);
+ return -ESRCH;
+ }
+ retval = sched_setscheduler(p, policy, &lparam);
read_unlock_irq(&tasklist_lock);
-out_nounlock:
return retval;
}
/**
* sys_sched_setscheduler - set/change the scheduler policy and RT priority
* @pid: the pid in question.
- * @policy: new policy
+ * @policy: new policy.
* @param: structure containing the new RT priority.
*/
asmlinkage long sys_sched_setscheduler(pid_t pid, int policy,
struct sched_param __user *param)
{
- return setscheduler(pid, policy, param);
+ return do_sched_setscheduler(pid, policy, param);
}
/**
@@ -3213,7 +3357,7 @@ asmlinkage long sys_sched_setscheduler(pid_t pid, int policy,
*/
asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param)
{
- return setscheduler(pid, -1, param);
+ return do_sched_setscheduler(pid, -1, param);
}
/**
@@ -3444,8 +3588,14 @@ asmlinkage long sys_sched_yield(void)
} else if (!rq->expired->nr_active)
schedstat_inc(rq, yld_exp_empty);
- dequeue_task(current, array);
- enqueue_task(current, target);
+ if (array != target) {
+ dequeue_task(current, array);
+ enqueue_task(current, target);
+ } else
+ /*
+ * requeue_task is cheaper so perform that if possible.
+ */
+ requeue_task(current, array);
/*
* Since we are going to call schedule() anyway, there's
@@ -3460,13 +3610,71 @@ asmlinkage long sys_sched_yield(void)
return 0;
}
-void __sched __cond_resched(void)
+static inline void __cond_resched(void)
{
- set_current_state(TASK_RUNNING);
- schedule();
+ do {
+ add_preempt_count(PREEMPT_ACTIVE);
+ schedule();
+ sub_preempt_count(PREEMPT_ACTIVE);
+ } while (need_resched());
+}
+
+int __sched cond_resched(void)
+{
+ if (need_resched()) {
+ __cond_resched();
+ return 1;
+ }
+ return 0;
+}
+
+EXPORT_SYMBOL(cond_resched);
+
+/*
+ * cond_resched_lock() - if a reschedule is pending, drop the given lock,
+ * call schedule, and on return reacquire the lock.
+ *
+ * This works OK both with and without CONFIG_PREEMPT. We do strange low-level
+ * operations here to prevent schedule() from being called twice (once via
+ * spin_unlock(), once by hand).
+ */
+int cond_resched_lock(spinlock_t * lock)
+{
+#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT)
+ if (lock->break_lock) {
+ lock->break_lock = 0;
+ spin_unlock(lock);
+ cpu_relax();
+ spin_lock(lock);
+ }
+#endif
+ if (need_resched()) {
+ _raw_spin_unlock(lock);
+ preempt_enable_no_resched();
+ __cond_resched();
+ spin_lock(lock);
+ return 1;
+ }
+ return 0;
+}
+
+EXPORT_SYMBOL(cond_resched_lock);
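
The comment above describes the intended use of cond_resched_lock(): a long pass over data protected by a spinlock can offer to reschedule without open-coding the unlock/schedule/relock dance itself. A minimal sketch, with the lock, table and size invented for illustration:

/* Hypothetical caller; only cond_resched_lock() itself comes from this patch. */
static void clear_table_politely(spinlock_t *lock, int *table, int entries)
{
	int i;

	spin_lock(lock);
	for (i = 0; i < entries; i++) {
		table[i] = 0;			/* per-entry work under the lock */
		cond_resched_lock(lock);	/* may drop, schedule and retake */
	}
	spin_unlock(lock);
}

The return value (1 if the lock was dropped and reacquired) lets a caller revalidate whatever state the lock was protecting before it continues.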
+
+int __sched cond_resched_softirq(void)
+{
+ BUG_ON(!in_softirq());
+
+ if (need_resched()) {
+ __local_bh_enable();
+ __cond_resched();
+ local_bh_disable();
+ return 1;
+ }
+ return 0;
}
-EXPORT_SYMBOL(__cond_resched);
+EXPORT_SYMBOL(cond_resched_softirq);
+
/**
* yield - yield the current processor to other threads.
@@ -3491,7 +3699,7 @@ EXPORT_SYMBOL(yield);
*/
void __sched io_schedule(void)
{
- struct runqueue *rq = this_rq();
+ struct runqueue *rq = &per_cpu(runqueues, _smp_processor_id());
atomic_inc(&rq->nr_iowait);
schedule();
@@ -3502,7 +3710,7 @@ EXPORT_SYMBOL(io_schedule);
long __sched io_schedule_timeout(long timeout)
{
- struct runqueue *rq = this_rq();
+ struct runqueue *rq = &per_cpu(runqueues, _smp_processor_id());
long ret;
atomic_inc(&rq->nr_iowait);
@@ -3699,7 +3907,6 @@ void __devinit init_idle(task_t *idle, int cpu)
unsigned long flags;
idle->sleep_avg = 0;
- idle->interactive_credit = 0;
idle->array = NULL;
idle->prio = MAX_PRIO;
idle->state = TASK_RUNNING;
@@ -3711,7 +3918,7 @@ void __devinit init_idle(task_t *idle, int cpu)
spin_unlock_irqrestore(&rq->lock, flags);
/* Set the preempt count _outside_ the spinlocks! */
-#ifdef CONFIG_PREEMPT
+#if defined(CONFIG_PREEMPT) && !defined(CONFIG_PREEMPT_BKL)
idle->thread_info->preempt_count = (idle->lock_depth >= 0);
#else
idle->thread_info->preempt_count = 0;
@@ -3879,8 +4086,7 @@ static int migration_thread(void * data)
if (req->type == REQ_MOVE_TASK) {
spin_unlock(&rq->lock);
- __migrate_task(req->task, smp_processor_id(),
- req->dest_cpu);
+ __migrate_task(req->task, cpu, req->dest_cpu);
local_irq_enable();
} else if (req->type == REQ_SET_DOMAIN) {
rq->sd = req->sd;
@@ -4004,6 +4210,20 @@ void sched_idle_next(void)
spin_unlock_irqrestore(&rq->lock, flags);
}
+/* Ensures that the idle task is using init_mm right before its cpu goes
+ * offline.
+ */
+void idle_task_exit(void)
+{
+ struct mm_struct *mm = current->active_mm;
+
+ BUG_ON(cpu_online(smp_processor_id()));
+
+ if (mm != &init_mm)
+ switch_mm(mm, &init_mm, current);
+ mmdrop(mm);
+}
+
static void migrate_dead(unsigned int dead_cpu, task_t *tsk)
{
struct runqueue *rq = cpu_rq(dead_cpu);
@@ -4136,6 +4356,94 @@ int __init migration_init(void)
#endif
#ifdef CONFIG_SMP
+#define SCHED_DOMAIN_DEBUG
+#ifdef SCHED_DOMAIN_DEBUG
+static void sched_domain_debug(struct sched_domain *sd, int cpu)
+{
+ int level = 0;
+
+ printk(KERN_DEBUG "CPU%d attaching sched-domain:\n", cpu);
+
+ do {
+ int i;
+ char str[NR_CPUS];
+ struct sched_group *group = sd->groups;
+ cpumask_t groupmask;
+
+ cpumask_scnprintf(str, NR_CPUS, sd->span);
+ cpus_clear(groupmask);
+
+ printk(KERN_DEBUG);
+ for (i = 0; i < level + 1; i++)
+ printk(" ");
+ printk("domain %d: ", level);
+
+ if (!(sd->flags & SD_LOAD_BALANCE)) {
+ printk("does not load-balance\n");
+ if (sd->parent)
+ printk(KERN_ERR "ERROR: !SD_LOAD_BALANCE domain has parent");
+ break;
+ }
+
+ printk("span %s\n", str);
+
+ if (!cpu_isset(cpu, sd->span))
+ printk(KERN_ERR "ERROR: domain->span does not contain CPU%d\n", cpu);
+ if (!cpu_isset(cpu, group->cpumask))
+ printk(KERN_ERR "ERROR: domain->groups does not contain CPU%d\n", cpu);
+
+ printk(KERN_DEBUG);
+ for (i = 0; i < level + 2; i++)
+ printk(" ");
+ printk("groups:");
+ do {
+ if (!group) {
+ printk("\n");
+ printk(KERN_ERR "ERROR: group is NULL\n");
+ break;
+ }
+
+ if (!group->cpu_power) {
+ printk("\n");
+ printk(KERN_ERR "ERROR: domain->cpu_power not set\n");
+ }
+
+ if (!cpus_weight(group->cpumask)) {
+ printk("\n");
+ printk(KERN_ERR "ERROR: empty group\n");
+ }
+
+ if (cpus_intersects(groupmask, group->cpumask)) {
+ printk("\n");
+ printk(KERN_ERR "ERROR: repeated CPUs\n");
+ }
+
+ cpus_or(groupmask, groupmask, group->cpumask);
+
+ cpumask_scnprintf(str, NR_CPUS, group->cpumask);
+ printk(" %s", str);
+
+ group = group->next;
+ } while (group != sd->groups);
+ printk("\n");
+
+ if (!cpus_equal(sd->span, groupmask))
+ printk(KERN_ERR "ERROR: groups don't span domain->span\n");
+
+ level++;
+ sd = sd->parent;
+
+ if (sd) {
+ if (!cpus_subset(groupmask, sd->span))
+ printk(KERN_ERR "ERROR: parent span is not a superset of domain->span\n");
+ }
+
+ } while (sd);
+}
+#else
+#define sched_domain_debug(sd, cpu) {}
+#endif
+
/*
* Attach the domain 'sd' to 'cpu' as its base domain. Callers must
* hold the hotplug lock.
@@ -4147,6 +4455,8 @@ void __devinit cpu_attach_domain(struct sched_domain *sd, int cpu)
runqueue_t *rq = cpu_rq(cpu);
int local = 1;
+ sched_domain_debug(sd, cpu);
+
spin_lock_irqsave(&rq->lock, flags);
if (cpu == smp_processor_id() || !cpu_online(cpu)) {
@@ -4178,7 +4488,8 @@ static int __init isolated_cpu_setup(char *str)
str = get_options(str, ARRAY_SIZE(ints), ints);
cpus_clear(cpu_isolated_map);
for (i = 1; i <= ints[0]; i++)
- cpu_set(ints[i], cpu_isolated_map);
+ if (ints[i] < NR_CPUS)
+ cpu_set(ints[i], cpu_isolated_map);
return 1;
}
@@ -4421,96 +4732,6 @@ static void __devinit arch_destroy_sched_domains(void)
#endif /* ARCH_HAS_SCHED_DOMAIN */
-#define SCHED_DOMAIN_DEBUG
-#ifdef SCHED_DOMAIN_DEBUG
-static void sched_domain_debug(void)
-{
- int i;
-
- for_each_online_cpu(i) {
- runqueue_t *rq = cpu_rq(i);
- struct sched_domain *sd;
- int level = 0;
-
- sd = rq->sd;
-
- printk(KERN_DEBUG "CPU%d:\n", i);
-
- do {
- int j;
- char str[NR_CPUS];
- struct sched_group *group = sd->groups;
- cpumask_t groupmask;
-
- cpumask_scnprintf(str, NR_CPUS, sd->span);
- cpus_clear(groupmask);
-
- printk(KERN_DEBUG);
- for (j = 0; j < level + 1; j++)
- printk(" ");
- printk("domain %d: ", level);
-
- if (!(sd->flags & SD_LOAD_BALANCE)) {
- printk("does not load-balance");
- if (sd->parent)
- printk(" ERROR !SD_LOAD_BALANCE domain has parent");
- printk("\n");
- break;
- }
-
- printk("span %s\n", str);
-
- if (!cpu_isset(i, sd->span))
- printk(KERN_DEBUG "ERROR domain->span does not contain CPU%d\n", i);
- if (!cpu_isset(i, group->cpumask))
- printk(KERN_DEBUG "ERROR domain->groups does not contain CPU%d\n", i);
-
- printk(KERN_DEBUG);
- for (j = 0; j < level + 2; j++)
- printk(" ");
- printk("groups:");
- do {
- if (!group) {
- printk(" ERROR: NULL");
- break;
- }
-
- if (!group->cpu_power)
- printk(KERN_DEBUG "ERROR group->cpu_power not set\n");
-
- if (!cpus_weight(group->cpumask))
- printk(" ERROR empty group:");
-
- if (cpus_intersects(groupmask, group->cpumask))
- printk(" ERROR repeated CPUs:");
-
- cpus_or(groupmask, groupmask, group->cpumask);
-
- cpumask_scnprintf(str, NR_CPUS, group->cpumask);
- printk(" %s", str);
-
- group = group->next;
- } while (group != sd->groups);
- printk("\n");
-
- if (!cpus_equal(sd->span, groupmask))
- printk(KERN_DEBUG "ERROR groups don't span domain->span\n");
-
- level++;
- sd = sd->parent;
-
- if (sd) {
- if (!cpus_subset(groupmask, sd->span))
- printk(KERN_DEBUG "ERROR parent span is not a superset of domain->span\n");
- }
-
- } while (sd);
- }
-}
-#else
-#define sched_domain_debug() {}
-#endif
-
/*
* Initial dummy domain for early boot and for hotplug cpu. Being static,
* it is initialized to zero, so all balancing flags are cleared which is
@@ -4553,8 +4774,6 @@ static int update_sched_domains(struct notifier_block *nfb,
/* The hotplug lock is already held by cpu_up/cpu_down */
arch_init_sched_domains();
- sched_domain_debug();
-
return NOTIFY_OK;
}
#endif
@@ -4563,7 +4782,6 @@ void __init sched_init_smp(void)
{
lock_cpu_hotplug();
arch_init_sched_domains();
- sched_domain_debug();
unlock_cpu_hotplug();
/* XXX: Theoretical race here - CPU may be hotplugged now */
hotcpu_notifier(update_sched_domains, 0);
@@ -4640,7 +4858,7 @@ void __might_sleep(char *file, int line)
static unsigned long prev_jiffy; /* ratelimiting */
if ((in_atomic() || irqs_disabled()) &&
- system_state == SYSTEM_RUNNING) {
+ system_state == SYSTEM_RUNNING && !oops_in_progress) {
if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
return;
prev_jiffy = jiffies;
diff --git a/kernel/signal.c b/kernel/signal.c
index cc50f2504365..d98e9624ea30 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -153,11 +153,6 @@ static kmem_cache_t *sigqueue_cachep;
(!T(signr, SIG_KERNEL_IGNORE_MASK|SIG_KERNEL_STOP_MASK) && \
(t)->sighand->action[(signr)-1].sa.sa_handler == SIG_DFL)
-#define sig_avoid_stop_race() \
- (sigtestsetmask(&current->pending.signal, M(SIGCONT) | M(SIGKILL)) || \
- sigtestsetmask(&current->signal->shared_pending.signal, \
- M(SIGCONT) | M(SIGKILL)))
-
static int sig_ignored(struct task_struct *t, int sig)
{
void __user * handler;
@@ -380,8 +375,8 @@ void __exit_signal(struct task_struct *tsk)
* We won't ever get here for the group leader, since it
* will have been the last reference on the signal_struct.
*/
- sig->utime += tsk->utime;
- sig->stime += tsk->stime;
+ sig->utime = cputime_add(sig->utime, tsk->utime);
+ sig->stime = cputime_add(sig->stime, tsk->stime);
sig->min_flt += tsk->min_flt;
sig->maj_flt += tsk->maj_flt;
sig->nvcsw += tsk->nvcsw;
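
utime and stime switch here (and again in do_notify_parent() further down) from plain integer arithmetic to the cputime_t helpers, so an architecture is free to give cputime_t a representation other than jiffies. A small sketch of the accessor idiom, wrapped in a hypothetical helper:

/* Hypothetical helper; only cputime_add()/cputime_to_jiffies() come from the patch. */
static unsigned long task_total_jiffies(struct task_struct *tsk)
{
	cputime_t total = cputime_add(tsk->utime, tsk->stime);

	return cputime_to_jiffies(total);
}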
@@ -551,6 +546,21 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
if (!signr)
signr = __dequeue_signal(&tsk->signal->shared_pending,
mask, info);
+ if (signr && unlikely(sig_kernel_stop(signr))) {
+ /*
+ * Set a marker that we have dequeued a stop signal. Our
+ * caller might release the siglock and then the pending
+ * stop signal it is about to process is no longer in the
+ * pending bitmasks, but must still be cleared by a SIGCONT
+ * (and overruled by a SIGKILL). So those cases clear this
+ * shared flag after we've set it. Note that this flag may
+ * remain set after the signal we return is ignored or
+ * handled. That doesn't matter because its only purpose
+ * is to alert stop-signal processing code when another
+ * processor has come along and cleared the flag.
+ */
+ tsk->signal->flags |= SIGNAL_STOP_DEQUEUED;
+ }
if ( signr &&
((info->si_code & __SI_MASK) == __SI_TIMER) &&
info->si_sys_private){
@@ -577,15 +587,15 @@ void signal_wake_up(struct task_struct *t, int resume)
set_tsk_thread_flag(t, TIF_SIGPENDING);
/*
- * If resume is set, we want to wake it up in the TASK_STOPPED case.
- * We don't check for TASK_STOPPED because there is a race with it
+ * For SIGKILL, we want to wake it up in the stopped/traced case.
+ * We don't check t->state here because there is a race with it
 	 * executing on another processor and just now entering stopped state.
- * By calling wake_up_process any time resume is set, we ensure
- * the process will wake up and handle its stop or death signal.
+ * By using wake_up_state, we ensure the process will wake up and
+ * handle its death signal.
*/
mask = TASK_INTERRUPTIBLE;
if (resume)
- mask |= TASK_STOPPED;
+ mask |= TASK_STOPPED | TASK_TRACED;
if (!wake_up_state(t, mask))
kick_process(t);
}
@@ -651,6 +661,12 @@ static void handle_stop_signal(int sig, struct task_struct *p)
{
struct task_struct *t;
+ if (p->flags & SIGNAL_GROUP_EXIT)
+ /*
+ * The process is in the middle of dying already.
+ */
+ return;
+
if (sig_kernel_stop(sig)) {
/*
* This is a stop signal. Remove SIGCONT from all queues.
@@ -680,7 +696,7 @@ static void handle_stop_signal(int sig, struct task_struct *p)
* the SIGCHLD was pending on entry to this kill.
*/
p->signal->group_stop_count = 0;
- p->signal->stop_state = 1;
+ p->signal->flags = SIGNAL_STOP_CONTINUED;
spin_unlock(&p->sighand->siglock);
if (p->ptrace & PT_PTRACED)
do_notify_parent_cldstop(p, p->parent,
@@ -722,12 +738,12 @@ static void handle_stop_signal(int sig, struct task_struct *p)
t = next_thread(t);
} while (t != p);
- if (p->signal->stop_state > 0) {
+ if (p->signal->flags & SIGNAL_STOP_STOPPED) {
/*
* We were in fact stopped, and are now continued.
* Notify the parent with CLD_CONTINUED.
*/
- p->signal->stop_state = -1;
+ p->signal->flags = SIGNAL_STOP_CONTINUED;
p->signal->group_exit_code = 0;
spin_unlock(&p->sighand->siglock);
if (p->ptrace & PT_PTRACED)
@@ -739,7 +755,20 @@ static void handle_stop_signal(int sig, struct task_struct *p)
p->group_leader->real_parent,
CLD_CONTINUED);
spin_lock(&p->sighand->siglock);
+ } else {
+ /*
+ * We are not stopped, but there could be a stop
+ * signal in the middle of being processed after
+ * being removed from the queue. Clear that too.
+ */
+ p->signal->flags = 0;
}
+ } else if (sig == SIGKILL) {
+ /*
+ * Make sure that any pending stop signal already dequeued
+ * is undone by the wakeup for SIGKILL.
+ */
+ p->signal->flags = 0;
}
}
@@ -905,12 +934,12 @@ __group_complete_signal(int sig, struct task_struct *p)
struct task_struct *t;
/*
- * Don't bother zombies and stopped tasks (but
- * SIGKILL will punch through stopped state)
+ * Don't bother traced and stopped tasks (but
+ * SIGKILL will punch through that).
*/
- mask = EXIT_DEAD | EXIT_ZOMBIE | TASK_TRACED;
- if (sig != SIGKILL)
- mask |= TASK_STOPPED;
+ mask = TASK_STOPPED | TASK_TRACED;
+ if (sig == SIGKILL)
+ mask = 0;
/*
* Now find a thread we can wake up to take the signal off the queue.
@@ -953,7 +982,7 @@ __group_complete_signal(int sig, struct task_struct *p)
* Found a killable thread. If the signal will be fatal,
* then start taking the whole group down immediately.
*/
- if (sig_fatal(p, sig) && !p->signal->group_exit &&
+ if (sig_fatal(p, sig) && !(p->signal->flags & SIGNAL_GROUP_EXIT) &&
!sigismember(&t->real_blocked, sig) &&
(sig == SIGKILL || !(t->ptrace & PT_PTRACED))) {
/*
@@ -966,7 +995,7 @@ __group_complete_signal(int sig, struct task_struct *p)
* running and doing things after a slower
* thread has the fatal signal pending.
*/
- p->signal->group_exit = 1;
+ p->signal->flags = SIGNAL_GROUP_EXIT;
p->signal->group_exit_code = sig;
p->signal->group_stop_count = 0;
t = p;
@@ -1055,6 +1084,7 @@ void zap_other_threads(struct task_struct *p)
{
struct task_struct *t;
+ p->signal->flags = SIGNAL_GROUP_EXIT;
p->signal->group_stop_count = 0;
if (thread_group_empty(p))
@@ -1064,7 +1094,7 @@ void zap_other_threads(struct task_struct *p)
/*
* Don't bother with already dead threads
*/
- if (t->exit_state & (EXIT_ZOMBIE|EXIT_DEAD))
+ if (t->exit_state)
continue;
/*
@@ -1407,28 +1437,12 @@ out:
}
/*
- * Joy. Or not. Pthread wants us to wake up every thread
- * in our parent group.
+ * Wake up any threads in the parent blocked in wait* syscalls.
*/
-static void __wake_up_parent(struct task_struct *p,
+static inline void __wake_up_parent(struct task_struct *p,
struct task_struct *parent)
{
- struct task_struct *tsk = parent;
-
- /*
- * Fortunately this is not necessary for thread groups:
- */
- if (p->tgid == tsk->tgid) {
- wake_up_interruptible_sync(&tsk->wait_chldexit);
- return;
- }
-
- do {
- wake_up_interruptible_sync(&tsk->wait_chldexit);
- tsk = next_thread(tsk);
- if (tsk->signal != parent->signal)
- BUG();
- } while (tsk != parent);
+ wake_up_interruptible_sync(&parent->signal->wait_chldexit);
}
/*
@@ -1442,8 +1456,7 @@ void do_notify_parent(struct task_struct *tsk, int sig)
unsigned long flags;
struct sighand_struct *psig;
- if (sig == -1)
- BUG();
+ BUG_ON(sig == -1);
/* do_notify_parent_cldstop should have been called instead. */
BUG_ON(tsk->state & (TASK_STOPPED|TASK_TRACED));
@@ -1457,8 +1470,10 @@ void do_notify_parent(struct task_struct *tsk, int sig)
info.si_uid = tsk->uid;
/* FIXME: find out whether or not this is supposed to be c*time. */
- info.si_utime = tsk->utime + tsk->signal->utime;
- info.si_stime = tsk->stime + tsk->signal->stime;
+ info.si_utime = cputime_to_jiffies(cputime_add(tsk->utime,
+ tsk->signal->utime));
+ info.si_stime = cputime_to_jiffies(cputime_add(tsk->stime,
+ tsk->signal->stime));
info.si_status = tsk->exit_code & 0x7f;
if (tsk->exit_code & 0x80)
@@ -1514,8 +1529,8 @@ do_notify_parent_cldstop(struct task_struct *tsk, struct task_struct *parent,
info.si_uid = tsk->uid;
/* FIXME: find out whether or not this is supposed to be c*time. */
- info.si_utime = tsk->utime;
- info.si_stime = tsk->stime;
+ info.si_utime = cputime_to_jiffies(tsk->utime);
+ info.si_stime = cputime_to_jiffies(tsk->stime);
info.si_code = why;
switch (why) {
@@ -1551,11 +1566,12 @@ do_notify_parent_cldstop(struct task_struct *tsk, struct task_struct *parent,
* We always set current->last_siginfo while stopped here.
* That makes it a way to test a stopped process for
* being ptrace-stopped vs being job-control-stopped.
+ *
+ * If we actually decide not to stop at all because the tracer is gone,
+ * we leave nostop_code in current->exit_code.
*/
-static void ptrace_stop(int exit_code, siginfo_t *info)
+static void ptrace_stop(int exit_code, int nostop_code, siginfo_t *info)
{
- BUG_ON(!(current->ptrace & PT_PTRACED));
-
/*
* If there is a group stop in progress,
* we must participate in the bookkeeping.
@@ -1570,9 +1586,24 @@ static void ptrace_stop(int exit_code, siginfo_t *info)
set_current_state(TASK_TRACED);
spin_unlock_irq(&current->sighand->siglock);
read_lock(&tasklist_lock);
- do_notify_parent_cldstop(current, current->parent, CLD_TRAPPED);
- read_unlock(&tasklist_lock);
- schedule();
+ if (likely(current->ptrace & PT_PTRACED) &&
+ likely(current->parent != current->real_parent ||
+ !(current->ptrace & PT_ATTACHED)) &&
+ (likely(current->parent->signal != current->signal) ||
+ !unlikely(current->signal->flags & SIGNAL_GROUP_EXIT))) {
+ do_notify_parent_cldstop(current, current->parent,
+ CLD_TRAPPED);
+ read_unlock(&tasklist_lock);
+ schedule();
+ } else {
+ /*
+ * By the time we got the lock, our tracer went away.
+ * Don't stop here.
+ */
+ read_unlock(&tasklist_lock);
+ set_current_state(TASK_RUNNING);
+ current->exit_code = nostop_code;
+ }
/*
* We are back. Now reacquire the siglock before touching
@@ -1603,7 +1634,7 @@ void ptrace_notify(int exit_code)
/* Let the debugger run. */
spin_lock_irq(&current->sighand->siglock);
- ptrace_stop(exit_code, &info);
+ ptrace_stop(exit_code, 0, &info);
spin_unlock_irq(&current->sighand->siglock);
}
@@ -1641,15 +1672,18 @@ finish_stop(int stop_count)
/*
* This performs the stopping for SIGSTOP and other stop signals.
* We have to stop all threads in the thread group.
+ * Returns nonzero if we've actually stopped and released the siglock.
+ * Returns zero if we didn't stop and still hold the siglock.
*/
-static void
+static int
do_signal_stop(int signr)
{
struct signal_struct *sig = current->signal;
struct sighand_struct *sighand = current->sighand;
int stop_count = -1;
- /* spin_lock_irq(&sighand->siglock) is now done in caller */
+ if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED))
+ return 0;
if (sig->group_stop_count > 0) {
/*
@@ -1661,7 +1695,7 @@ do_signal_stop(int signr)
current->exit_code = signr;
set_current_state(TASK_STOPPED);
if (stop_count == 0)
- sig->stop_state = 1;
+ sig->flags = SIGNAL_STOP_STOPPED;
spin_unlock_irq(&sighand->siglock);
}
else if (thread_group_empty(current)) {
@@ -1670,7 +1704,7 @@ do_signal_stop(int signr)
*/
current->exit_code = current->signal->group_exit_code = signr;
set_current_state(TASK_STOPPED);
- sig->stop_state = 1;
+ sig->flags = SIGNAL_STOP_STOPPED;
spin_unlock_irq(&sighand->siglock);
}
else {
@@ -1691,25 +1725,16 @@ do_signal_stop(int signr)
read_lock(&tasklist_lock);
spin_lock_irq(&sighand->siglock);
- if (unlikely(sig->group_exit)) {
+ if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED)) {
/*
- * There is a group exit in progress now.
- * We'll just ignore the stop and process the
- * associated fatal signal.
+ * Another stop or continue happened while we
+ * didn't have the lock. We can just swallow this
+ * signal now. If we raced with a SIGCONT, that
+ * should have just cleared it now. If we raced
+ * with another processor delivering a stop signal,
+ * then the SIGCONT that wakes us up should clear it.
*/
- spin_unlock_irq(&sighand->siglock);
- read_unlock(&tasklist_lock);
- return;
- }
-
- if (unlikely(sig_avoid_stop_race())) {
- /*
- * Either a SIGCONT or a SIGKILL signal was
- * posted in the siglock-not-held window.
- */
- spin_unlock_irq(&sighand->siglock);
- read_unlock(&tasklist_lock);
- return;
+ return 0;
}
if (sig->group_stop_count == 0) {
@@ -1737,13 +1762,14 @@ do_signal_stop(int signr)
current->exit_code = signr;
set_current_state(TASK_STOPPED);
if (stop_count == 0)
- sig->stop_state = 1;
+ sig->flags = SIGNAL_STOP_STOPPED;
spin_unlock_irq(&sighand->siglock);
read_unlock(&tasklist_lock);
}
finish_stop(stop_count);
+ return 1;
}
/*
@@ -1765,7 +1791,7 @@ static inline int handle_group_stop(void)
return 0;
}
- if (current->signal->group_exit)
+ if (current->signal->flags & SIGNAL_GROUP_EXIT)
/*
* Group stop is so another thread can do a core dump,
* or else we are racing against a death signal.
@@ -1779,7 +1805,7 @@ static inline int handle_group_stop(void)
*/
stop_count = --current->signal->group_stop_count;
if (stop_count == 0)
- current->signal->stop_state = 1;
+ current->signal->flags = SIGNAL_STOP_STOPPED;
current->exit_code = current->signal->group_exit_code;
set_current_state(TASK_STOPPED);
spin_unlock_irq(&current->sighand->siglock);
@@ -1811,7 +1837,7 @@ relock:
ptrace_signal_deliver(regs, cookie);
/* Let the debugger run. */
- ptrace_stop(signr, info);
+ ptrace_stop(signr, signr, info);
/* We're back. Did the debugger cancel the sig? */
signr = current->exit_code;
@@ -1873,28 +1899,27 @@ relock:
* This allows an intervening SIGCONT to be posted.
* We need to check for that and bail out if necessary.
*/
- if (signr == SIGSTOP) {
- do_signal_stop(signr); /* releases siglock */
- goto relock;
- }
- spin_unlock_irq(&current->sighand->siglock);
+ if (signr != SIGSTOP) {
+ spin_unlock_irq(&current->sighand->siglock);
- /* signals can be posted during this window */
+ /* signals can be posted during this window */
- if (is_orphaned_pgrp(process_group(current)))
- goto relock;
+ if (is_orphaned_pgrp(process_group(current)))
+ goto relock;
- spin_lock_irq(&current->sighand->siglock);
- if (unlikely(sig_avoid_stop_race())) {
- /*
- * Either a SIGCONT or a SIGKILL signal was
- * posted in the siglock-not-held window.
- */
- continue;
+ spin_lock_irq(&current->sighand->siglock);
}
- do_signal_stop(signr); /* releases siglock */
- goto relock;
+ if (likely(do_signal_stop(signr))) {
+ /* It released the siglock. */
+ goto relock;
+ }
+
+ /*
+ * We didn't actually stop, due to a race
+ * with SIGCONT or something like that.
+ */
+ continue;
}
spin_unlock_irq(&current->sighand->siglock);
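The signal.c hunks above convert raw jiffies arithmetic on utime/stime to the opaque cputime_t helpers (cputime_add, cputime_to_jiffies). On architectures that keep the default jiffies-based representation these helpers should collapse back to plain integer operations; a minimal sketch of that assumed generic fallback, for orientation only and not taken from this patch:

/* Sketch of a jiffies-backed cputime_t (assumed generic fallback).
 * The helpers are thin wrappers so that architectures with
 * finer-grained accounting can substitute their own representation. */
typedef unsigned long cputime_t;

#define cputime_zero		((cputime_t)0)
#define cputime_add(a, b)	((a) + (b))
#define cputime_to_jiffies(ct)	((unsigned long)(ct))
#define jiffies_to_cputime(jif)	((cputime_t)(jif))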
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 7572ca9ece74..582a1e8091bc 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -142,7 +142,7 @@ void local_bh_enable(void)
* Keep preemption disabled until we are done with
* softirq processing:
*/
- preempt_count() -= SOFTIRQ_OFFSET - 1;
+ sub_preempt_count(SOFTIRQ_OFFSET - 1);
if (unlikely(!in_interrupt() && local_softirq_pending()))
do_softirq();
@@ -152,6 +152,24 @@ void local_bh_enable(void)
}
EXPORT_SYMBOL(local_bh_enable);
+#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
+# define invoke_softirq() __do_softirq()
+#else
+# define invoke_softirq() do_softirq()
+#endif
+
+/*
+ * Exit an interrupt context. Process softirqs if needed and possible:
+ */
+void irq_exit(void)
+{
+ account_system_vtime(current);
+ sub_preempt_count(IRQ_EXIT_OFFSET);
+ if (!in_interrupt() && local_softirq_pending())
+ invoke_softirq();
+ preempt_enable_no_resched();
+}
+
/*
* This function must run with irqs disabled!
*/
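The new irq_exit() gives architectures a common tail for their interrupt handlers: account the time, drop the hardirq count, and run softirqs if this was the outermost interrupt. A rough sketch of how an arch-level handler might use it; the function name and the __do_IRQ() call are illustrative, not part of this diff:

/* Illustrative arch interrupt entry point; details vary per architecture. */
void example_do_IRQ(int irq, struct pt_regs *regs)
{
	irq_enter();		/* mark that we are in hardirq context */
	__do_IRQ(irq, regs);	/* invoke the handlers registered for irq */
	irq_exit();		/* runs pending softirqs if we are the
				 * outermost interrupt, then drops the
				 * preempt count without rescheduling */
}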
diff --git a/kernel/spinlock.c b/kernel/spinlock.c
index 476da1fd86f4..beacf8b7cee7 100644
--- a/kernel/spinlock.c
+++ b/kernel/spinlock.c
@@ -2,6 +2,8 @@
* Copyright (2004) Linus Torvalds
*
* Author: Zwane Mwaikambo <zwane@fsmlabs.com>
+ *
+ * Copyright (2004) Ingo Molnar
*/
#include <linux/config.h>
@@ -11,6 +13,17 @@
#include <linux/interrupt.h>
#include <linux/module.h>
+/*
+ * Generic declaration of the raw read_trylock() function;
+ * architectures are supposed to optimize this:
+ */
+int __lockfunc generic_raw_read_trylock(rwlock_t *lock)
+{
+ _raw_read_lock(lock);
+ return 1;
+}
+EXPORT_SYMBOL(generic_raw_read_trylock);
+
int __lockfunc _spin_trylock(spinlock_t *lock)
{
preempt_disable();
@@ -22,86 +35,29 @@ int __lockfunc _spin_trylock(spinlock_t *lock)
}
EXPORT_SYMBOL(_spin_trylock);
-int __lockfunc _write_trylock(rwlock_t *lock)
+int __lockfunc _read_trylock(rwlock_t *lock)
{
preempt_disable();
- if (_raw_write_trylock(lock))
+ if (_raw_read_trylock(lock))
return 1;
preempt_enable();
return 0;
}
-EXPORT_SYMBOL(_write_trylock);
-
-#ifdef CONFIG_PREEMPT
-/*
- * This could be a long-held lock. If another CPU holds it for a long time,
- * and that CPU is not asked to reschedule then *this* CPU will spin on the
- * lock for a long time, even if *this* CPU is asked to reschedule.
- *
- * So what we do here, in the slow (contended) path is to spin on the lock by
- * hand while permitting preemption.
- *
- * Called inside preempt_disable().
- */
-static inline void __preempt_spin_lock(spinlock_t *lock)
-{
- if (preempt_count() > 1) {
- _raw_spin_lock(lock);
- return;
- }
-
- do {
- preempt_enable();
- while (spin_is_locked(lock))
- cpu_relax();
- preempt_disable();
- } while (!_raw_spin_trylock(lock));
-}
+EXPORT_SYMBOL(_read_trylock);
-void __lockfunc _spin_lock(spinlock_t *lock)
+int __lockfunc _write_trylock(rwlock_t *lock)
{
preempt_disable();
- if (unlikely(!_raw_spin_trylock(lock)))
- __preempt_spin_lock(lock);
-}
-
-static inline void __preempt_write_lock(rwlock_t *lock)
-{
- if (preempt_count() > 1) {
- _raw_write_lock(lock);
- return;
- }
-
- do {
- preempt_enable();
- while (rwlock_is_locked(lock))
- cpu_relax();
- preempt_disable();
- } while (!_raw_write_trylock(lock));
-}
+ if (_raw_write_trylock(lock))
+ return 1;
-void __lockfunc _write_lock(rwlock_t *lock)
-{
- preempt_disable();
- if (unlikely(!_raw_write_trylock(lock)))
- __preempt_write_lock(lock);
-}
-#else
-void __lockfunc _spin_lock(spinlock_t *lock)
-{
- preempt_disable();
- _raw_spin_lock(lock);
+ preempt_enable();
+ return 0;
}
+EXPORT_SYMBOL(_write_trylock);
-void __lockfunc _write_lock(rwlock_t *lock)
-{
- preempt_disable();
- _raw_write_lock(lock);
-}
-#endif
-EXPORT_SYMBOL(_spin_lock);
-EXPORT_SYMBOL(_write_lock);
+#ifndef CONFIG_PREEMPT
void __lockfunc _read_lock(rwlock_t *lock)
{
@@ -110,27 +66,6 @@ void __lockfunc _read_lock(rwlock_t *lock)
}
EXPORT_SYMBOL(_read_lock);
-void __lockfunc _spin_unlock(spinlock_t *lock)
-{
- _raw_spin_unlock(lock);
- preempt_enable();
-}
-EXPORT_SYMBOL(_spin_unlock);
-
-void __lockfunc _write_unlock(rwlock_t *lock)
-{
- _raw_write_unlock(lock);
- preempt_enable();
-}
-EXPORT_SYMBOL(_write_unlock);
-
-void __lockfunc _read_unlock(rwlock_t *lock)
-{
- _raw_read_unlock(lock);
- preempt_enable();
-}
-EXPORT_SYMBOL(_read_unlock);
-
unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock)
{
unsigned long flags;
@@ -212,6 +147,132 @@ void __lockfunc _write_lock_bh(rwlock_t *lock)
}
EXPORT_SYMBOL(_write_lock_bh);
+void __lockfunc _spin_lock(spinlock_t *lock)
+{
+ preempt_disable();
+ _raw_spin_lock(lock);
+}
+
+EXPORT_SYMBOL(_spin_lock);
+
+void __lockfunc _write_lock(rwlock_t *lock)
+{
+ preempt_disable();
+ _raw_write_lock(lock);
+}
+
+EXPORT_SYMBOL(_write_lock);
+
+#else /* CONFIG_PREEMPT: */
+
+/*
+ * This could be a long-held lock. We both prepare to spin for a long
+ * time (making _this_ CPU preemptable if possible), and we also signal
+ * towards that other CPU that it should break the lock ASAP.
+ *
+ * (We do this in a function because inlining it would be excessive.)
+ */
+
+#define BUILD_LOCK_OPS(op, locktype, is_locked_fn) \
+void __lockfunc _##op##_lock(locktype *lock) \
+{ \
+ preempt_disable(); \
+ for (;;) { \
+ if (likely(_raw_##op##_trylock(lock))) \
+ break; \
+ preempt_enable(); \
+ if (!(lock)->break_lock) \
+ (lock)->break_lock = 1; \
+ while (is_locked_fn(lock) && (lock)->break_lock) \
+ cpu_relax(); \
+ preempt_disable(); \
+ } \
+} \
+ \
+EXPORT_SYMBOL(_##op##_lock); \
+ \
+unsigned long __lockfunc _##op##_lock_irqsave(locktype *lock) \
+{ \
+ unsigned long flags; \
+ \
+ preempt_disable(); \
+ for (;;) { \
+ local_irq_save(flags); \
+ if (likely(_raw_##op##_trylock(lock))) \
+ break; \
+ local_irq_restore(flags); \
+ \
+ preempt_enable(); \
+ if (!(lock)->break_lock) \
+ (lock)->break_lock = 1; \
+ while (is_locked_fn(lock) && (lock)->break_lock) \
+ cpu_relax(); \
+ preempt_disable(); \
+ } \
+ return flags; \
+} \
+ \
+EXPORT_SYMBOL(_##op##_lock_irqsave); \
+ \
+void __lockfunc _##op##_lock_irq(locktype *lock) \
+{ \
+ _##op##_lock_irqsave(lock); \
+} \
+ \
+EXPORT_SYMBOL(_##op##_lock_irq); \
+ \
+void __lockfunc _##op##_lock_bh(locktype *lock) \
+{ \
+ unsigned long flags; \
+ \
+ /* */ \
+ /* Careful: we must exclude softirqs too, hence the */ \
+ /* irq-disabling. We use the generic preemption-aware */ \
+ /* function: */ \
+ /**/ \
+ flags = _##op##_lock_irqsave(lock); \
+ local_bh_disable(); \
+ local_irq_restore(flags); \
+} \
+ \
+EXPORT_SYMBOL(_##op##_lock_bh)
+
+/*
+ * Build preemption-friendly versions of the following
+ * lock-spinning functions:
+ *
+ * _[spin|read|write]_lock()
+ * _[spin|read|write]_lock_irq()
+ * _[spin|read|write]_lock_irqsave()
+ * _[spin|read|write]_lock_bh()
+ */
+BUILD_LOCK_OPS(spin, spinlock_t, spin_is_locked);
+BUILD_LOCK_OPS(read, rwlock_t, rwlock_is_locked);
+BUILD_LOCK_OPS(write, rwlock_t, spin_is_locked);
+
+#endif /* CONFIG_PREEMPT */
+
+void __lockfunc _spin_unlock(spinlock_t *lock)
+{
+ _raw_spin_unlock(lock);
+ preempt_enable();
+}
+EXPORT_SYMBOL(_spin_unlock);
+
+void __lockfunc _write_unlock(rwlock_t *lock)
+{
+ _raw_write_unlock(lock);
+ preempt_enable();
+}
+EXPORT_SYMBOL(_write_unlock);
+
+void __lockfunc _read_unlock(rwlock_t *lock)
+{
+ _raw_read_unlock(lock);
+ preempt_enable();
+}
+EXPORT_SYMBOL(_read_unlock);
+
void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
{
_raw_spin_unlock(lock);
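For reference, hand-expanding BUILD_LOCK_OPS(spin, spinlock_t, spin_is_locked) for the plain lock case yields roughly the following (whitespace and the EXPORT_SYMBOL line omitted):

/* Approximate expansion of the _spin_lock() generated above. */
void __lockfunc _spin_lock(spinlock_t *lock)
{
	preempt_disable();
	for (;;) {
		if (likely(_raw_spin_trylock(lock)))
			break;
		preempt_enable();		/* allow preemption while we wait */
		if (!(lock)->break_lock)
			(lock)->break_lock = 1;	/* ask the holder to release early */
		while (spin_is_locked(lock) && (lock)->break_lock)
			cpu_relax();
		preempt_disable();
	}
}

The break_lock flag is what lets a holder in a long critical section notice that a preemptible waiter is spinning and drop the lock early.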
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 2ceea25f67f6..e31b1cb8e503 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -95,7 +95,7 @@ static int stop_machine(void)
stopmachine_state = STOPMACHINE_WAIT;
for_each_online_cpu(i) {
- if (i == smp_processor_id())
+ if (i == _smp_processor_id())
continue;
ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL);
if (ret < 0)
@@ -177,7 +177,7 @@ struct task_struct *__stop_machine_run(int (*fn)(void *), void *data,
/* If they don't care which CPU fn runs on, bind to any online one. */
if (cpu == NR_CPUS)
- cpu = smp_processor_id();
+ cpu = _smp_processor_id();
p = kthread_create(do_stop, &smdata, "kstopmachine");
if (!IS_ERR(p)) {
diff --git a/kernel/sys.c b/kernel/sys.c
index fdc29f17ac93..6e354fd380e7 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -23,6 +23,7 @@
#include <linux/security.h>
#include <linux/dcookies.h>
#include <linux/suspend.h>
+#include <linux/tty.h>
#include <linux/compat.h>
#include <linux/syscalls.h>
@@ -88,7 +89,7 @@ int cad_pid = 1;
*/
static struct notifier_block *reboot_notifier_list;
-rwlock_t notifier_lock = RW_LOCK_UNLOCKED;
+DEFINE_RWLOCK(notifier_lock);
/**
* notifier_chain_register - Add notifier to a notifier chain
@@ -892,15 +893,15 @@ asmlinkage long sys_times(struct tms __user * tbuf)
struct tms tmp;
struct task_struct *tsk = current;
struct task_struct *t;
- unsigned long utime, stime, cutime, cstime;
+ cputime_t utime, stime, cutime, cstime;
read_lock(&tasklist_lock);
utime = tsk->signal->utime;
stime = tsk->signal->stime;
t = tsk;
do {
- utime += t->utime;
- stime += t->stime;
+ utime = cputime_add(utime, t->utime);
+ stime = cputime_add(stime, t->stime);
t = next_thread(t);
} while (t != tsk);
@@ -919,10 +920,10 @@ asmlinkage long sys_times(struct tms __user * tbuf)
spin_unlock_irq(&tsk->sighand->siglock);
read_unlock(&tasklist_lock);
- tmp.tms_utime = jiffies_to_clock_t(utime);
- tmp.tms_stime = jiffies_to_clock_t(stime);
- tmp.tms_cutime = jiffies_to_clock_t(cutime);
- tmp.tms_cstime = jiffies_to_clock_t(cstime);
+ tmp.tms_utime = cputime_to_clock_t(utime);
+ tmp.tms_stime = cputime_to_clock_t(stime);
+ tmp.tms_cutime = cputime_to_clock_t(cutime);
+ tmp.tms_cstime = cputime_to_clock_t(cstime);
if (copy_to_user(tbuf, &tmp, sizeof(struct tms)))
return -EFAULT;
}
@@ -1075,6 +1076,7 @@ asmlinkage long sys_setsid(void)
if (!thread_group_leader(current))
return -EINVAL;
+ down(&tty_sem);
write_lock_irq(&tasklist_lock);
pid = find_pid(PIDTYPE_PGID, current->pid);
@@ -1088,6 +1090,7 @@ asmlinkage long sys_setsid(void)
err = process_group(current);
out:
write_unlock_irq(&tasklist_lock);
+ up(&tty_sem);
return err;
}
@@ -1525,7 +1528,7 @@ void k_getrusage(struct task_struct *p, int who, struct rusage *r)
{
struct task_struct *t;
unsigned long flags;
- unsigned long utime, stime;
+ cputime_t utime, stime;
memset((char *) r, 0, sizeof *r);
@@ -1542,12 +1545,12 @@ void k_getrusage(struct task_struct *p, int who, struct rusage *r)
r->ru_minflt = p->signal->cmin_flt;
r->ru_majflt = p->signal->cmaj_flt;
spin_unlock_irqrestore(&p->sighand->siglock, flags);
- jiffies_to_timeval(utime, &r->ru_utime);
- jiffies_to_timeval(stime, &r->ru_stime);
+ cputime_to_timeval(utime, &r->ru_utime);
+ cputime_to_timeval(stime, &r->ru_stime);
break;
case RUSAGE_SELF:
spin_lock_irqsave(&p->sighand->siglock, flags);
- utime = stime = 0;
+ utime = stime = cputime_zero;
goto sum_group;
case RUSAGE_BOTH:
spin_lock_irqsave(&p->sighand->siglock, flags);
@@ -1558,16 +1561,16 @@ void k_getrusage(struct task_struct *p, int who, struct rusage *r)
r->ru_minflt = p->signal->cmin_flt;
r->ru_majflt = p->signal->cmaj_flt;
sum_group:
- utime += p->signal->utime;
- stime += p->signal->stime;
+ utime = cputime_add(utime, p->signal->utime);
+ stime = cputime_add(stime, p->signal->stime);
r->ru_nvcsw += p->signal->nvcsw;
r->ru_nivcsw += p->signal->nivcsw;
r->ru_minflt += p->signal->min_flt;
r->ru_majflt += p->signal->maj_flt;
t = p;
do {
- utime += t->utime;
- stime += t->stime;
+ utime = cputime_add(utime, t->utime);
+ stime = cputime_add(stime, t->stime);
r->ru_nvcsw += t->nvcsw;
r->ru_nivcsw += t->nivcsw;
r->ru_minflt += t->min_flt;
@@ -1575,8 +1578,8 @@ void k_getrusage(struct task_struct *p, int who, struct rusage *r)
t = next_thread(t);
} while (t != p);
spin_unlock_irqrestore(&p->sighand->siglock, flags);
- jiffies_to_timeval(utime, &r->ru_utime);
- jiffies_to_timeval(stime, &r->ru_stime);
+ cputime_to_timeval(utime, &r->ru_utime);
+ cputime_to_timeval(stime, &r->ru_stime);
break;
default:
BUG();
@@ -1689,6 +1692,15 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
set_task_comm(me, ncomm);
return 0;
}
+ case PR_GET_NAME: {
+ struct task_struct *me = current;
+ unsigned char tcomm[sizeof(me->comm)];
+
+ get_task_comm(tcomm, me);
+ if (copy_to_user((char __user *)arg2, tcomm, sizeof(tcomm)))
+ return -EFAULT;
+ return 0;
+ }
default:
error = -EINVAL;
break;
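The new PR_GET_NAME case is the read-side counterpart of the existing PR_SET_NAME. Assuming the constant is visible through the userspace prctl headers, usage would look roughly like this (illustrative program, not part of the patch):

/* Illustrative userspace use of PR_SET_NAME / PR_GET_NAME. */
#include <stdio.h>
#include <sys/prctl.h>

int main(void)
{
	char comm[16];	/* sizeof(task_struct.comm) == TASK_COMM_LEN == 16 */

	prctl(PR_SET_NAME, (unsigned long)"worker", 0, 0, 0);
	if (prctl(PR_GET_NAME, (unsigned long)comm, 0, 0, 0) == 0)
		printf("comm: %s\n", comm);
	return 0;
}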
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 568b4579ef24..85503726d60b 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -52,7 +52,6 @@
#if defined(CONFIG_SYSCTL)
/* External variables not in a header file. */
-extern int panic_timeout;
extern int C_A_D;
extern int sysctl_overcommit_memory;
extern int sysctl_overcommit_ratio;
@@ -765,6 +764,7 @@ static ctl_table vm_table[] = {
.strategy = &sysctl_intvec,
.extra1 = &zero,
},
+#ifdef CONFIG_MMU
{
.ctl_name = VM_MAX_MAP_COUNT,
.procname = "max_map_count",
@@ -773,6 +773,7 @@ static ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec
},
+#endif
{
.ctl_name = VM_LAPTOP_MODE,
.procname = "laptop_mode",
@@ -914,6 +915,7 @@ static ctl_table fs_table[] = {
.proc_handler = &proc_dointvec,
},
#endif
+#ifdef CONFIG_MMU
{
.ctl_name = FS_LEASE_TIME,
.procname = "lease-break-time",
@@ -938,6 +940,7 @@ static ctl_table fs_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec,
},
+#endif
{ .ctl_name = 0 }
};
diff --git a/kernel/time.c b/kernel/time.c
index b6d01cf709c4..d5400f6af052 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -33,6 +33,7 @@
#include <linux/smp_lock.h>
#include <linux/syscalls.h>
#include <linux/security.h>
+#include <linux/fs.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -52,12 +53,10 @@ EXPORT_SYMBOL(sys_tz);
* sys_gettimeofday(). Is this for backwards compatibility? If so,
* why not move it into the appropriate arch directory (for those
* architectures that need it).
- *
- * XXX This function is NOT 64-bit clean!
*/
-asmlinkage long sys_time(int __user * tloc)
+asmlinkage long sys_time(time_t __user * tloc)
{
- int i;
+ time_t i;
struct timeval tv;
do_gettimeofday(&tv);
@@ -417,7 +416,7 @@ asmlinkage long sys_adjtimex(struct timex __user *txc_p)
return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret;
}
-struct timespec current_kernel_time(void)
+inline struct timespec current_kernel_time(void)
{
struct timespec now;
unsigned long seq;
@@ -433,6 +432,50 @@ struct timespec current_kernel_time(void)
EXPORT_SYMBOL(current_kernel_time);
+/**
+ * current_fs_time - Return FS time
+ * @sb: Superblock.
+ *
+ * Return the current time truncated to the time granularity supported by
+ * the fs.
+ */
+struct timespec current_fs_time(struct super_block *sb)
+{
+ struct timespec now = current_kernel_time();
+ return timespec_trunc(now, sb->s_time_gran);
+}
+EXPORT_SYMBOL(current_fs_time);
+
+/**
+ * timespec_trunc - Truncate timespec to a granularity
+ * @t: Timespec
+ * @gran: Granularity in ns.
+ *
+ * Truncate a timespec to a granularity. gran must be smaller than a second.
+ * Always rounds down.
+ *
+ * This function should only be used for timestamps returned by
+ * current_kernel_time() or CURRENT_TIME, not with do_gettimeofday() because
+ * it doesn't handle the better resolution of the latter.
+ */
+struct timespec timespec_trunc(struct timespec t, unsigned gran)
+{
+ /*
+ * Division is pretty slow so avoid it for common cases.
+ * Currently current_kernel_time() never returns better than
+ * jiffies resolution. Exploit that.
+ */
+ if (gran <= jiffies_to_usecs(1) * 1000) {
+ /* nothing */
+ } else if (gran == 1000000000) {
+ t.tv_nsec = 0;
+ } else {
+ t.tv_nsec -= t.tv_nsec % gran;
+ }
+ return t;
+}
+EXPORT_SYMBOL(timespec_trunc);
+
#ifdef CONFIG_TIME_INTERPOLATION
void getnstimeofday (struct timespec *tv)
{
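current_fs_time() is meant for filesystems that stamp inodes with whatever granularity their on-disk format supports. A minimal sketch of the intended call site; the helper name is made up and s_time_gran is assumed to have been set by the filesystem at mount time:

/* Illustrative inode timestamp update using the new helper. */
#include <linux/fs.h>
#include <linux/time.h>

static void example_touch_mtime(struct inode *inode)
{
	/* s_time_gran is in ns: 1 for ns-resolution filesystems,
	 * 1000000000 for filesystems limited to one-second stamps. */
	inode->i_mtime = current_fs_time(inode->i_sb);
}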
diff --git a/kernel/timer.c b/kernel/timer.c
index d0eed9b563c4..6bb47b0e4983 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -465,7 +465,14 @@ repeat:
smp_wmb();
timer->base = NULL;
spin_unlock_irq(&base->lock);
- fn(data);
+ {
+ u32 preempt_count = preempt_count();
+ fn(data);
+ if (preempt_count != preempt_count()) {
+ printk("huh, entered %p with %08x, exited with %08x?\n", fn, preempt_count, preempt_count());
+ BUG();
+ }
+ }
spin_lock_irq(&base->lock);
goto repeat;
}
@@ -554,7 +561,7 @@ unsigned long tick_nsec = TICK_NSEC; /* ACTHZ period (nsec) */
/*
* The current time
* wall_to_monotonic is what we need to add to xtime (or xtime corrected
- * for sub jiffie times) to get to monotonic time. Monotonic is pegged at zero
+ * for sub jiffie times) to get to monotonic time. Monotonic is pegged
* at zero at system boot time, so wall_to_monotonic will be negative,
* however, we will ALWAYS keep the tv_nsec part positive so we can use
* the usual normalization.
@@ -799,59 +806,6 @@ static void update_wall_time(unsigned long ticks)
} while (ticks);
}
-static inline void do_process_times(struct task_struct *p,
- unsigned long user, unsigned long system)
-{
- unsigned long psecs;
-
- psecs = (p->utime += user);
- psecs += (p->stime += system);
- if (p->signal && !unlikely(p->state & (EXIT_DEAD|EXIT_ZOMBIE)) &&
- psecs / HZ >= p->signal->rlim[RLIMIT_CPU].rlim_cur) {
- /* Send SIGXCPU every second.. */
- if (!(psecs % HZ))
- send_sig(SIGXCPU, p, 1);
- /* and SIGKILL when we go over max.. */
- if (psecs / HZ >= p->signal->rlim[RLIMIT_CPU].rlim_max)
- send_sig(SIGKILL, p, 1);
- }
-}
-
-static inline void do_it_virt(struct task_struct * p, unsigned long ticks)
-{
- unsigned long it_virt = p->it_virt_value;
-
- if (it_virt) {
- it_virt -= ticks;
- if (!it_virt) {
- it_virt = p->it_virt_incr;
- send_sig(SIGVTALRM, p, 1);
- }
- p->it_virt_value = it_virt;
- }
-}
-
-static inline void do_it_prof(struct task_struct *p)
-{
- unsigned long it_prof = p->it_prof_value;
-
- if (it_prof) {
- if (--it_prof == 0) {
- it_prof = p->it_prof_incr;
- send_sig(SIGPROF, p, 1);
- }
- p->it_prof_value = it_prof;
- }
-}
-
-static void update_one_process(struct task_struct *p, unsigned long user,
- unsigned long system, int cpu)
-{
- do_process_times(p, user, system);
- do_it_virt(p, user);
- do_it_prof(p);
-}
-
/*
* Called from the timer interrupt handler to charge one tick to the current
* process. user_tick is 1 if the tick is user time, 0 for system.
@@ -859,11 +813,17 @@ static void update_one_process(struct task_struct *p, unsigned long user,
void update_process_times(int user_tick)
{
struct task_struct *p = current;
- int cpu = smp_processor_id(), system = user_tick ^ 1;
+ int cpu = smp_processor_id();
- update_one_process(p, user_tick, system, cpu);
+ /* Note: this timer irq context must be accounted for as well. */
+ if (user_tick)
+ account_user_time(p, jiffies_to_cputime(1));
+ else
+ account_system_time(p, HARDIRQ_OFFSET, jiffies_to_cputime(1));
run_local_timers();
- scheduler_tick(user_tick, system);
+ if (rcu_pending(cpu))
+ rcu_check_callbacks(cpu, user_tick);
+ scheduler_tick();
}
/*
@@ -1438,7 +1398,7 @@ void __init init_timers(void)
struct time_interpolator *time_interpolator;
static struct time_interpolator *time_interpolator_list;
-static spinlock_t time_interpolator_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(time_interpolator_lock);
static inline u64 time_interpolator_get_cycles(unsigned int src)
{
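The SPIN_LOCK_UNLOCKED to DEFINE_SPINLOCK() conversions seen here and in user.c and workqueue.c below are mechanical; the macro is assumed to reduce to the old-style initializer while giving a single definition point that can be extended later, roughly:

/* Assumed shape of the initializer macros (sketch, not from this patch). */
#define DEFINE_SPINLOCK(x)	spinlock_t x = SPIN_LOCK_UNLOCKED
#define DEFINE_RWLOCK(x)	rwlock_t  x = RW_LOCK_UNLOCKED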
diff --git a/kernel/user.c b/kernel/user.c
index 693487dc940e..18f63146602a 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -26,7 +26,7 @@
static kmem_cache_t *uid_cachep;
static struct list_head uidhash_table[UIDHASH_SZ];
-static spinlock_t uidhash_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(uidhash_lock);
struct user_struct root_user = {
.__count = ATOMIC_INIT(1),
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index ee77ccd01d04..3993f7bdf5c2 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -8,7 +8,7 @@
*
* Derived from the taskqueue/keventd code by:
*
- * David Woodhouse <dwmw2@redhat.com>
+ * David Woodhouse <dwmw2@infradead.org>
* Andrew Morton <andrewm@uow.edu.au>
* Kai Petzke <wpp@marie.physik.tu-berlin.de>
* Theodore Ts'o <tytso@mit.edu>
@@ -64,7 +64,7 @@ struct workqueue_struct {
/* All the per-cpu workqueues on the system, for hotplug cpu to add/remove
threads to each one as cpus come/go. */
-static spinlock_t workqueue_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(workqueue_lock);
static LIST_HEAD(workqueues);
/* If it's single threaded, it isn't in the list of workqueues. */
@@ -188,7 +188,7 @@ static int worker_thread(void *__cwq)
current->flags |= PF_NOFREEZE;
- set_user_nice(current, -10);
+ set_user_nice(current, -5);
/* Block and flush all signals */
sigfillset(&blocked);