| field | value | date |
|---|---|---|
| author | David S. Miller <davem@nuts.ninka.net> | 2002-10-15 07:41:35 -0700 |
| committer | David S. Miller <davem@nuts.ninka.net> | 2002-10-15 07:41:35 -0700 |
| commit | 8fbfe7cd5594010a23cb4e81786d1fb8015ffdee (patch) | |
| tree | b5be190f22984395209823ec3cac1c76fc93f67f /kernel | |
| parent | e22f7f5fd43205bfd20ea3a7bb4e689cb3f3d278 (diff) | |
| parent | 5a7728c6d3eb83df9d120944cca4cf476dd326a1 (diff) | |
Merge nuts.ninka.net:/home/davem/src/BK/network-2.5
into nuts.ninka.net:/home/davem/src/BK/net-2.5
Diffstat (limited to 'kernel')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | kernel/Makefile | 8 |
| -rw-r--r-- | kernel/exit.c | 10 |
| -rw-r--r-- | kernel/futex.c | 29 |
| -rw-r--r-- | kernel/profile.c | 121 |
| -rw-r--r-- | kernel/rcupdate.c | 242 |
| -rw-r--r-- | kernel/sched.c | 5 |
| -rw-r--r-- | kernel/sys.c | 2 |
| -rw-r--r-- | kernel/timer.c | 4 |
8 files changed, 395 insertions, 26 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index b3fce6d3ac9c..daf6cbd5d42a 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -3,12 +3,14 @@
 #
 
 export-objs = signal.o sys.o kmod.o workqueue.o ksyms.o pm.o exec_domain.o \
-	printk.o platform.o suspend.o dma.o module.o cpufreq.o
+	printk.o platform.o suspend.o dma.o module.o cpufreq.o \
+	profile.o rcupdate.o
 
-obj-y = sched.o fork.o exec_domain.o panic.o printk.o \
+obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
 	module.o exit.o itimer.o time.o softirq.o resource.o \
 	sysctl.o capability.o ptrace.o timer.o user.o \
-	signal.o sys.o kmod.o workqueue.o futex.o platform.o pid.o
+	signal.o sys.o kmod.o workqueue.o futex.o platform.o pid.o \
+	rcupdate.o
 
 obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
 obj-$(CONFIG_SMP) += cpu.o
diff --git a/kernel/exit.c b/kernel/exit.c
index 6ed07def4c62..c2b0f6eeff0f 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -19,6 +19,7 @@
 #include <linux/file.h>
 #include <linux/binfmts.h>
 #include <linux/ptrace.h>
+#include <linux/profile.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -59,11 +60,12 @@ void release_task(struct task_struct * p)
 {
 	struct dentry *proc_dentry;
 	task_t *leader;
-
-	if (p->state < TASK_ZOMBIE)
-		BUG();
+
+	BUG_ON(p->state < TASK_ZOMBIE);
+
 	if (p != current)
 		wait_task_inactive(p);
+
 	atomic_dec(&p->user->processes);
 	security_ops->task_free_security(p);
 	free_uid(p->user);
@@ -635,6 +637,8 @@ NORET_TYPE void do_exit(long code)
 			current->comm, current->pid,
 			preempt_count());
 
+	profile_exit_task(tsk);
+
 fake_volatile:
 	acct_process(code);
 	__exit_mm(tsk);
diff --git a/kernel/futex.c b/kernel/futex.c
index d268c3c1b758..4aa2115c4d66 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -115,8 +115,9 @@ static struct page *__pin_page(unsigned long addr)
 	 * Do a quick atomic lookup first - this is the fastpath.
 	 */
 	page = follow_page(mm, addr, 0);
-	if (likely(page != NULL)) {
-		get_page(page);
+	if (likely(page != NULL)) {
+		if (!PageReserved(page))
+			get_page(page);
 		return page;
 	}
 
@@ -140,8 +141,10 @@ repeat_lookup:
 	 * check for races:
 	 */
 	tmp = follow_page(mm, addr, 0);
-	if (tmp != page)
+	if (tmp != page) {
+		put_page(page);
 		goto repeat_lookup;
+	}
 
 	return page;
 }
@@ -176,6 +179,7 @@ static int futex_wake(unsigned long uaddr, int offset, int num)
 		if (this->page == page && this->offset == offset) {
 			list_del_init(i);
+			__detach_vcache(&this->vcache);
 			tell_waiter(this);
 			ret++;
 			if (ret >= num)
@@ -235,15 +239,15 @@ static inline int unqueue_me(struct futex_q *q)
 {
 	int ret = 0;
 
-	detach_vcache(&q->vcache);
-
+	spin_lock(&vcache_lock);
 	spin_lock(&futex_lock);
 	if (!list_empty(&q->list)) {
 		list_del(&q->list);
+		__detach_vcache(&q->vcache);
 		ret = 1;
 	}
 	spin_unlock(&futex_lock);
-
+	spin_unlock(&vcache_lock);
 	return ret;
 }
 
@@ -314,13 +318,7 @@ static int futex_close(struct inode *inode, struct file *filp)
 {
 	struct futex_q *q = filp->private_data;
 
-	spin_lock(&futex_lock);
-	if (!list_empty(&q->list)) {
-		list_del(&q->list);
-		/* Noone can be polling on us now. */
-		BUG_ON(waitqueue_active(&q->waiters));
-	}
-	spin_unlock(&futex_lock);
+	unqueue_me(q);
 	unpin_page(q->page);
 	kfree(filp->private_data);
 	return 0;
@@ -436,9 +434,8 @@ asmlinkage int sys_futex(unsigned long uaddr, int op, int val, struct timespec *
 
 	pos_in_page = uaddr % PAGE_SIZE;
 
-	/* Must be "naturally" aligned, and not on page boundary. */
-	if ((pos_in_page % __alignof__(int)) != 0
-	    || pos_in_page + sizeof(int) > PAGE_SIZE)
+	/* Must be "naturally" aligned */
+	if (pos_in_page % sizeof(int))
 		return -EINVAL;
 
 	switch (op) {
diff --git a/kernel/profile.c b/kernel/profile.c
new file mode 100644
index 000000000000..756f142b1f35
--- /dev/null
+++ b/kernel/profile.c
@@ -0,0 +1,121 @@
+/*
+ * linux/kernel/profile.c
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/profile.h>
+#include <linux/bootmem.h>
+#include <linux/notifier.h>
+#include <linux/mm.h>
+
+extern char _stext, _etext;
+
+unsigned int * prof_buffer;
+unsigned long prof_len;
+unsigned long prof_shift;
+
+int __init profile_setup(char * str)
+{
+	int par;
+	if (get_option(&str,&par))
+		prof_shift = par;
+	return 1;
+}
+
+
+void __init profile_init(void)
+{
+	unsigned int size;
+
+	if (!prof_shift)
+		return;
+
+	/* only text is profiled */
+	prof_len = (unsigned long) &_etext - (unsigned long) &_stext;
+	prof_len >>= prof_shift;
+
+	size = prof_len * sizeof(unsigned int) + PAGE_SIZE - 1;
+	prof_buffer = (unsigned int *) alloc_bootmem(size);
+}
+
+/* Profile event notifications */
+
+#ifdef CONFIG_PROFILING
+
+static DECLARE_RWSEM(profile_rwsem);
+static struct notifier_block * exit_task_notifier;
+static struct notifier_block * exit_mmap_notifier;
+static struct notifier_block * exec_unmap_notifier;
+
+void profile_exit_task(struct task_struct * task)
+{
+	down_read(&profile_rwsem);
+	notifier_call_chain(&exit_task_notifier, 0, task);
+	up_read(&profile_rwsem);
+}
+
+void profile_exit_mmap(struct mm_struct * mm)
+{
+	down_read(&profile_rwsem);
+	notifier_call_chain(&exit_mmap_notifier, 0, mm);
+	up_read(&profile_rwsem);
+}
+
+void profile_exec_unmap(struct mm_struct * mm)
+{
+	down_read(&profile_rwsem);
+	notifier_call_chain(&exec_unmap_notifier, 0, mm);
+	up_read(&profile_rwsem);
+}
+
+int profile_event_register(enum profile_type type, struct notifier_block * n)
+{
+	int err = -EINVAL;
+
+	down_write(&profile_rwsem);
+
+	switch (type) {
+		case EXIT_TASK:
+			err = notifier_chain_register(&exit_task_notifier, n);
+			break;
+		case EXIT_MMAP:
+			err = notifier_chain_register(&exit_mmap_notifier, n);
+			break;
+		case EXEC_UNMAP:
+			err = notifier_chain_register(&exec_unmap_notifier, n);
+			break;
+	}
+
+	up_write(&profile_rwsem);
+
+	return err;
+}
+
+
+int profile_event_unregister(enum profile_type type, struct notifier_block * n)
+{
+	int err = -EINVAL;
+
+	down_write(&profile_rwsem);
+
+	switch (type) {
+		case EXIT_TASK:
+			err = notifier_chain_unregister(&exit_task_notifier, n);
+			break;
+		case EXIT_MMAP:
+			err = notifier_chain_unregister(&exit_mmap_notifier, n);
+			break;
+		case EXEC_UNMAP:
+			err = notifier_chain_unregister(&exec_unmap_notifier, n);
+			break;
+	}
+
+	up_write(&profile_rwsem);
+	return err;
+}
+
+#endif /* CONFIG_PROFILING */
+
+EXPORT_SYMBOL_GPL(profile_event_register);
+EXPORT_SYMBOL_GPL(profile_event_unregister);
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
new file mode 100644
index 000000000000..dfdf1774489d
--- /dev/null
+++ b/kernel/rcupdate.c
@@ -0,0 +1,242 @@
+/*
+ * Read-Copy Update mechanism for mutual exclusion
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (c) IBM Corporation, 2001
+ *
+ * Author: Dipankar Sarma <dipankar@in.ibm.com>
+ *
+ * Based on the original work by Paul McKenney <paul.mckenney@us.ibm.com>
+ * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
+ * Papers:
+ * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
+ * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
+ *
+ * For detailed explanation of Read-Copy Update mechanism see -
+ * http://lse.sourceforge.net/locking/rcupdate.html
+ *
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <asm/atomic.h>
+#include <asm/bitops.h>
+#include <linux/module.h>
+#include <linux/completion.h>
+#include <linux/percpu.h>
+#include <linux/rcupdate.h>
+
+/* Definition for rcupdate control block. */
+struct rcu_ctrlblk rcu_ctrlblk =
+	{ .mutex = SPIN_LOCK_UNLOCKED, .curbatch = 1,
+	  .maxbatch = 1, .rcu_cpu_mask = 0 };
+struct rcu_data rcu_data[NR_CPUS] __cacheline_aligned;
+
+/* Fake initialization required by compiler */
+static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL};
+#define RCU_tasklet(cpu) (per_cpu(rcu_tasklet, cpu))
+
+/**
+ * call_rcu - Queue an RCU update request.
+ * @head: structure to be used for queueing the RCU updates.
+ * @func: actual update function to be invoked after the grace period
+ * @arg: argument to be passed to the update function
+ *
+ * The update function will be invoked as soon as all CPUs have performed
+ * a context switch or been seen in the idle loop or in a user process.
+ * The read-side of critical section that use call_rcu() for updation must
+ * be protected by rcu_read_lock()/rcu_read_unlock().
+ */
+void call_rcu(struct rcu_head *head, void (*func)(void *arg), void *arg)
+{
+	int cpu;
+	unsigned long flags;
+
+	head->func = func;
+	head->arg = arg;
+	local_irq_save(flags);
+	cpu = smp_processor_id();
+	list_add_tail(&head->list, &RCU_nxtlist(cpu));
+	local_irq_restore(flags);
+}
+
+/*
+ * Invoke the completed RCU callbacks. They are expected to be in
+ * a per-cpu list.
+ */
+static void rcu_do_batch(struct list_head *list)
+{
+	struct list_head *entry;
+	struct rcu_head *head;
+
+	while (!list_empty(list)) {
+		entry = list->next;
+		list_del(entry);
+		head = list_entry(entry, struct rcu_head, list);
+		head->func(head->arg);
+	}
+}
+
+/*
+ * Register a new batch of callbacks, and start it up if there is currently no
+ * active batch and the batch to be registered has not already occurred.
+ * Caller must hold the rcu_ctrlblk lock.
+ */
+static void rcu_start_batch(long newbatch)
+{
+	if (rcu_batch_before(rcu_ctrlblk.maxbatch, newbatch)) {
+		rcu_ctrlblk.maxbatch = newbatch;
+	}
+	if (rcu_batch_before(rcu_ctrlblk.maxbatch, rcu_ctrlblk.curbatch) ||
+	    (rcu_ctrlblk.rcu_cpu_mask != 0)) {
+		return;
+	}
+	rcu_ctrlblk.rcu_cpu_mask = cpu_online_map;
+}
+
+/*
+ * Check if the cpu has gone through a quiescent state (say context
+ * switch). If so and if it already hasn't done so in this RCU
+ * quiescent cycle, then indicate that it has done so.
+ */
+static void rcu_check_quiescent_state(void)
+{
+	int cpu = smp_processor_id();
+
+	if (!test_bit(cpu, &rcu_ctrlblk.rcu_cpu_mask)) {
+		return;
+	}
+
+	/*
+	 * Races with local timer interrupt - in the worst case
+	 * we may miss one quiescent state of that CPU. That is
+	 * tolerable. So no need to disable interrupts.
+	 */
+	if (RCU_last_qsctr(cpu) == RCU_QSCTR_INVALID) {
+		RCU_last_qsctr(cpu) = RCU_qsctr(cpu);
+		return;
+	}
+	if (RCU_qsctr(cpu) == RCU_last_qsctr(cpu)) {
+		return;
+	}
+
+	spin_lock(&rcu_ctrlblk.mutex);
+	if (!test_bit(cpu, &rcu_ctrlblk.rcu_cpu_mask)) {
+		spin_unlock(&rcu_ctrlblk.mutex);
+		return;
+	}
+	clear_bit(cpu, &rcu_ctrlblk.rcu_cpu_mask);
+	RCU_last_qsctr(cpu) = RCU_QSCTR_INVALID;
+	if (rcu_ctrlblk.rcu_cpu_mask != 0) {
+		spin_unlock(&rcu_ctrlblk.mutex);
+		return;
+	}
+	rcu_ctrlblk.curbatch++;
+	rcu_start_batch(rcu_ctrlblk.maxbatch);
+	spin_unlock(&rcu_ctrlblk.mutex);
+}
+
+
+/*
+ * This does the RCU processing work from tasklet context.
+ */
+static void rcu_process_callbacks(unsigned long unused)
+{
+	int cpu = smp_processor_id();
+	LIST_HEAD(list);
+
+	if (!list_empty(&RCU_curlist(cpu)) &&
+	    rcu_batch_after(rcu_ctrlblk.curbatch, RCU_batch(cpu))) {
+		list_splice(&RCU_curlist(cpu), &list);
+		INIT_LIST_HEAD(&RCU_curlist(cpu));
+	}
+
+	local_irq_disable();
+	if (!list_empty(&RCU_nxtlist(cpu)) && list_empty(&RCU_curlist(cpu))) {
+		list_splice(&RCU_nxtlist(cpu), &RCU_curlist(cpu));
+		INIT_LIST_HEAD(&RCU_nxtlist(cpu));
+		local_irq_enable();
+
+		/*
+		 * start the next batch of callbacks
+		 */
+		spin_lock(&rcu_ctrlblk.mutex);
+		RCU_batch(cpu) = rcu_ctrlblk.curbatch + 1;
+		rcu_start_batch(RCU_batch(cpu));
+		spin_unlock(&rcu_ctrlblk.mutex);
+	} else {
+		local_irq_enable();
+	}
+	rcu_check_quiescent_state();
+	if (!list_empty(&list))
+		rcu_do_batch(&list);
+}
+
+void rcu_check_callbacks(int cpu, int user)
+{
+	if (user ||
+	    (idle_cpu(cpu) && !in_softirq() && hardirq_count() <= 1))
+		RCU_qsctr(cpu)++;
+	tasklet_schedule(&RCU_tasklet(cpu));
+}
+
+/*
+ * Initializes rcu mechanism. Assumed to be called early.
+ * That is before local timer(SMP) or jiffie timer (uniproc) is setup.
+ * Note that rcu_qsctr and friends are implicitly
+ * initialized due to the choice of ``0'' for RCU_CTR_INVALID.
+ */
+void __init rcu_init(void)
+{
+	int i;
+
+	memset(&rcu_data[0], 0, sizeof(rcu_data));
+	for (i = 0; i < NR_CPUS; i++) {
+		tasklet_init(&RCU_tasklet(i), rcu_process_callbacks, 0UL);
+		INIT_LIST_HEAD(&RCU_nxtlist(i));
+		INIT_LIST_HEAD(&RCU_curlist(i));
+	}
+}
+
+/* Because of FASTCALL declaration of complete, we use this wrapper */
+static void wakeme_after_rcu(void *completion)
+{
+	complete(completion);
+}
+
+/**
+ * synchronize-kernel - wait until all the CPUs have gone
+ * through a "quiescent" state. It may sleep.
+ */
+void synchronize_kernel(void)
+{
+	struct rcu_head rcu;
+	DECLARE_COMPLETION(completion);
+
+	/* Will wake me after RCU finished */
+	call_rcu(&rcu, wakeme_after_rcu, &completion);
+
+	/* Wait for it */
+	wait_for_completion(&completion);
+}
+
+
+EXPORT_SYMBOL(call_rcu);
+EXPORT_SYMBOL(synchronize_kernel);
diff --git a/kernel/sched.c b/kernel/sched.c
index 0464ac0649b8..20d2854c0bc6 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -31,6 +31,7 @@
 #include <linux/blkdev.h>
 #include <linux/delay.h>
 #include <linux/timer.h>
+#include <linux/rcupdate.h>
 
 /*
  * Convert user-nice values [ -20 ... 0 ... 19 ]
@@ -865,6 +866,9 @@ void scheduler_tick(int user_ticks, int sys_ticks)
 	runqueue_t *rq = this_rq();
 	task_t *p = current;
 
+	if (rcu_pending(cpu))
+		rcu_check_callbacks(cpu, user_ticks);
+
 	if (p == rq->idle) {
 		/* note: this timer irq context must be accounted for as well */
 		if (irq_count() - HARDIRQ_OFFSET >= SOFTIRQ_OFFSET)
@@ -1023,6 +1027,7 @@ pick_next_task:
 switch_tasks:
 	prefetch(next);
 	clear_tsk_need_resched(prev);
+	RCU_qsctr(prev->thread_info->cpu)++;
 
 	if (likely(prev != next)) {
 		rq->nr_switches++;
diff --git a/kernel/sys.c b/kernel/sys.c
index 5b7e84384cfa..3c2992ac68f2 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -20,6 +20,7 @@
 #include <linux/device.h>
 #include <linux/times.h>
 #include <linux/security.h>
+#include <linux/dcookies.h>
 
 #include <asm/uaccess.h>
 #include <asm/io.h>
@@ -202,6 +203,7 @@ asmlinkage long sys_ni_syscall(void)
 cond_syscall(sys_nfsservctl)
 cond_syscall(sys_quotactl)
 cond_syscall(sys_acct)
+cond_syscall(sys_lookup_dcookie)
 
 static int set_one_prio(struct task_struct *p, int niceval, int error)
 {
diff --git a/kernel/timer.c b/kernel/timer.c
index bf0077634c93..2d30f7fd0ecb 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -406,10 +406,6 @@ long time_adj;		/* tick adjust (scaled 1 / HZ)	*/
 long time_reftime;	/* time at last adjustment (s)	*/
 
 long time_adjust;
-unsigned int * prof_buffer;
-unsigned long prof_len;
-unsigned long prof_shift;
-
 /*
  * this routine handles the overflow of the microsecond field
 *
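
As a usage note (not part of this commit): the new kernel/profile.c drives its EXIT_TASK/EXIT_MMAP/EXEC_UNMAP hooks through ordinary notifier chains, so a client registers a struct notifier_block via profile_event_register(). Below is a minimal sketch of a hypothetical in-tree user of the EXIT_TASK hook; the callback and module names are illustrative only.

```c
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/notifier.h>
#include <linux/profile.h>
#include <linux/sched.h>

/* Hypothetical callback: invoked from profile_exit_task() in do_exit(),
 * with profile_rwsem held for read; data is the exiting task_struct. */
static int sample_task_exit(struct notifier_block *self,
			    unsigned long val, void *data)
{
	struct task_struct *task = data;

	printk(KERN_DEBUG "task %d exiting\n", task->pid);
	return 0;
}

static struct notifier_block sample_exit_nb = {
	.notifier_call = sample_task_exit,
};

static int __init sample_init(void)
{
	/* Hook the EXIT_TASK notifier chain added by this merge. */
	return profile_event_register(EXIT_TASK, &sample_exit_nb);
}

static void __exit sample_cleanup(void)
{
	profile_event_unregister(EXIT_TASK, &sample_exit_nb);
}

module_init(sample_init);
module_exit(sample_cleanup);
MODULE_LICENSE("GPL");
```

Registration takes profile_rwsem for write while the callbacks run with it held for read, which is what makes unregistering safe against exits that are already in flight.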
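Likewise, the call_rcu()/synchronize_kernel() interface added in kernel/rcupdate.c pairs a lock-free read side (rcu_read_lock()/rcu_read_unlock()) with deferred reclamation on the write side. The sketch below shows that intended pattern; struct foo, update_foo() and friends are made-up names, and the explicit (func, arg) form follows the 2002-era call_rcu() signature in this diff rather than the later API.

```c
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

/* Hypothetical RCU-protected object; the rcu_head is embedded so that
 * call_rcu() can queue the deferred free. */
struct foo {
	int data;
	struct rcu_head rcu;
};

static struct foo *global_foo;
static spinlock_t foo_lock = SPIN_LOCK_UNLOCKED;

/* Deferred destructor, run only after every CPU has passed through a
 * quiescent state (the grace period). */
static void free_foo(void *arg)
{
	kfree(arg);
}

/* Writer: publish a new copy under the update lock, then let RCU
 * reclaim the old copy once no reader can still reference it. */
static void update_foo(int data)
{
	struct foo *new, *old;

	new = kmalloc(sizeof(*new), GFP_KERNEL);
	if (!new)
		return;
	new->data = data;

	spin_lock(&foo_lock);
	old = global_foo;
	global_foo = new;		/* readers now see the new copy */
	spin_unlock(&foo_lock);

	if (old)
		call_rcu(&old->rcu, free_foo, old);
}

/* Reader: no locks, just an RCU read-side critical section, as the
 * call_rcu() kerneldoc in this patch requires. */
static int read_foo(void)
{
	int val;

	rcu_read_lock();
	val = global_foo ? global_foo->data : -1;
	rcu_read_unlock();
	return val;
}
```

synchronize_kernel() is the blocking form of the same mechanism: as the new wakeme_after_rcu() wrapper shows, it simply queues a callback that completes a completion and waits for it.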
