author     David S. Miller <davem@nuts.ninka.net>    2002-10-15 07:41:35 -0700
committer  David S. Miller <davem@nuts.ninka.net>    2002-10-15 07:41:35 -0700
commit     8fbfe7cd5594010a23cb4e81786d1fb8015ffdee (patch)
tree       b5be190f22984395209823ec3cac1c76fc93f67f /kernel
parent     e22f7f5fd43205bfd20ea3a7bb4e689cb3f3d278 (diff)
parent     5a7728c6d3eb83df9d120944cca4cf476dd326a1 (diff)
Merge nuts.ninka.net:/home/davem/src/BK/network-2.5
into nuts.ninka.net:/home/davem/src/BK/net-2.5
Diffstat (limited to 'kernel')
 -rw-r--r--  kernel/Makefile    |   8
 -rw-r--r--  kernel/exit.c      |  10
 -rw-r--r--  kernel/futex.c     |  29
 -rw-r--r--  kernel/profile.c   | 121
 -rw-r--r--  kernel/rcupdate.c  | 242
 -rw-r--r--  kernel/sched.c     |   5
 -rw-r--r--  kernel/sys.c       |   2
 -rw-r--r--  kernel/timer.c     |   4
8 files changed, 395 insertions(+), 26 deletions(-)
diff --git a/kernel/Makefile b/kernel/Makefile
index b3fce6d3ac9c..daf6cbd5d42a 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -3,12 +3,14 @@
#
export-objs = signal.o sys.o kmod.o workqueue.o ksyms.o pm.o exec_domain.o \
- printk.o platform.o suspend.o dma.o module.o cpufreq.o
+ printk.o platform.o suspend.o dma.o module.o cpufreq.o \
+ profile.o rcupdate.o
-obj-y = sched.o fork.o exec_domain.o panic.o printk.o \
+obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
module.o exit.o itimer.o time.o softirq.o resource.o \
sysctl.o capability.o ptrace.o timer.o user.o \
- signal.o sys.o kmod.o workqueue.o futex.o platform.o pid.o
+ signal.o sys.o kmod.o workqueue.o futex.o platform.o pid.o \
+ rcupdate.o
obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
obj-$(CONFIG_SMP) += cpu.o
diff --git a/kernel/exit.c b/kernel/exit.c
index 6ed07def4c62..c2b0f6eeff0f 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -19,6 +19,7 @@
#include <linux/file.h>
#include <linux/binfmts.h>
#include <linux/ptrace.h>
+#include <linux/profile.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -59,11 +60,12 @@ void release_task(struct task_struct * p)
{
struct dentry *proc_dentry;
task_t *leader;
-
- if (p->state < TASK_ZOMBIE)
- BUG();
+
+ BUG_ON(p->state < TASK_ZOMBIE);
+
if (p != current)
wait_task_inactive(p);
+
atomic_dec(&p->user->processes);
security_ops->task_free_security(p);
free_uid(p->user);
@@ -635,6 +637,8 @@ NORET_TYPE void do_exit(long code)
current->comm, current->pid,
preempt_count());
+ profile_exit_task(tsk);
+
fake_volatile:
acct_process(code);
__exit_mm(tsk);
diff --git a/kernel/futex.c b/kernel/futex.c
index d268c3c1b758..4aa2115c4d66 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -115,8 +115,9 @@ static struct page *__pin_page(unsigned long addr)
* Do a quick atomic lookup first - this is the fastpath.
*/
page = follow_page(mm, addr, 0);
- if (likely(page != NULL)) {
- get_page(page);
+ if (likely(page != NULL)) {
+ if (!PageReserved(page))
+ get_page(page);
return page;
}
@@ -140,8 +141,10 @@ repeat_lookup:
* check for races:
*/
tmp = follow_page(mm, addr, 0);
- if (tmp != page)
+ if (tmp != page) {
+ put_page(page);
goto repeat_lookup;
+ }
return page;
}
@@ -176,6 +179,7 @@ static int futex_wake(unsigned long uaddr, int offset, int num)
if (this->page == page && this->offset == offset) {
list_del_init(i);
+ __detach_vcache(&this->vcache);
tell_waiter(this);
ret++;
if (ret >= num)
@@ -235,15 +239,15 @@ static inline int unqueue_me(struct futex_q *q)
{
int ret = 0;
- detach_vcache(&q->vcache);
-
+ spin_lock(&vcache_lock);
spin_lock(&futex_lock);
if (!list_empty(&q->list)) {
list_del(&q->list);
+ __detach_vcache(&q->vcache);
ret = 1;
}
spin_unlock(&futex_lock);
-
+ spin_unlock(&vcache_lock);
return ret;
}
@@ -314,13 +318,7 @@ static int futex_close(struct inode *inode, struct file *filp)
{
struct futex_q *q = filp->private_data;
- spin_lock(&futex_lock);
- if (!list_empty(&q->list)) {
- list_del(&q->list);
- /* Noone can be polling on us now. */
- BUG_ON(waitqueue_active(&q->waiters));
- }
- spin_unlock(&futex_lock);
+ unqueue_me(q);
unpin_page(q->page);
kfree(filp->private_data);
return 0;
@@ -436,9 +434,8 @@ asmlinkage int sys_futex(unsigned long uaddr, int op, int val, struct timespec *
pos_in_page = uaddr % PAGE_SIZE;
- /* Must be "naturally" aligned, and not on page boundary. */
- if ((pos_in_page % __alignof__(int)) != 0
- || pos_in_page + sizeof(int) > PAGE_SIZE)
+ /* Must be "naturally" aligned */
+ if (pos_in_page % sizeof(int))
return -EINVAL;
switch (op) {
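Why the page-boundary test could be dropped: PAGE_SIZE is a power of two, so any offset inside the page that is a multiple of sizeof(int) necessarily leaves room for a whole int before the boundary. A standalone sketch of that invariant (the PAGE_SIZE value is an assumption, and this is illustrative userspace code, not part of the commit):

	#include <assert.h>
	#include <stddef.h>

	#define PAGE_SIZE 4096UL	/* assumed: any power of two >= sizeof(int) */

	int main(void)
	{
		unsigned long pos_in_page;

		for (pos_in_page = 0; pos_in_page < PAGE_SIZE; pos_in_page++) {
			if (pos_in_page % sizeof(int))
				continue;	/* sys_futex() rejects these with -EINVAL */
			/* every offset that passes the new check also fits inside
			 * the page, so the old "pos_in_page + sizeof(int) >
			 * PAGE_SIZE" test was redundant */
			assert(pos_in_page + sizeof(int) <= PAGE_SIZE);
		}
		return 0;
	}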
diff --git a/kernel/profile.c b/kernel/profile.c
new file mode 100644
index 000000000000..756f142b1f35
--- /dev/null
+++ b/kernel/profile.c
@@ -0,0 +1,121 @@
+/*
+ * linux/kernel/profile.c
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/profile.h>
+#include <linux/bootmem.h>
+#include <linux/notifier.h>
+#include <linux/mm.h>
+
+extern char _stext, _etext;
+
+unsigned int * prof_buffer;
+unsigned long prof_len;
+unsigned long prof_shift;
+
+int __init profile_setup(char * str)
+{
+ int par;
+ if (get_option(&str,&par))
+ prof_shift = par;
+ return 1;
+}
+
+
+void __init profile_init(void)
+{
+ unsigned int size;
+
+ if (!prof_shift)
+ return;
+
+ /* only text is profiled */
+ prof_len = (unsigned long) &_etext - (unsigned long) &_stext;
+ prof_len >>= prof_shift;
+
+ size = prof_len * sizeof(unsigned int) + PAGE_SIZE - 1;
+ prof_buffer = (unsigned int *) alloc_bootmem(size);
+}
+
+/* Profile event notifications */
+
+#ifdef CONFIG_PROFILING
+
+static DECLARE_RWSEM(profile_rwsem);
+static struct notifier_block * exit_task_notifier;
+static struct notifier_block * exit_mmap_notifier;
+static struct notifier_block * exec_unmap_notifier;
+
+void profile_exit_task(struct task_struct * task)
+{
+ down_read(&profile_rwsem);
+ notifier_call_chain(&exit_task_notifier, 0, task);
+ up_read(&profile_rwsem);
+}
+
+void profile_exit_mmap(struct mm_struct * mm)
+{
+ down_read(&profile_rwsem);
+ notifier_call_chain(&exit_mmap_notifier, 0, mm);
+ up_read(&profile_rwsem);
+}
+
+void profile_exec_unmap(struct mm_struct * mm)
+{
+ down_read(&profile_rwsem);
+ notifier_call_chain(&exec_unmap_notifier, 0, mm);
+ up_read(&profile_rwsem);
+}
+
+int profile_event_register(enum profile_type type, struct notifier_block * n)
+{
+ int err = -EINVAL;
+
+ down_write(&profile_rwsem);
+
+ switch (type) {
+ case EXIT_TASK:
+ err = notifier_chain_register(&exit_task_notifier, n);
+ break;
+ case EXIT_MMAP:
+ err = notifier_chain_register(&exit_mmap_notifier, n);
+ break;
+ case EXEC_UNMAP:
+ err = notifier_chain_register(&exec_unmap_notifier, n);
+ break;
+ }
+
+ up_write(&profile_rwsem);
+
+ return err;
+}
+
+
+int profile_event_unregister(enum profile_type type, struct notifier_block * n)
+{
+ int err = -EINVAL;
+
+ down_write(&profile_rwsem);
+
+ switch (type) {
+ case EXIT_TASK:
+ err = notifier_chain_unregister(&exit_task_notifier, n);
+ break;
+ case EXIT_MMAP:
+ err = notifier_chain_unregister(&exit_mmap_notifier, n);
+ break;
+ case EXEC_UNMAP:
+ err = notifier_chain_unregister(&exec_unmap_notifier, n);
+ break;
+ }
+
+ up_write(&profile_rwsem);
+ return err;
+}
+
+#endif /* CONFIG_PROFILING */
+
+EXPORT_SYMBOL_GPL(profile_event_register);
+EXPORT_SYMBOL_GPL(profile_event_unregister);
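The new profile.c gives subsystems a registration API for task-lifetime events; the exit.c hunk above wires do_exit() to fire EXIT_TASK through profile_exit_task(). A minimal sketch of a consumer module (the callback and module names are hypothetical, not part of this commit):

	#include <linux/module.h>
	#include <linux/notifier.h>
	#include <linux/profile.h>
	#include <linux/sched.h>

	/* Hypothetical: invoked from profile_exit_task() for every exiting task. */
	static int my_task_exit_notify(struct notifier_block *self,
				       unsigned long val, void *data)
	{
		struct task_struct *task = data;

		printk(KERN_DEBUG "task %d exiting\n", task->pid);
		return 0;
	}

	static struct notifier_block my_task_exit_nb = {
		.notifier_call = my_task_exit_notify,
	};

	static int __init my_profiler_init(void)
	{
		return profile_event_register(EXIT_TASK, &my_task_exit_nb);
	}

	static void __exit my_profiler_exit(void)
	{
		profile_event_unregister(EXIT_TASK, &my_task_exit_nb);
	}

	module_init(my_profiler_init);
	module_exit(my_profiler_exit);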
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
new file mode 100644
index 000000000000..dfdf1774489d
--- /dev/null
+++ b/kernel/rcupdate.c
@@ -0,0 +1,242 @@
+/*
+ * Read-Copy Update mechanism for mutual exclusion
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (c) IBM Corporation, 2001
+ *
+ * Author: Dipankar Sarma <dipankar@in.ibm.com>
+ *
+ * Based on the original work by Paul McKenney <paul.mckenney@us.ibm.com>
+ * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
+ * Papers:
+ * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
+ * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
+ *
+ * For detailed explanation of Read-Copy Update mechanism see -
+ * http://lse.sourceforge.net/locking/rcupdate.html
+ *
+ */
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <asm/atomic.h>
+#include <asm/bitops.h>
+#include <linux/module.h>
+#include <linux/completion.h>
+#include <linux/percpu.h>
+#include <linux/rcupdate.h>
+
+/* Definition for rcupdate control block. */
+struct rcu_ctrlblk rcu_ctrlblk =
+ { .mutex = SPIN_LOCK_UNLOCKED, .curbatch = 1,
+ .maxbatch = 1, .rcu_cpu_mask = 0 };
+struct rcu_data rcu_data[NR_CPUS] __cacheline_aligned;
+
+/* Fake initialization required by compiler */
+static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL};
+#define RCU_tasklet(cpu) (per_cpu(rcu_tasklet, cpu))
+
+/**
+ * call_rcu - Queue an RCU update request.
+ * @head: structure to be used for queueing the RCU updates.
+ * @func: actual update function to be invoked after the grace period
+ * @arg: argument to be passed to the update function
+ *
+ * The update function will be invoked as soon as all CPUs have performed
+ * a context switch or been seen in the idle loop or in a user process.
+ * Read-side critical sections that use call_rcu() for updates must
+ * be protected by rcu_read_lock()/rcu_read_unlock().
+ */
+void call_rcu(struct rcu_head *head, void (*func)(void *arg), void *arg)
+{
+ int cpu;
+ unsigned long flags;
+
+ head->func = func;
+ head->arg = arg;
+ local_irq_save(flags);
+ cpu = smp_processor_id();
+ list_add_tail(&head->list, &RCU_nxtlist(cpu));
+ local_irq_restore(flags);
+}
+
+/*
+ * Invoke the completed RCU callbacks. They are expected to be in
+ * a per-cpu list.
+ */
+static void rcu_do_batch(struct list_head *list)
+{
+ struct list_head *entry;
+ struct rcu_head *head;
+
+ while (!list_empty(list)) {
+ entry = list->next;
+ list_del(entry);
+ head = list_entry(entry, struct rcu_head, list);
+ head->func(head->arg);
+ }
+}
+
+/*
+ * Register a new batch of callbacks, and start it up if there is currently no
+ * active batch and the batch to be registered has not already occurred.
+ * Caller must hold the rcu_ctrlblk lock.
+ */
+static void rcu_start_batch(long newbatch)
+{
+ if (rcu_batch_before(rcu_ctrlblk.maxbatch, newbatch)) {
+ rcu_ctrlblk.maxbatch = newbatch;
+ }
+ if (rcu_batch_before(rcu_ctrlblk.maxbatch, rcu_ctrlblk.curbatch) ||
+ (rcu_ctrlblk.rcu_cpu_mask != 0)) {
+ return;
+ }
+ rcu_ctrlblk.rcu_cpu_mask = cpu_online_map;
+}
+
+/*
+ * Check if the CPU has gone through a quiescent state (e.g. a context
+ * switch). If so, and if it has not already been noted in this RCU
+ * quiescent cycle, record that it has.
+ */
+static void rcu_check_quiescent_state(void)
+{
+ int cpu = smp_processor_id();
+
+ if (!test_bit(cpu, &rcu_ctrlblk.rcu_cpu_mask)) {
+ return;
+ }
+
+ /*
+ * Races with local timer interrupt - in the worst case
+ * we may miss one quiescent state of that CPU. That is
+ * tolerable. So no need to disable interrupts.
+ */
+ if (RCU_last_qsctr(cpu) == RCU_QSCTR_INVALID) {
+ RCU_last_qsctr(cpu) = RCU_qsctr(cpu);
+ return;
+ }
+ if (RCU_qsctr(cpu) == RCU_last_qsctr(cpu)) {
+ return;
+ }
+
+ spin_lock(&rcu_ctrlblk.mutex);
+ if (!test_bit(cpu, &rcu_ctrlblk.rcu_cpu_mask)) {
+ spin_unlock(&rcu_ctrlblk.mutex);
+ return;
+ }
+ clear_bit(cpu, &rcu_ctrlblk.rcu_cpu_mask);
+ RCU_last_qsctr(cpu) = RCU_QSCTR_INVALID;
+ if (rcu_ctrlblk.rcu_cpu_mask != 0) {
+ spin_unlock(&rcu_ctrlblk.mutex);
+ return;
+ }
+ rcu_ctrlblk.curbatch++;
+ rcu_start_batch(rcu_ctrlblk.maxbatch);
+ spin_unlock(&rcu_ctrlblk.mutex);
+}
+
+
+/*
+ * This does the RCU processing work from tasklet context.
+ */
+static void rcu_process_callbacks(unsigned long unused)
+{
+ int cpu = smp_processor_id();
+ LIST_HEAD(list);
+
+ if (!list_empty(&RCU_curlist(cpu)) &&
+ rcu_batch_after(rcu_ctrlblk.curbatch, RCU_batch(cpu))) {
+ list_splice(&RCU_curlist(cpu), &list);
+ INIT_LIST_HEAD(&RCU_curlist(cpu));
+ }
+
+ local_irq_disable();
+ if (!list_empty(&RCU_nxtlist(cpu)) && list_empty(&RCU_curlist(cpu))) {
+ list_splice(&RCU_nxtlist(cpu), &RCU_curlist(cpu));
+ INIT_LIST_HEAD(&RCU_nxtlist(cpu));
+ local_irq_enable();
+
+ /*
+ * start the next batch of callbacks
+ */
+ spin_lock(&rcu_ctrlblk.mutex);
+ RCU_batch(cpu) = rcu_ctrlblk.curbatch + 1;
+ rcu_start_batch(RCU_batch(cpu));
+ spin_unlock(&rcu_ctrlblk.mutex);
+ } else {
+ local_irq_enable();
+ }
+ rcu_check_quiescent_state();
+ if (!list_empty(&list))
+ rcu_do_batch(&list);
+}
+
+void rcu_check_callbacks(int cpu, int user)
+{
+ if (user ||
+ (idle_cpu(cpu) && !in_softirq() && hardirq_count() <= 1))
+ RCU_qsctr(cpu)++;
+ tasklet_schedule(&RCU_tasklet(cpu));
+}
+
+/*
+ * Initializes the RCU mechanism. Assumed to be called early, i.e.
+ * before the local timer (SMP) or the jiffies timer (UP) is set up.
+ * Note that rcu_qsctr and friends are implicitly initialized
+ * because RCU_QSCTR_INVALID was chosen to be ``0''.
+ */
+void __init rcu_init(void)
+{
+ int i;
+
+ memset(&rcu_data[0], 0, sizeof(rcu_data));
+ for (i = 0; i < NR_CPUS; i++) {
+ tasklet_init(&RCU_tasklet(i), rcu_process_callbacks, 0UL);
+ INIT_LIST_HEAD(&RCU_nxtlist(i));
+ INIT_LIST_HEAD(&RCU_curlist(i));
+ }
+}
+
+/* Because complete() is declared FASTCALL, we use this wrapper */
+static void wakeme_after_rcu(void *completion)
+{
+ complete(completion);
+}
+
+/**
+ * synchronize_kernel - wait until all the CPUs have gone
+ * through a "quiescent" state. It may sleep.
+ */
+void synchronize_kernel(void)
+{
+ struct rcu_head rcu;
+ DECLARE_COMPLETION(completion);
+
+ /* Will wake me after RCU finished */
+ call_rcu(&rcu, wakeme_after_rcu, &completion);
+
+ /* Wait for it */
+ wait_for_completion(&completion);
+}
+
+
+EXPORT_SYMBOL(call_rcu);
+EXPORT_SYMBOL(synchronize_kernel);
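A sketch of the usage pattern this API targets, using the three-argument call_rcu() signature introduced here (the foo structure and helper names are illustrative, not from the commit):

	#include <linux/errno.h>
	#include <linux/rcupdate.h>
	#include <linux/slab.h>
	#include <asm/system.h>

	struct foo {
		int data;
		struct rcu_head rcu;
	};

	static struct foo *global_foo;

	/* Deferred destructor: runs only after all CPUs pass a quiescent state,
	 * so no reader can still hold a reference to the old copy. */
	static void foo_reclaim(void *arg)
	{
		kfree(arg);
	}

	/* Writer: publish a new copy, then reclaim the old one after a grace
	 * period instead of freeing it immediately. */
	static int foo_update(int data)
	{
		struct foo *new_fp, *old_fp;

		new_fp = kmalloc(sizeof(*new_fp), GFP_KERNEL);
		if (!new_fp)
			return -ENOMEM;
		new_fp->data = data;
		old_fp = global_foo;
		wmb();			/* make the new contents visible first */
		global_foo = new_fp;
		call_rcu(&old_fp->rcu, foo_reclaim, old_fp);
		return 0;
	}

	/* Reader: no lock acquisition, just delimit the critical section. */
	static int foo_read(void)
	{
		int val;

		rcu_read_lock();
		val = global_foo->data;
		rcu_read_unlock();
		return val;
	}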
diff --git a/kernel/sched.c b/kernel/sched.c
index 0464ac0649b8..20d2854c0bc6 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -31,6 +31,7 @@
#include <linux/blkdev.h>
#include <linux/delay.h>
#include <linux/timer.h>
+#include <linux/rcupdate.h>
/*
* Convert user-nice values [ -20 ... 0 ... 19 ]
@@ -865,6 +866,9 @@ void scheduler_tick(int user_ticks, int sys_ticks)
runqueue_t *rq = this_rq();
task_t *p = current;
+ if (rcu_pending(cpu))
+ rcu_check_callbacks(cpu, user_ticks);
+
if (p == rq->idle) {
/* note: this timer irq context must be accounted for as well */
if (irq_count() - HARDIRQ_OFFSET >= SOFTIRQ_OFFSET)
@@ -1023,6 +1027,7 @@ pick_next_task:
switch_tasks:
prefetch(next);
clear_tsk_need_resched(prev);
+ RCU_qsctr(prev->thread_info->cpu)++;
if (likely(prev != next)) {
rq->nr_switches++;
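These two hooks are the entire scheduler side of RCU: each context switch bumps the per-CPU quiescent-state counter, and the timer tick polls for pending callback work. A toy userspace model of how such counters detect the end of a grace period (not kernel code; NCPUS and the structure are assumptions for illustration):

	#include <stdbool.h>

	#define NCPUS 4

	static unsigned long qsctr[NCPUS];	/* bumped at every context switch */
	static unsigned long snap[NCPUS];	/* snapshot taken when a batch starts */

	static void start_grace_period(void)
	{
		int i;

		for (i = 0; i < NCPUS; i++)
			snap[i] = qsctr[i];
	}

	/* The grace period is over once every CPU's counter has moved past its
	 * snapshot, i.e. every CPU has passed at least one quiescent state. */
	static bool grace_period_done(void)
	{
		int i;

		for (i = 0; i < NCPUS; i++)
			if (qsctr[i] == snap[i])
				return false;
		return true;
	}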
diff --git a/kernel/sys.c b/kernel/sys.c
index 5b7e84384cfa..3c2992ac68f2 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -20,6 +20,7 @@
#include <linux/device.h>
#include <linux/times.h>
#include <linux/security.h>
+#include <linux/dcookies.h>
#include <asm/uaccess.h>
#include <asm/io.h>
@@ -202,6 +203,7 @@ asmlinkage long sys_ni_syscall(void)
cond_syscall(sys_nfsservctl)
cond_syscall(sys_quotactl)
cond_syscall(sys_acct)
+cond_syscall(sys_lookup_dcookie)
static int set_one_prio(struct task_struct *p, int niceval, int error)
{
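The cond_syscall() entry makes sys_lookup_dcookie link even when the dcookie code is compiled out. Roughly, the macro (defined earlier in sys.c; reproduced here from memory as a sketch, not verbatim from this commit) aliases the symbol weakly to sys_ni_syscall:

	/* Sketch of the mechanism: a weak alias that resolves to the real
	 * syscall when it is built in, and to sys_ni_syscall (which returns
	 * -ENOSYS) otherwise. */
	#define cond_syscall(x) \
		asmlinkage long x(void) __attribute__((weak, alias("sys_ni_syscall")));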
diff --git a/kernel/timer.c b/kernel/timer.c
index bf0077634c93..2d30f7fd0ecb 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -406,10 +406,6 @@ long time_adj; /* tick adjust (scaled 1 / HZ) */
long time_reftime; /* time at last adjustment (s) */
long time_adjust;
-unsigned int * prof_buffer;
-unsigned long prof_len;
-unsigned long prof_shift;
-
/*
* this routine handles the overflow of the microsecond field
*