summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorPaul Mackerras <paulus@samba.org>2003-05-27 04:55:14 +1000
committerPaul Mackerras <paulus@samba.org>2003-05-27 04:55:14 +1000
commit927bc10f742f603ced09dae59c94be4c70b5e949 (patch)
tree1c55e3fcb149050f4a4b81144515d47f276730f5 /kernel
parent95c39f479c9ff6e36348ffd99c71dde12e6bcf32 (diff)
parentdc2f9764e8784817355504b5a78bed08578e2d46 (diff)
Merge samba.org:/stuff/paulus/kernel/linux-2.5
into samba.org:/stuff/paulus/kernel/for-linus-ppc
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile3
-rw-r--r--kernel/compat.c11
-rw-r--r--kernel/exit.c40
-rw-r--r--kernel/fork.c2
-rw-r--r--kernel/futex.c156
-rw-r--r--kernel/signal.c8
-rw-r--r--kernel/sys.c5
7 files changed, 152 insertions, 73 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 2929aae0c2fe..1e652214037c 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -5,9 +5,10 @@
obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
exit.o itimer.o time.o softirq.o resource.o \
sysctl.o capability.o ptrace.o timer.o user.o \
- signal.o sys.o kmod.o workqueue.o futex.o pid.o \
+ signal.o sys.o kmod.o workqueue.o pid.o \
rcupdate.o intermodule.o extable.o params.o posix-timers.o
+obj-$(CONFIG_FUTEX) += futex.o
obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
obj-$(CONFIG_SMP) += cpu.o
obj-$(CONFIG_UID16) += uid16.o
diff --git a/kernel/compat.c b/kernel/compat.c
index 0dcab5fc6acd..e0998f98b72b 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -18,6 +18,7 @@
#include <linux/signal.h>
#include <linux/sched.h> /* for MAX_SCHEDULE_TIMEOUT */
#include <linux/futex.h> /* for FUTEX_WAIT */
+#include <linux/unistd.h>
#include <asm/uaccess.h>
@@ -211,21 +212,25 @@ asmlinkage long compat_sys_sigprocmask(int how, compat_old_sigset_t *set,
return ret;
}
-extern long do_futex(unsigned long, int, int, unsigned long);
-
+#ifdef CONFIG_FUTEX
asmlinkage long compat_sys_futex(u32 *uaddr, int op, int val,
struct compat_timespec *utime)
{
struct timespec t;
unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
+ int val2 = 0;
if ((op == FUTEX_WAIT) && utime) {
if (get_compat_timespec(&t, utime))
return -EFAULT;
timeout = timespec_to_jiffies(&t) + 1;
}
- return do_futex((unsigned long)uaddr, op, val, timeout);
+ if (op == FUTEX_REQUEUE)
+ val2 = (int) utime;
+
+ return do_futex((unsigned long)uaddr, op, val, timeout, uaddr2, val2);
}
+#endif
asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit *rlim);
diff --git a/kernel/exit.c b/kernel/exit.c
index c4130eb03ca1..c5b8ec241a83 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -21,6 +21,7 @@
#include <linux/ptrace.h>
#include <linux/profile.h>
#include <linux/mount.h>
+#include <linux/proc_fs.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -31,10 +32,8 @@ extern struct task_struct *child_reaper;
int getrusage(struct task_struct *, int, struct rusage *);
-static struct dentry * __unhash_process(struct task_struct *p)
+static void __unhash_process(struct task_struct *p)
{
- struct dentry *proc_dentry;
-
nr_threads--;
detach_pid(p, PIDTYPE_PID);
detach_pid(p, PIDTYPE_TGID);
@@ -46,34 +45,25 @@ static struct dentry * __unhash_process(struct task_struct *p)
}
REMOVE_LINKS(p);
- proc_dentry = p->proc_dentry;
- if (unlikely(proc_dentry != NULL)) {
- spin_lock(&dcache_lock);
- if (!d_unhashed(proc_dentry)) {
- dget_locked(proc_dentry);
- __d_drop(proc_dentry);
- } else
- proc_dentry = NULL;
- spin_unlock(&dcache_lock);
- }
- return proc_dentry;
}
void release_task(struct task_struct * p)
{
- struct dentry *proc_dentry;
task_t *leader;
+ struct dentry *proc_dentry;
BUG_ON(p->state < TASK_ZOMBIE);
atomic_dec(&p->user->processes);
+ spin_lock(&p->proc_lock);
+ proc_dentry = proc_pid_unhash(p);
write_lock_irq(&tasklist_lock);
if (unlikely(p->ptrace))
__ptrace_unlink(p);
BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
__exit_signal(p);
__exit_sighand(p);
- proc_dentry = __unhash_process(p);
+ __unhash_process(p);
/*
* If we are the last non-leader member of the thread
@@ -92,11 +82,8 @@ void release_task(struct task_struct * p)
p->parent->cnswap += p->nswap + p->cnswap;
sched_exit(p);
write_unlock_irq(&tasklist_lock);
-
- if (unlikely(proc_dentry != NULL)) {
- shrink_dcache_parent(proc_dentry);
- dput(proc_dentry);
- }
+ spin_unlock(&p->proc_lock);
+ proc_pid_flush(proc_dentry);
release_thread(p);
put_task_struct(p);
}
@@ -107,14 +94,13 @@ void unhash_process(struct task_struct *p)
{
struct dentry *proc_dentry;
+ spin_lock(&p->proc_lock);
+ proc_dentry = proc_pid_unhash(p);
write_lock_irq(&tasklist_lock);
- proc_dentry = __unhash_process(p);
+ __unhash_process(p);
write_unlock_irq(&tasklist_lock);
-
- if (unlikely(proc_dentry != NULL)) {
- shrink_dcache_parent(proc_dentry);
- dput(proc_dentry);
- }
+ spin_unlock(&p->proc_lock);
+ proc_pid_flush(proc_dentry);
}
/*
diff --git a/kernel/fork.c b/kernel/fork.c
index a509e6da132f..23c6d34f800f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -457,7 +457,7 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
* not set up a proper pointer then tough luck.
*/
put_user(0, tidptr);
- sys_futex(tidptr, FUTEX_WAKE, 1, NULL);
+ sys_futex(tidptr, FUTEX_WAKE, 1, NULL, NULL);
}
}
diff --git a/kernel/futex.c b/kernel/futex.c
index ce5c894e459e..df2dcbf557d0 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2,6 +2,9 @@
* Fast Userspace Mutexes (which I call "Futexes!").
* (C) Rusty Russell, IBM 2002
*
+ * Generalized futexes, futex requeueing, misc fixes by Ingo Molnar
+ * (C) Copyright 2003 Red Hat Inc, All Rights Reserved
+ *
* Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly
* enough at me, Linus for the original (flawed) idea, Matthew
* Kirkwood for proof-of-concept implementation.
@@ -9,9 +12,6 @@
* "The futexes are also cursed."
* "But they come in a choice of three flavours!"
*
- * Generalized futexes for every mapping type, Ingo Molnar, 2002
- *
- *
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
@@ -93,19 +93,18 @@ static inline struct list_head *hash_futex(struct page *page, int offset)
FUTEX_HASHBITS)];
}
-/* Waiter either waiting in FUTEX_WAIT or poll(), or expecting signal */
-static inline void tell_waiter(struct futex_q *q)
-{
- wake_up_all(&q->waiters);
- if (q->filp)
- send_sigio(&q->filp->f_owner, q->fd, POLL_IN);
-}
-
/*
* Get kernel address of the user page and pin it.
*
* Must be called with (and returns with) all futex-MM locks held.
*/
+static inline struct page *__pin_page_atomic (struct page *page)
+{
+ if (!PageReserved(page))
+ get_page(page);
+ return page;
+}
+
static struct page *__pin_page(unsigned long addr)
{
struct mm_struct *mm = current->mm;
@@ -116,11 +115,8 @@ static struct page *__pin_page(unsigned long addr)
* Do a quick atomic lookup first - this is the fastpath.
*/
page = follow_page(mm, addr, 0);
- if (likely(page != NULL)) {
- if (!PageReserved(page))
- get_page(page);
- return page;
- }
+ if (likely(page != NULL))
+ return __pin_page_atomic(page);
/*
* No luck - need to fault in the page:
@@ -150,16 +146,11 @@ repeat_lookup:
return page;
}
-static inline void unpin_page(struct page *page)
-{
- put_page(page);
-}
-
/*
* Wake up all waiters hashed on the physical page that is mapped
* to this virtual address:
*/
-static int futex_wake(unsigned long uaddr, int offset, int num)
+static inline int futex_wake(unsigned long uaddr, int offset, int num)
{
struct list_head *i, *next, *head;
struct page *page;
@@ -181,7 +172,9 @@ static int futex_wake(unsigned long uaddr, int offset, int num)
if (this->page == page && this->offset == offset) {
list_del_init(i);
__detach_vcache(&this->vcache);
- tell_waiter(this);
+ wake_up_all(&this->waiters);
+ if (this->filp)
+ send_sigio(&this->filp->f_owner, this->fd, POLL_IN);
ret++;
if (ret >= num)
break;
@@ -189,7 +182,7 @@ static int futex_wake(unsigned long uaddr, int offset, int num)
}
unlock_futex_mm();
- unpin_page(page);
+ put_page(page);
return ret;
}
@@ -208,7 +201,9 @@ static void futex_vcache_callback(vcache_t *vcache, struct page *new_page)
spin_lock(&futex_lock);
if (!list_empty(&q->list)) {
+ put_page(q->page);
q->page = new_page;
+ __pin_page_atomic(new_page);
list_del(&q->list);
list_add_tail(&q->list, head);
}
@@ -216,6 +211,65 @@ static void futex_vcache_callback(vcache_t *vcache, struct page *new_page)
spin_unlock(&futex_lock);
}
+/*
+ * Requeue all waiters hashed on one physical page to another
+ * physical page.
+ */
+static inline int futex_requeue(unsigned long uaddr1, int offset1,
+ unsigned long uaddr2, int offset2, int nr_wake, int nr_requeue)
+{
+ struct list_head *i, *next, *head1, *head2;
+ struct page *page1 = NULL, *page2 = NULL;
+ int ret = 0;
+
+ lock_futex_mm();
+
+ page1 = __pin_page(uaddr1 - offset1);
+ if (!page1)
+ goto out;
+ page2 = __pin_page(uaddr2 - offset2);
+ if (!page2)
+ goto out;
+
+ head1 = hash_futex(page1, offset1);
+ head2 = hash_futex(page2, offset2);
+
+ list_for_each_safe(i, next, head1) {
+ struct futex_q *this = list_entry(i, struct futex_q, list);
+
+ if (this->page == page1 && this->offset == offset1) {
+ list_del_init(i);
+ __detach_vcache(&this->vcache);
+ if (++ret <= nr_wake) {
+ wake_up_all(&this->waiters);
+ if (this->filp)
+ send_sigio(&this->filp->f_owner,
+ this->fd, POLL_IN);
+ } else {
+ put_page(this->page);
+ __pin_page_atomic (page2);
+ list_add_tail(i, head2);
+ __attach_vcache(&this->vcache, uaddr2,
+ current->mm, futex_vcache_callback);
+ this->offset = offset2;
+ this->page = page2;
+ if (ret - nr_wake >= nr_requeue)
+ break;
+ }
+ }
+ }
+
+out:
+ unlock_futex_mm();
+
+ if (page1)
+ put_page(page1);
+ if (page2)
+ put_page(page2);
+
+ return ret;
+}
+
static inline void __queue_me(struct futex_q *q, struct page *page,
unsigned long uaddr, int offset,
int fd, struct file *filp)
@@ -252,7 +306,7 @@ static inline int unqueue_me(struct futex_q *q)
return ret;
}
-static int futex_wait(unsigned long uaddr,
+static inline int futex_wait(unsigned long uaddr,
int offset,
int val,
unsigned long time)
@@ -273,14 +327,17 @@ static int futex_wait(unsigned long uaddr,
}
__queue_me(&q, page, uaddr, offset, -1, NULL);
- unlock_futex_mm();
-
- /* Page is pinned, but may no longer be in this address space. */
+ /*
+ * Page is pinned, but may no longer be in this address space.
+ * It cannot schedule, so we access it with the spinlock held.
+ */
if (get_user(curval, (int *)uaddr) != 0) {
+ unlock_futex_mm();
ret = -EFAULT;
goto out;
}
if (curval != val) {
+ unlock_futex_mm();
ret = -EWOULDBLOCK;
goto out;
}
@@ -288,13 +345,15 @@ static int futex_wait(unsigned long uaddr,
* The get_user() above might fault and schedule so we
* cannot just set TASK_INTERRUPTIBLE state when queueing
* ourselves into the futex hash. This code thus has to
- * rely on the FUTEX_WAKE code doing a wakeup after removing
+ * rely on the futex_wake() code doing a wakeup after removing
* the waiter from the list.
*/
add_wait_queue(&q.waiters, &wait);
set_current_state(TASK_INTERRUPTIBLE);
- if (!list_empty(&q.list))
+ if (!list_empty(&q.list)) {
+ unlock_futex_mm();
time = schedule_timeout(time);
+ }
set_current_state(TASK_RUNNING);
/*
* NOTE: we don't remove ourselves from the waitqueue because
@@ -310,7 +369,7 @@ out:
/* Were we woken up anyway? */
if (!unqueue_me(&q))
ret = 0;
- unpin_page(page);
+ put_page(q.page);
return ret;
}
@@ -320,7 +379,7 @@ static int futex_close(struct inode *inode, struct file *filp)
struct futex_q *q = filp->private_data;
unqueue_me(q);
- unpin_page(q->page);
+ put_page(q->page);
kfree(filp->private_data);
return 0;
}
@@ -416,11 +475,12 @@ static int futex_fd(unsigned long uaddr, int offset, int signal)
page = NULL;
out:
if (page)
- unpin_page(page);
+ put_page(page);
return ret;
}
-long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout)
+long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout,
+ unsigned long uaddr2, int val2)
{
unsigned long pos_in_page;
int ret;
@@ -442,23 +502,45 @@ long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout)
/* non-zero val means F_SETOWN(getpid()) & F_SETSIG(val) */
ret = futex_fd(uaddr, pos_in_page, val);
break;
+ case FUTEX_REQUEUE:
+ {
+ unsigned long pos_in_page2 = uaddr2 % PAGE_SIZE;
+
+ /* Must be "naturally" aligned */
+ if (pos_in_page2 % sizeof(u32))
+ return -EINVAL;
+
+ ret = futex_requeue(uaddr, pos_in_page, uaddr2, pos_in_page2,
+ val, val2);
+ break;
+ }
default:
- ret = -EINVAL;
+ ret = -ENOSYS;
}
return ret;
}
-asmlinkage long sys_futex(u32 __user *uaddr, int op, int val, struct timespec __user *utime)
+
+asmlinkage long sys_futex(u32 __user *uaddr, int op, int val,
+ struct timespec __user *utime, u32 __user *uaddr2)
{
struct timespec t;
unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
+ int val2 = 0;
if ((op == FUTEX_WAIT) && utime) {
if (copy_from_user(&t, utime, sizeof(t)) != 0)
return -EFAULT;
timeout = timespec_to_jiffies(&t) + 1;
}
- return do_futex((unsigned long)uaddr, op, val, timeout);
+ /*
+ * requeue parameter in 'utime' if op == FUTEX_REQUEUE.
+ */
+ if (op == FUTEX_REQUEUE)
+ val2 = (int) utime;
+
+ return do_futex((unsigned long)uaddr, op, val, timeout,
+ (unsigned long)uaddr2, val2);
}
static struct super_block *
diff --git a/kernel/signal.c b/kernel/signal.c
index d15a55ec9e5e..d1f02ea4b7b1 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -336,7 +336,7 @@ void __exit_signal(struct task_struct *tsk)
* If there is any task waiting for the group exit
* then notify it:
*/
- if (sig->group_exit_task && atomic_read(&sig->count) <= 2) {
+ if (sig->group_exit_task && atomic_read(&sig->count) == sig->notify_count) {
wake_up_process(sig->group_exit_task);
sig->group_exit_task = NULL;
}
@@ -1346,6 +1346,9 @@ do_notify_parent_cldstop(struct task_struct *tsk, struct task_struct *parent)
spin_unlock_irqrestore(&sighand->siglock, flags);
}
+
+#ifndef HAVE_ARCH_GET_SIGNAL_TO_DELIVER
+
static void
finish_stop(int stop_count)
{
@@ -1460,9 +1463,6 @@ do_signal_stop(int signr)
finish_stop(stop_count);
}
-
-#ifndef HAVE_ARCH_GET_SIGNAL_TO_DELIVER
-
/*
* Do appropriate magic when group_stop_count > 0.
* We return nonzero if we stopped, after releasing the siglock.
diff --git a/kernel/sys.c b/kernel/sys.c
index 8e7de84e1df5..5c2c439ae6bc 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -226,6 +226,11 @@ cond_syscall(sys_shutdown)
cond_syscall(sys_sendmsg)
cond_syscall(sys_recvmsg)
cond_syscall(sys_socketcall)
+cond_syscall(sys_futex)
+cond_syscall(compat_sys_futex)
+cond_syscall(sys_epoll_create)
+cond_syscall(sys_epoll_ctl)
+cond_syscall(sys_epoll_wait)
static int set_one_prio(struct task_struct *p, int niceval, int error)
{