diff options
| author | Paul Mackerras <paulus@samba.org> | 2003-05-27 04:55:14 +1000 |
|---|---|---|
| committer | Paul Mackerras <paulus@samba.org> | 2003-05-27 04:55:14 +1000 |
| commit | 927bc10f742f603ced09dae59c94be4c70b5e949 (patch) | |
| tree | 1c55e3fcb149050f4a4b81144515d47f276730f5 /kernel | |
| parent | 95c39f479c9ff6e36348ffd99c71dde12e6bcf32 (diff) | |
| parent | dc2f9764e8784817355504b5a78bed08578e2d46 (diff) | |
Merge samba.org:/stuff/paulus/kernel/linux-2.5
into samba.org:/stuff/paulus/kernel/for-linus-ppc
Diffstat (limited to 'kernel')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | kernel/Makefile | 3 |
| -rw-r--r-- | kernel/compat.c | 11 |
| -rw-r--r-- | kernel/exit.c | 40 |
| -rw-r--r-- | kernel/fork.c | 2 |
| -rw-r--r-- | kernel/futex.c | 156 |
| -rw-r--r-- | kernel/signal.c | 8 |
| -rw-r--r-- | kernel/sys.c | 5 |

7 files changed, 152 insertions, 73 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index 2929aae0c2fe..1e652214037c 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -5,9 +5,10 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ exit.o itimer.o time.o softirq.o resource.o \ sysctl.o capability.o ptrace.o timer.o user.o \ - signal.o sys.o kmod.o workqueue.o futex.o pid.o \ + signal.o sys.o kmod.o workqueue.o pid.o \ rcupdate.o intermodule.o extable.o params.o posix-timers.o +obj-$(CONFIG_FUTEX) += futex.o obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o obj-$(CONFIG_SMP) += cpu.o obj-$(CONFIG_UID16) += uid16.o diff --git a/kernel/compat.c b/kernel/compat.c index 0dcab5fc6acd..e0998f98b72b 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -18,6 +18,7 @@ #include <linux/signal.h> #include <linux/sched.h> /* for MAX_SCHEDULE_TIMEOUT */ #include <linux/futex.h> /* for FUTEX_WAIT */ +#include <linux/unistd.h> #include <asm/uaccess.h> @@ -211,21 +212,25 @@ asmlinkage long compat_sys_sigprocmask(int how, compat_old_sigset_t *set, return ret; } -extern long do_futex(unsigned long, int, int, unsigned long); - +#ifdef CONFIG_FUTEX asmlinkage long compat_sys_futex(u32 *uaddr, int op, int val, struct compat_timespec *utime) { struct timespec t; unsigned long timeout = MAX_SCHEDULE_TIMEOUT; + int val2 = 0; if ((op == FUTEX_WAIT) && utime) { if (get_compat_timespec(&t, utime)) return -EFAULT; timeout = timespec_to_jiffies(&t) + 1; } - return do_futex((unsigned long)uaddr, op, val, timeout); + if (op == FUTEX_REQUEUE) + val2 = (int) utime; + + return do_futex((unsigned long)uaddr, op, val, timeout, uaddr2, val2); } +#endif asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit *rlim); diff --git a/kernel/exit.c b/kernel/exit.c index c4130eb03ca1..c5b8ec241a83 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -21,6 +21,7 @@ #include <linux/ptrace.h> #include <linux/profile.h> #include <linux/mount.h> +#include <linux/proc_fs.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -31,10 
+32,8 @@ extern struct task_struct *child_reaper; int getrusage(struct task_struct *, int, struct rusage *); -static struct dentry * __unhash_process(struct task_struct *p) +static void __unhash_process(struct task_struct *p) { - struct dentry *proc_dentry; - nr_threads--; detach_pid(p, PIDTYPE_PID); detach_pid(p, PIDTYPE_TGID); @@ -46,34 +45,25 @@ static struct dentry * __unhash_process(struct task_struct *p) } REMOVE_LINKS(p); - proc_dentry = p->proc_dentry; - if (unlikely(proc_dentry != NULL)) { - spin_lock(&dcache_lock); - if (!d_unhashed(proc_dentry)) { - dget_locked(proc_dentry); - __d_drop(proc_dentry); - } else - proc_dentry = NULL; - spin_unlock(&dcache_lock); - } - return proc_dentry; } void release_task(struct task_struct * p) { - struct dentry *proc_dentry; task_t *leader; + struct dentry *proc_dentry; BUG_ON(p->state < TASK_ZOMBIE); atomic_dec(&p->user->processes); + spin_lock(&p->proc_lock); + proc_dentry = proc_pid_unhash(p); write_lock_irq(&tasklist_lock); if (unlikely(p->ptrace)) __ptrace_unlink(p); BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children)); __exit_signal(p); __exit_sighand(p); - proc_dentry = __unhash_process(p); + __unhash_process(p); /* * If we are the last non-leader member of the thread @@ -92,11 +82,8 @@ void release_task(struct task_struct * p) p->parent->cnswap += p->nswap + p->cnswap; sched_exit(p); write_unlock_irq(&tasklist_lock); - - if (unlikely(proc_dentry != NULL)) { - shrink_dcache_parent(proc_dentry); - dput(proc_dentry); - } + spin_unlock(&p->proc_lock); + proc_pid_flush(proc_dentry); release_thread(p); put_task_struct(p); } @@ -107,14 +94,13 @@ void unhash_process(struct task_struct *p) { struct dentry *proc_dentry; + spin_lock(&p->proc_lock); + proc_dentry = proc_pid_unhash(p); write_lock_irq(&tasklist_lock); - proc_dentry = __unhash_process(p); + __unhash_process(p); write_unlock_irq(&tasklist_lock); - - if (unlikely(proc_dentry != NULL)) { - shrink_dcache_parent(proc_dentry); - dput(proc_dentry); 
- } + spin_unlock(&p->proc_lock); + proc_pid_flush(proc_dentry); } /* diff --git a/kernel/fork.c b/kernel/fork.c index a509e6da132f..23c6d34f800f 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -457,7 +457,7 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm) * not set up a proper pointer then tough luck. */ put_user(0, tidptr); - sys_futex(tidptr, FUTEX_WAKE, 1, NULL); + sys_futex(tidptr, FUTEX_WAKE, 1, NULL, NULL); } } diff --git a/kernel/futex.c b/kernel/futex.c index ce5c894e459e..df2dcbf557d0 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2,6 +2,9 @@ * Fast Userspace Mutexes (which I call "Futexes!"). * (C) Rusty Russell, IBM 2002 * + * Generalized futexes, futex requeueing, misc fixes by Ingo Molnar + * (C) Copyright 2003 Red Hat Inc, All Rights Reserved + * * Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly * enough at me, Linus for the original (flawed) idea, Matthew * Kirkwood for proof-of-concept implementation. @@ -9,9 +12,6 @@ * "The futexes are also cursed." * "But they come in a choice of three flavours!" * - * Generalized futexes for every mapping type, Ingo Molnar, 2002 - * - * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -93,19 +93,18 @@ static inline struct list_head *hash_futex(struct page *page, int offset) FUTEX_HASHBITS)]; } -/* Waiter either waiting in FUTEX_WAIT or poll(), or expecting signal */ -static inline void tell_waiter(struct futex_q *q) -{ - wake_up_all(&q->waiters); - if (q->filp) - send_sigio(&q->filp->f_owner, q->fd, POLL_IN); -} - /* * Get kernel address of the user page and pin it. * * Must be called with (and returns with) all futex-MM locks held. 
*/ +static inline struct page *__pin_page_atomic (struct page *page) +{ + if (!PageReserved(page)) + get_page(page); + return page; +} + static struct page *__pin_page(unsigned long addr) { struct mm_struct *mm = current->mm; @@ -116,11 +115,8 @@ static struct page *__pin_page(unsigned long addr) * Do a quick atomic lookup first - this is the fastpath. */ page = follow_page(mm, addr, 0); - if (likely(page != NULL)) { - if (!PageReserved(page)) - get_page(page); - return page; - } + if (likely(page != NULL)) + return __pin_page_atomic(page); /* * No luck - need to fault in the page: @@ -150,16 +146,11 @@ repeat_lookup: return page; } -static inline void unpin_page(struct page *page) -{ - put_page(page); -} - /* * Wake up all waiters hashed on the physical page that is mapped * to this virtual address: */ -static int futex_wake(unsigned long uaddr, int offset, int num) +static inline int futex_wake(unsigned long uaddr, int offset, int num) { struct list_head *i, *next, *head; struct page *page; @@ -181,7 +172,9 @@ static int futex_wake(unsigned long uaddr, int offset, int num) if (this->page == page && this->offset == offset) { list_del_init(i); __detach_vcache(&this->vcache); - tell_waiter(this); + wake_up_all(&this->waiters); + if (this->filp) + send_sigio(&this->filp->f_owner, this->fd, POLL_IN); ret++; if (ret >= num) break; @@ -189,7 +182,7 @@ static int futex_wake(unsigned long uaddr, int offset, int num) } unlock_futex_mm(); - unpin_page(page); + put_page(page); return ret; } @@ -208,7 +201,9 @@ static void futex_vcache_callback(vcache_t *vcache, struct page *new_page) spin_lock(&futex_lock); if (!list_empty(&q->list)) { + put_page(q->page); q->page = new_page; + __pin_page_atomic(new_page); list_del(&q->list); list_add_tail(&q->list, head); } @@ -216,6 +211,65 @@ static void futex_vcache_callback(vcache_t *vcache, struct page *new_page) spin_unlock(&futex_lock); } +/* + * Requeue all waiters hashed on one physical page to another + * physical page. 
+ */ +static inline int futex_requeue(unsigned long uaddr1, int offset1, + unsigned long uaddr2, int offset2, int nr_wake, int nr_requeue) +{ + struct list_head *i, *next, *head1, *head2; + struct page *page1 = NULL, *page2 = NULL; + int ret = 0; + + lock_futex_mm(); + + page1 = __pin_page(uaddr1 - offset1); + if (!page1) + goto out; + page2 = __pin_page(uaddr2 - offset2); + if (!page2) + goto out; + + head1 = hash_futex(page1, offset1); + head2 = hash_futex(page2, offset2); + + list_for_each_safe(i, next, head1) { + struct futex_q *this = list_entry(i, struct futex_q, list); + + if (this->page == page1 && this->offset == offset1) { + list_del_init(i); + __detach_vcache(&this->vcache); + if (++ret <= nr_wake) { + wake_up_all(&this->waiters); + if (this->filp) + send_sigio(&this->filp->f_owner, + this->fd, POLL_IN); + } else { + put_page(this->page); + __pin_page_atomic (page2); + list_add_tail(i, head2); + __attach_vcache(&this->vcache, uaddr2, + current->mm, futex_vcache_callback); + this->offset = offset2; + this->page = page2; + if (ret - nr_wake >= nr_requeue) + break; + } + } + } + +out: + unlock_futex_mm(); + + if (page1) + put_page(page1); + if (page2) + put_page(page2); + + return ret; +} + static inline void __queue_me(struct futex_q *q, struct page *page, unsigned long uaddr, int offset, int fd, struct file *filp) @@ -252,7 +306,7 @@ static inline int unqueue_me(struct futex_q *q) return ret; } -static int futex_wait(unsigned long uaddr, +static inline int futex_wait(unsigned long uaddr, int offset, int val, unsigned long time) @@ -273,14 +327,17 @@ static int futex_wait(unsigned long uaddr, } __queue_me(&q, page, uaddr, offset, -1, NULL); - unlock_futex_mm(); - - /* Page is pinned, but may no longer be in this address space. */ + /* + * Page is pinned, but may no longer be in this address space. + * It cannot schedule, so we access it with the spinlock held. 
+ */ if (get_user(curval, (int *)uaddr) != 0) { + unlock_futex_mm(); ret = -EFAULT; goto out; } if (curval != val) { + unlock_futex_mm(); ret = -EWOULDBLOCK; goto out; } @@ -288,13 +345,15 @@ static int futex_wait(unsigned long uaddr, * The get_user() above might fault and schedule so we * cannot just set TASK_INTERRUPTIBLE state when queueing * ourselves into the futex hash. This code thus has to - * rely on the FUTEX_WAKE code doing a wakeup after removing + * rely on the futex_wake() code doing a wakeup after removing * the waiter from the list. */ add_wait_queue(&q.waiters, &wait); set_current_state(TASK_INTERRUPTIBLE); - if (!list_empty(&q.list)) + if (!list_empty(&q.list)) { + unlock_futex_mm(); time = schedule_timeout(time); + } set_current_state(TASK_RUNNING); /* * NOTE: we don't remove ourselves from the waitqueue because @@ -310,7 +369,7 @@ out: /* Were we woken up anyway? */ if (!unqueue_me(&q)) ret = 0; - unpin_page(page); + put_page(q.page); return ret; } @@ -320,7 +379,7 @@ static int futex_close(struct inode *inode, struct file *filp) struct futex_q *q = filp->private_data; unqueue_me(q); - unpin_page(q->page); + put_page(q->page); kfree(filp->private_data); return 0; } @@ -416,11 +475,12 @@ static int futex_fd(unsigned long uaddr, int offset, int signal) page = NULL; out: if (page) - unpin_page(page); + put_page(page); return ret; } -long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout) +long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout, + unsigned long uaddr2, int val2) { unsigned long pos_in_page; int ret; @@ -442,23 +502,45 @@ long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout) /* non-zero val means F_SETOWN(getpid()) & F_SETSIG(val) */ ret = futex_fd(uaddr, pos_in_page, val); break; + case FUTEX_REQUEUE: + { + unsigned long pos_in_page2 = uaddr2 % PAGE_SIZE; + + /* Must be "naturally" aligned */ + if (pos_in_page2 % sizeof(u32)) + return -EINVAL; + + ret = futex_requeue(uaddr, 
pos_in_page, uaddr2, pos_in_page2, + val, val2); + break; + } default: - ret = -EINVAL; + ret = -ENOSYS; } return ret; } -asmlinkage long sys_futex(u32 __user *uaddr, int op, int val, struct timespec __user *utime) + +asmlinkage long sys_futex(u32 __user *uaddr, int op, int val, + struct timespec __user *utime, u32 __user *uaddr2) { struct timespec t; unsigned long timeout = MAX_SCHEDULE_TIMEOUT; + int val2 = 0; if ((op == FUTEX_WAIT) && utime) { if (copy_from_user(&t, utime, sizeof(t)) != 0) return -EFAULT; timeout = timespec_to_jiffies(&t) + 1; } - return do_futex((unsigned long)uaddr, op, val, timeout); + /* + * requeue parameter in 'utime' if op == FUTEX_REQUEUE. + */ + if (op == FUTEX_REQUEUE) + val2 = (int) utime; + + return do_futex((unsigned long)uaddr, op, val, timeout, + (unsigned long)uaddr2, val2); } static struct super_block * diff --git a/kernel/signal.c b/kernel/signal.c index d15a55ec9e5e..d1f02ea4b7b1 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -336,7 +336,7 @@ void __exit_signal(struct task_struct *tsk) * If there is any task waiting for the group exit * then notify it: */ - if (sig->group_exit_task && atomic_read(&sig->count) <= 2) { + if (sig->group_exit_task && atomic_read(&sig->count) == sig->notify_count) { wake_up_process(sig->group_exit_task); sig->group_exit_task = NULL; } @@ -1346,6 +1346,9 @@ do_notify_parent_cldstop(struct task_struct *tsk, struct task_struct *parent) spin_unlock_irqrestore(&sighand->siglock, flags); } + +#ifndef HAVE_ARCH_GET_SIGNAL_TO_DELIVER + static void finish_stop(int stop_count) { @@ -1460,9 +1463,6 @@ do_signal_stop(int signr) finish_stop(stop_count); } - -#ifndef HAVE_ARCH_GET_SIGNAL_TO_DELIVER - /* * Do appropriate magic when group_stop_count > 0. * We return nonzero if we stopped, after releasing the siglock. 
diff --git a/kernel/sys.c b/kernel/sys.c index 8e7de84e1df5..5c2c439ae6bc 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -226,6 +226,11 @@ cond_syscall(sys_shutdown) cond_syscall(sys_sendmsg) cond_syscall(sys_recvmsg) cond_syscall(sys_socketcall) +cond_syscall(sys_futex) +cond_syscall(compat_sys_futex) +cond_syscall(sys_epoll_create) +cond_syscall(sys_epoll_ctl) +cond_syscall(sys_epoll_wait) static int set_one_prio(struct task_struct *p, int niceval, int error) { |
