summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorAndrew Morton <akpm@digeo.com>2003-05-25 01:10:37 -0700
committerLinus Torvalds <torvalds@home.transmeta.com>2003-05-25 01:10:37 -0700
commit055e188d5ce5d5edd5d916a0cd549ebb195388a3 (patch)
treeb1f2d4dd72df0d360f528aa8107a62fe47fe5e6d /fs
parent05cdeac3efd3bbffd5f01a1822c4f941e19ba31d (diff)
[PATCH] Fix dcache_lock/tasklist_lock ranking bug
__unhash_process acquires the dcache_lock while holding the tasklist_lock for writing. This can deadlock. Additionally, fs/proc/base.c incorrectly assumed that p->pid would be set to 0 during release_task. The patch fixes that by adding a new spinlock to the task structure and fixing all references to (!p->pid). The alternative to the new spinlock would be to hold dcache_lock around __unhash_process. - fs/proc/base.c assumed that p->pid is reset to 0 during exit. This is not the case anymore. I now look at the count of the pid structure for PIDTYPE_PID. - de_thread now tested - as broken as it was before: open handles to /proc/<pid> are either stale or invalid after an exec of a nptl process, if the exec was call from a secondary thread. - a few lock_kernels removed - that part of /proc doesn't need it. - additional instances of 'if(current->pid)' replaced with pid_alive.
Diffstat (limited to 'fs')
-rw-r--r--fs/exec.c37
-rw-r--r--fs/proc/base.c154
-rw-r--r--fs/proc/root.c2
3 files changed, 117 insertions, 76 deletions
diff --git a/fs/exec.c b/fs/exec.c
index e214e30ca525..9bbd3b4e76dc 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -529,30 +529,6 @@ static int exec_mmap(struct mm_struct *mm)
return 0;
}
-static struct dentry *clean_proc_dentry(struct task_struct *p)
-{
- struct dentry *proc_dentry = p->proc_dentry;
-
- if (proc_dentry) {
- spin_lock(&dcache_lock);
- if (!d_unhashed(proc_dentry)) {
- dget_locked(proc_dentry);
- __d_drop(proc_dentry);
- } else
- proc_dentry = NULL;
- spin_unlock(&dcache_lock);
- }
- return proc_dentry;
-}
-
-static inline void put_proc_dentry(struct dentry *dentry)
-{
- if (dentry) {
- shrink_dcache_parent(dentry);
- dput(dentry);
- }
-}
-
/*
* This function makes sure the current process has its own signal table,
* so that flush_signal_handlers can later reset the handlers without
@@ -660,9 +636,11 @@ static inline int de_thread(struct task_struct *tsk)
while (leader->state != TASK_ZOMBIE)
yield();
+ spin_lock(&leader->proc_lock);
+ spin_lock(&current->proc_lock);
+ proc_dentry1 = proc_pid_unhash(current);
+ proc_dentry2 = proc_pid_unhash(leader);
write_lock_irq(&tasklist_lock);
- proc_dentry1 = clean_proc_dentry(current);
- proc_dentry2 = clean_proc_dentry(leader);
if (leader->tgid != current->tgid)
BUG();
@@ -702,9 +680,10 @@ static inline int de_thread(struct task_struct *tsk)
state = leader->state;
write_unlock_irq(&tasklist_lock);
-
- put_proc_dentry(proc_dentry1);
- put_proc_dentry(proc_dentry2);
+ spin_unlock(&leader->proc_lock);
+ spin_unlock(&current->proc_lock);
+ proc_pid_flush(proc_dentry1);
+ proc_pid_flush(proc_dentry2);
if (state != TASK_ZOMBIE)
BUG();
diff --git a/fs/proc/base.c b/fs/proc/base.c
index cad41397fd3f..a657b562beb6 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -624,6 +624,12 @@ static struct inode_operations proc_pid_link_inode_operations = {
.follow_link = proc_pid_follow_link
};
+static int pid_alive(struct task_struct *p)
+{
+ BUG_ON(p->pids[PIDTYPE_PID].pidptr != &p->pids[PIDTYPE_PID].pid);
+ return atomic_read(&p->pids[PIDTYPE_PID].pid.count);
+}
+
#define NUMBUF 10
static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
@@ -635,6 +641,9 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
char buf[NUMBUF];
struct files_struct * files;
+ retval = -ENOENT;
+ if (!pid_alive(p))
+ goto out;
retval = 0;
pid = p->pid;
@@ -696,48 +705,46 @@ static int proc_base_readdir(struct file * filp,
int pid;
struct inode *inode = filp->f_dentry->d_inode;
struct pid_entry *p;
- int ret = 0;
+ int ret;
- lock_kernel();
+ ret = -ENOENT;
+ if (!pid_alive(proc_task(inode)))
+ goto out;
+ ret = 0;
pid = proc_task(inode)->pid;
- if (!pid) {
- ret = -ENOENT;
- goto out;
- }
i = filp->f_pos;
switch (i) {
- case 0:
- if (filldir(dirent, ".", 1, i, inode->i_ino, DT_DIR) < 0)
- goto out;
- i++;
- filp->f_pos++;
- /* fall through */
- case 1:
- if (filldir(dirent, "..", 2, i, PROC_ROOT_INO, DT_DIR) < 0)
+ case 0:
+ if (filldir(dirent, ".", 1, i, inode->i_ino, DT_DIR) < 0)
+ goto out;
+ i++;
+ filp->f_pos++;
+ /* fall through */
+ case 1:
+ if (filldir(dirent, "..", 2, i, PROC_ROOT_INO, DT_DIR) < 0)
+ goto out;
+ i++;
+ filp->f_pos++;
+ /* fall through */
+ default:
+ i -= 2;
+ if (i >= ARRAY_SIZE(base_stuff)) {
+ ret = 1;
+ goto out;
+ }
+ p = base_stuff + i;
+ while (p->name) {
+ if (filldir(dirent, p->name, p->len, filp->f_pos,
+ fake_ino(pid, p->type), p->mode >> 12) < 0)
goto out;
- i++;
filp->f_pos++;
- /* fall through */
- default:
- i -= 2;
- if (i>=sizeof(base_stuff)/sizeof(base_stuff[0])) {
- ret = 1;
- goto out;
- }
- p = base_stuff + i;
- while (p->name) {
- if (filldir(dirent, p->name, p->len, filp->f_pos,
- fake_ino(pid, p->type), p->mode >> 12) < 0)
- goto out;
- filp->f_pos++;
- p++;
- }
+ p++;
+ }
}
ret = 1;
out:
- unlock_kernel();
return ret;
}
@@ -774,7 +781,7 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
inode->i_ino = fake_ino(task->pid, ino);
- if (!task->pid)
+ if (!pid_alive(task))
goto out_unlock;
/*
@@ -808,7 +815,7 @@ out_unlock:
*/
static int pid_revalidate(struct dentry * dentry, int flags)
{
- if (proc_task(dentry->d_inode)->pid)
+ if (pid_alive(proc_task(dentry->d_inode)))
return 1;
d_drop(dentry);
return 0;
@@ -842,18 +849,23 @@ static int pid_fd_revalidate(struct dentry * dentry, int flags)
static void pid_base_iput(struct dentry *dentry, struct inode *inode)
{
struct task_struct *task = proc_task(inode);
- write_lock_irq(&tasklist_lock);
+ spin_lock(&task->proc_lock);
if (task->proc_dentry == dentry)
task->proc_dentry = NULL;
- write_unlock_irq(&tasklist_lock);
+ spin_unlock(&task->proc_lock);
iput(inode);
}
static int pid_delete_dentry(struct dentry * dentry)
{
- return proc_task(dentry->d_inode)->pid == 0;
+ /* Is the task we represent dead?
+ * If so, then don't put the dentry on the lru list,
+ * kill it immediately.
+ */
+ return !pid_alive(proc_task(dentry->d_inode));
}
+
static struct dentry_operations pid_fd_dentry_operations =
{
.d_revalidate = pid_fd_revalidate,
@@ -909,6 +921,8 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry)
if (fd == ~0U)
goto out;
+ if (!pid_alive(task))
+ goto out;
inode = proc_pid_make_inode(dir->i_sb, task, PROC_PID_FD_DIR+fd);
if (!inode)
@@ -937,8 +951,6 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry)
ei->op.proc_get_link = proc_fd_link;
dentry->d_op = &pid_fd_dentry_operations;
d_add(dentry, inode);
- if (!proc_task(dentry->d_inode)->pid)
- d_drop(dentry);
return NULL;
out_unlock2:
@@ -975,6 +987,9 @@ static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry)
error = -ENOENT;
inode = NULL;
+ if (!pid_alive(task))
+ goto out;
+
for (p = base_stuff; p->name; p++) {
if (p->len != dentry->d_name.len)
continue;
@@ -1056,8 +1071,6 @@ static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry)
}
dentry->d_op = &pid_dentry_operations;
d_add(dentry, inode);
- if (!proc_task(dentry->d_inode)->pid)
- d_drop(dentry);
return NULL;
out:
@@ -1095,6 +1108,55 @@ static struct inode_operations proc_self_inode_operations = {
.follow_link = proc_self_follow_link,
};
+/**
+ * proc_pid_unhash - Unhash /proc/<pid> entry from the dcache.
+ * @p: task that should be flushed.
+ *
+ * Drops the /proc/<pid> dcache entry from the hash chains.
+ *
+ * Dropping /proc/<pid> entries and detach_pid must be synchroneous,
+ * otherwise e.g. /proc/<pid>/exe might point to the wrong executable,
+ * if the pid value is immediately reused. This is enforced by
+ * - caller must acquire spin_lock(p->proc_lock)
+ * - must be called before detach_pid()
+ * - proc_pid_lookup acquires proc_lock, and checks that
+ * the target is not dead by looking at the attach count
+ * of PIDTYPE_PID.
+ */
+
+struct dentry *proc_pid_unhash(struct task_struct *p)
+{
+ struct dentry *proc_dentry;
+
+ proc_dentry = p->proc_dentry;
+ if (proc_dentry != NULL) {
+
+ spin_lock(&dcache_lock);
+ if (!d_unhashed(proc_dentry)) {
+ dget_locked(proc_dentry);
+ __d_drop(proc_dentry);
+ } else
+ proc_dentry = NULL;
+ spin_unlock(&dcache_lock);
+ }
+ return proc_dentry;
+}
+
+/**
+ * proc_pid_flush - recover memory used by stale /proc/<pid>/x entries
+ * @proc_entry: directoy to prune.
+ *
+ * Shrink the /proc directory that was used by the just killed thread.
+ */
+
+void proc_pid_flush(struct dentry *proc_dentry)
+{
+ if(proc_dentry != NULL) {
+ shrink_dcache_parent(proc_dentry);
+ dput(proc_dentry);
+ }
+}
+
/* SMP-safe */
struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry)
{
@@ -1143,12 +1205,12 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry)
inode->i_flags|=S_IMMUTABLE;
dentry->d_op = &pid_base_dentry_operations;
+
+ spin_lock(&task->proc_lock);
+ task->proc_dentry = dentry;
d_add(dentry, inode);
- read_lock(&tasklist_lock);
- proc_task(dentry->d_inode)->proc_dentry = dentry;
- read_unlock(&tasklist_lock);
- if (!proc_task(dentry->d_inode)->pid)
- d_drop(dentry);
+ spin_unlock(&task->proc_lock);
+
return NULL;
out:
return ERR_PTR(-ENOENT);
@@ -1171,7 +1233,7 @@ static int get_pid_list(int index, unsigned int *pids)
read_lock(&tasklist_lock);
for_each_process(p) {
int pid = p->pid;
- if (!pid)
+ if (!pid_alive(p))
continue;
if (--index >= 0)
continue;
diff --git a/fs/proc/root.c b/fs/proc/root.c
index c4d71953bc6a..d49f353378df 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -110,9 +110,9 @@ static int proc_root_readdir(struct file * filp,
}
filp->f_pos = FIRST_PROCESS_ENTRY;
}
+ unlock_kernel();
ret = proc_pid_readdir(filp, dirent, filldir);
- unlock_kernel();
return ret;
}