diff options
| author | Andrew Morton <akpm@digeo.com> | 2003-05-25 01:10:37 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@home.transmeta.com> | 2003-05-25 01:10:37 -0700 |
| commit | 055e188d5ce5d5edd5d916a0cd549ebb195388a3 (patch) | |
| tree | b1f2d4dd72df0d360f528aa8107a62fe47fe5e6d /fs | |
| parent | 05cdeac3efd3bbffd5f01a1822c4f941e19ba31d (diff) | |
[PATCH] Fix dcache_lock/tasklist_lock ranking bug
__unhash_process acquires the dcache_lock while holding the
tasklist_lock for writing. This can deadlock. Additionally,
fs/proc/base.c incorrectly assumed that p->pid would be set to 0 during
release_task.
The patch fixes that by adding a new spinlock to the task structure and
fixing all references to (!p->pid).
The alternative to the new spinlock would be to hold dcache_lock around
__unhash_process.
- fs/proc/base.c assumed that p->pid is reset to 0 during exit. This is
not the case anymore. I now look at the count of the pid structure for
PIDTYPE_PID.
- de_thread now tested - as broken as it was before: open handles to
/proc/<pid> are either stale or invalid after an exec of a nptl process,
if the exec was call from a secondary thread.
- a few lock_kernels removed - that part of /proc doesn't need it.
- additional instances of 'if(current->pid)' replaced with pid_alive.
Diffstat (limited to 'fs')
| -rw-r--r-- | fs/exec.c | 37 | ||||
| -rw-r--r-- | fs/proc/base.c | 154 | ||||
| -rw-r--r-- | fs/proc/root.c | 2 |
3 files changed, 117 insertions, 76 deletions
diff --git a/fs/exec.c b/fs/exec.c index e214e30ca525..9bbd3b4e76dc 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -529,30 +529,6 @@ static int exec_mmap(struct mm_struct *mm) return 0; } -static struct dentry *clean_proc_dentry(struct task_struct *p) -{ - struct dentry *proc_dentry = p->proc_dentry; - - if (proc_dentry) { - spin_lock(&dcache_lock); - if (!d_unhashed(proc_dentry)) { - dget_locked(proc_dentry); - __d_drop(proc_dentry); - } else - proc_dentry = NULL; - spin_unlock(&dcache_lock); - } - return proc_dentry; -} - -static inline void put_proc_dentry(struct dentry *dentry) -{ - if (dentry) { - shrink_dcache_parent(dentry); - dput(dentry); - } -} - /* * This function makes sure the current process has its own signal table, * so that flush_signal_handlers can later reset the handlers without @@ -660,9 +636,11 @@ static inline int de_thread(struct task_struct *tsk) while (leader->state != TASK_ZOMBIE) yield(); + spin_lock(&leader->proc_lock); + spin_lock(¤t->proc_lock); + proc_dentry1 = proc_pid_unhash(current); + proc_dentry2 = proc_pid_unhash(leader); write_lock_irq(&tasklist_lock); - proc_dentry1 = clean_proc_dentry(current); - proc_dentry2 = clean_proc_dentry(leader); if (leader->tgid != current->tgid) BUG(); @@ -702,9 +680,10 @@ static inline int de_thread(struct task_struct *tsk) state = leader->state; write_unlock_irq(&tasklist_lock); - - put_proc_dentry(proc_dentry1); - put_proc_dentry(proc_dentry2); + spin_unlock(&leader->proc_lock); + spin_unlock(¤t->proc_lock); + proc_pid_flush(proc_dentry1); + proc_pid_flush(proc_dentry2); if (state != TASK_ZOMBIE) BUG(); diff --git a/fs/proc/base.c b/fs/proc/base.c index cad41397fd3f..a657b562beb6 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -624,6 +624,12 @@ static struct inode_operations proc_pid_link_inode_operations = { .follow_link = proc_pid_follow_link }; +static int pid_alive(struct task_struct *p) +{ + BUG_ON(p->pids[PIDTYPE_PID].pidptr != &p->pids[PIDTYPE_PID].pid); + return atomic_read(&p->pids[PIDTYPE_PID].pid.count); +} + #define NUMBUF 10 static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) @@ -635,6 +641,9 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) char buf[NUMBUF]; struct files_struct * files; + retval = -ENOENT; + if (!pid_alive(p)) + goto out; retval = 0; pid = p->pid; @@ -696,48 +705,46 @@ static int proc_base_readdir(struct file * filp, int pid; struct inode *inode = filp->f_dentry->d_inode; struct pid_entry *p; - int ret = 0; + int ret; - lock_kernel(); + ret = -ENOENT; + if (!pid_alive(proc_task(inode))) + goto out; + ret = 0; pid = proc_task(inode)->pid; - if (!pid) { - ret = -ENOENT; - goto out; - } i = filp->f_pos; switch (i) { - case 0: - if (filldir(dirent, ".", 1, i, inode->i_ino, DT_DIR) < 0) - goto out; - i++; - filp->f_pos++; - /* fall through */ - case 1: - if (filldir(dirent, "..", 2, i, PROC_ROOT_INO, DT_DIR) < 0) + case 0: + if (filldir(dirent, ".", 1, i, inode->i_ino, DT_DIR) < 0) + goto out; + i++; + filp->f_pos++; + /* fall through */ + case 1: + if (filldir(dirent, "..", 2, i, PROC_ROOT_INO, DT_DIR) < 0) + goto out; + i++; + filp->f_pos++; + /* fall through */ + default: + i -= 2; + if (i >= ARRAY_SIZE(base_stuff)) { + ret = 1; + goto out; + } + p = base_stuff + i; + while (p->name) { + if (filldir(dirent, p->name, p->len, filp->f_pos, + fake_ino(pid, p->type), p->mode >> 12) < 0) goto out; - i++; filp->f_pos++; - /* fall through */ - default: - i -= 2; - if (i>=sizeof(base_stuff)/sizeof(base_stuff[0])) { - ret = 1; - goto out; - } - p = base_stuff + i; - while (p->name) { - if (filldir(dirent, p->name, p->len, filp->f_pos, - fake_ino(pid, p->type), p->mode >> 12) < 0) - goto out; - filp->f_pos++; - p++; - } + p++; + } } ret = 1; out: - unlock_kernel(); return ret; } @@ -774,7 +781,7 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode->i_ino = fake_ino(task->pid, ino); - if (!task->pid) + if (!pid_alive(task)) goto out_unlock; /* @@ -808,7 +815,7 @@ out_unlock: */ static int pid_revalidate(struct dentry * dentry, int flags) { - if (proc_task(dentry->d_inode)->pid) + if (pid_alive(proc_task(dentry->d_inode))) return 1; d_drop(dentry); return 0; @@ -842,18 +849,23 @@ static int pid_fd_revalidate(struct dentry * dentry, int flags) static void pid_base_iput(struct dentry *dentry, struct inode *inode) { struct task_struct *task = proc_task(inode); - write_lock_irq(&tasklist_lock); + spin_lock(&task->proc_lock); if (task->proc_dentry == dentry) task->proc_dentry = NULL; - write_unlock_irq(&tasklist_lock); + spin_unlock(&task->proc_lock); iput(inode); } static int pid_delete_dentry(struct dentry * dentry) { - return proc_task(dentry->d_inode)->pid == 0; + /* Is the task we represent dead? + * If so, then don't put the dentry on the lru list, + * kill it immediately. + */ + return !pid_alive(proc_task(dentry->d_inode)); } + static struct dentry_operations pid_fd_dentry_operations = { .d_revalidate = pid_fd_revalidate, @@ -909,6 +921,8 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry) if (fd == ~0U) goto out; + if (!pid_alive(task)) + goto out; inode = proc_pid_make_inode(dir->i_sb, task, PROC_PID_FD_DIR+fd); if (!inode) @@ -937,8 +951,6 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry) ei->op.proc_get_link = proc_fd_link; dentry->d_op = &pid_fd_dentry_operations; d_add(dentry, inode); - if (!proc_task(dentry->d_inode)->pid) - d_drop(dentry); return NULL; out_unlock2: @@ -975,6 +987,9 @@ static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry) error = -ENOENT; inode = NULL; + if (!pid_alive(task)) + goto out; + for (p = base_stuff; p->name; p++) { if (p->len != dentry->d_name.len) continue; @@ -1056,8 +1071,6 @@ static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry) } dentry->d_op = &pid_dentry_operations; d_add(dentry, inode); - if (!proc_task(dentry->d_inode)->pid) - d_drop(dentry); return NULL; out: @@ -1095,6 +1108,55 @@ static struct inode_operations proc_self_inode_operations = { .follow_link = proc_self_follow_link, }; +/** + * proc_pid_unhash - Unhash /proc/<pid> entry from the dcache. + * @p: task that should be flushed. + * + * Drops the /proc/<pid> dcache entry from the hash chains. + * + * Dropping /proc/<pid> entries and detach_pid must be synchroneous, + * otherwise e.g. /proc/<pid>/exe might point to the wrong executable, + * if the pid value is immediately reused. This is enforced by + * - caller must acquire spin_lock(p->proc_lock) + * - must be called before detach_pid() + * - proc_pid_lookup acquires proc_lock, and checks that + * the target is not dead by looking at the attach count + * of PIDTYPE_PID. + */ + +struct dentry *proc_pid_unhash(struct task_struct *p) +{ + struct dentry *proc_dentry; + + proc_dentry = p->proc_dentry; + if (proc_dentry != NULL) { + + spin_lock(&dcache_lock); + if (!d_unhashed(proc_dentry)) { + dget_locked(proc_dentry); + __d_drop(proc_dentry); + } else + proc_dentry = NULL; + spin_unlock(&dcache_lock); + } + return proc_dentry; +} + +/** + * proc_pid_flush - recover memory used by stale /proc/<pid>/x entries + * @proc_entry: directoy to prune. + * + * Shrink the /proc directory that was used by the just killed thread. + */ + +void proc_pid_flush(struct dentry *proc_dentry) +{ + if(proc_dentry != NULL) { + shrink_dcache_parent(proc_dentry); + dput(proc_dentry); + } +} + /* SMP-safe */ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry) { @@ -1143,12 +1205,12 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry) inode->i_flags|=S_IMMUTABLE; dentry->d_op = &pid_base_dentry_operations; + + spin_lock(&task->proc_lock); + task->proc_dentry = dentry; d_add(dentry, inode); - read_lock(&tasklist_lock); - proc_task(dentry->d_inode)->proc_dentry = dentry; - read_unlock(&tasklist_lock); - if (!proc_task(dentry->d_inode)->pid) - d_drop(dentry); + spin_unlock(&task->proc_lock); + return NULL; out: return ERR_PTR(-ENOENT); @@ -1171,7 +1233,7 @@ static int get_pid_list(int index, unsigned int *pids) read_lock(&tasklist_lock); for_each_process(p) { int pid = p->pid; - if (!pid) + if (!pid_alive(p)) continue; if (--index >= 0) continue; diff --git a/fs/proc/root.c b/fs/proc/root.c index c4d71953bc6a..d49f353378df 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -110,9 +110,9 @@ static int proc_root_readdir(struct file * filp, } filp->f_pos = FIRST_PROCESS_ENTRY; } + unlock_kernel(); ret = proc_pid_readdir(filp, dirent, filldir); - unlock_kernel(); return ret; } |
