Diffstat (limited to 'fs')
37 files changed, 889 insertions, 500 deletions
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index b3eca7db8051..99960d002026 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -127,10 +127,10 @@ static int kafscmd(void *arg)
 	complete(&kafscmd_alive);
 
 	/* only certain signals are of interest */
-	spin_lock_irq(&current->sig->siglock);
+	spin_lock_irq(&current->sighand->siglock);
 	siginitsetinv(&current->blocked,0);
 	recalc_sigpending();
-	spin_unlock_irq(&current->sig->siglock);
+	spin_unlock_irq(&current->sighand->siglock);
 
 	/* loop around looking for things to attend to */
 	do {

diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index a875684e3d4b..7de072e495c0 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -46,9 +46,9 @@ static inline void afs_discard_my_signals(void)
 	while (signal_pending(current)) {
 		siginfo_t sinfo;
 
-		spin_lock_irq(&current->sig->siglock);
+		spin_lock_irq(&current->sighand->siglock);
 		dequeue_signal(&current->blocked,&sinfo);
-		spin_unlock_irq(&current->sig->siglock);
+		spin_unlock_irq(&current->sighand->siglock);
 	}
 }

diff --git a/fs/afs/kafsasyncd.c b/fs/afs/kafsasyncd.c
index caedebc20095..e546a6da5015 100644
--- a/fs/afs/kafsasyncd.c
+++ b/fs/afs/kafsasyncd.c
@@ -101,10 +101,10 @@ static int kafsasyncd(void *arg)
 	complete(&kafsasyncd_alive);
 
 	/* only certain signals are of interest */
-	spin_lock_irq(&current->sig->siglock);
+	spin_lock_irq(&current->sighand->siglock);
 	siginitsetinv(&current->blocked,0);
 	recalc_sigpending();
-	spin_unlock_irq(&current->sig->siglock);
+	spin_unlock_irq(&current->sighand->siglock);
 
 	/* loop around looking for things to attend to */
 	do {

diff --git a/fs/afs/kafstimod.c b/fs/afs/kafstimod.c
index 0d3f30a73657..2b0f5a9d84e9 100644
--- a/fs/afs/kafstimod.c
+++ b/fs/afs/kafstimod.c
@@ -78,10 +78,10 @@ static int kafstimod(void *arg)
 	complete(&kafstimod_alive);
 
 	/* only certain signals are of interest */
-	spin_lock_irq(&current->sig->siglock);
+	spin_lock_irq(&current->sighand->siglock);
 	siginitsetinv(&current->blocked,0);
 	recalc_sigpending();
-	spin_unlock_irq(&current->sig->siglock);
+	spin_unlock_irq(&current->sighand->siglock);
 
 	/* loop around looking for things to attend to */
 loop:

diff --git a/fs/autofs/waitq.c b/fs/autofs/waitq.c
index c212015631b9..6c82dc144b33 100644
--- a/fs/autofs/waitq.c
+++ b/fs/autofs/waitq.c
@@ -70,10 +70,10 @@ static int autofs_write(struct file *file, const void *addr, int bytes)
 	/* Keep the currently executing process from receiving a
 	   SIGPIPE unless it was already supposed to get one */
 	if (wr == -EPIPE && !sigpipe) {
-		spin_lock_irqsave(&current->sig->siglock, flags);
+		spin_lock_irqsave(&current->sighand->siglock, flags);
 		sigdelset(&current->pending.signal, SIGPIPE);
 		recalc_sigpending();
-		spin_unlock_irqrestore(&current->sig->siglock, flags);
+		spin_unlock_irqrestore(&current->sighand->siglock, flags);
 	}
 
 	return (bytes > 0);
@@ -161,18 +161,18 @@ int autofs_wait(struct autofs_sb_info *sbi, struct qstr *name)
 		sigset_t oldset;
 		unsigned long irqflags;
 
-		spin_lock_irqsave(&current->sig->siglock, irqflags);
+		spin_lock_irqsave(&current->sighand->siglock, irqflags);
 		oldset = current->blocked;
 		siginitsetinv(&current->blocked, SHUTDOWN_SIGS & ~oldset.sig[0]);
 		recalc_sigpending();
-		spin_unlock_irqrestore(&current->sig->siglock, irqflags);
+		spin_unlock_irqrestore(&current->sighand->siglock, irqflags);
 
 		interruptible_sleep_on(&wq->queue);
 
-		spin_lock_irqsave(&current->sig->siglock, irqflags);
+		spin_lock_irqsave(&current->sighand->siglock, irqflags);
 		current->blocked = oldset;
 		recalc_sigpending();
-		spin_unlock_irqrestore(&current->sig->siglock, irqflags);
+		spin_unlock_irqrestore(&current->sighand->siglock, irqflags);
 	} else {
 		DPRINTK(("autofs_wait: skipped sleeping\n"));
 	}
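The AFS and autofs hunks above are the same mechanical rename (current->sig becomes current->sighand) applied to a common kernel-thread idiom: take the signal lock, choose which signals the daemon thread is willing to see, recompute pending state, drop the lock. A rough user-space analogue of that idiom, using POSIX thread signal masks instead of the kernel's siglock (all names here are illustrative, not from the patch):

/* sketch.c - block everything except the shutdown signals in a worker
 * thread, then wait for them synchronously; compile with -lpthread. */
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static void *worker(void *arg)
{
	sigset_t interesting;
	int sig;

	/* only certain signals are of interest */
	sigemptyset(&interesting);
	sigaddset(&interesting, SIGTERM);
	sigaddset(&interesting, SIGINT);

	/* loop around looking for things to attend to */
	if (sigwait(&interesting, &sig) == 0)
		printf("worker: got signal %d, shutting down\n", sig);
	return NULL;
}

int main(void)
{
	pthread_t t;
	sigset_t all;

	/* block everything in main so the worker inherits the mask */
	sigfillset(&all);
	pthread_sigmask(SIG_SETMASK, &all, NULL);

	pthread_create(&t, NULL, worker, NULL);
	sleep(1);
	pthread_kill(t, SIGTERM);	/* simulate the shutdown request */
	pthread_join(t, NULL);
	return 0;
}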
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 7af5f71e16b9..c1b7279cae81 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -74,10 +74,10 @@ static int autofs4_write(struct file *file, const void *addr, int bytes)
 	/* Keep the currently executing process from receiving a
 	   SIGPIPE unless it was already supposed to get one */
 	if (wr == -EPIPE && !sigpipe) {
-		spin_lock_irqsave(&current->sig->siglock, flags);
+		spin_lock_irqsave(&current->sighand->siglock, flags);
 		sigdelset(&current->pending.signal, SIGPIPE);
 		recalc_sigpending();
-		spin_unlock_irqrestore(&current->sig->siglock, flags);
+		spin_unlock_irqrestore(&current->sighand->siglock, flags);
 	}
 
 	return (bytes > 0);
@@ -198,18 +198,18 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct qstr *name,
 		sigset_t oldset;
 		unsigned long irqflags;
 
-		spin_lock_irqsave(&current->sig->siglock, irqflags);
+		spin_lock_irqsave(&current->sighand->siglock, irqflags);
 		oldset = current->blocked;
 		siginitsetinv(&current->blocked, SHUTDOWN_SIGS & ~oldset.sig[0]);
 		recalc_sigpending();
-		spin_unlock_irqrestore(&current->sig->siglock, irqflags);
+		spin_unlock_irqrestore(&current->sighand->siglock, irqflags);
 
 		interruptible_sleep_on(&wq->queue);
 
-		spin_lock_irqsave(&current->sig->siglock, irqflags);
+		spin_lock_irqsave(&current->sighand->siglock, irqflags);
 		current->blocked = oldset;
 		recalc_sigpending();
-		spin_unlock_irqrestore(&current->sig->siglock, irqflags);
+		spin_unlock_irqrestore(&current->sighand->siglock, irqflags);
 	} else {
 		DPRINTK(("autofs_wait: skipped sleeping\n"));
 	}

diff --git a/fs/befs/ChangeLog b/fs/befs/ChangeLog
index 8e09a0bd8ebb..6774a4e815b2 100644
--- a/fs/befs/ChangeLog
+++ b/fs/befs/ChangeLog
@@ -60,7 +60,7 @@ Version 0.63 (2002-01-31)
 * Documentation improvements in source. [WD]
 
-* Makefile fix for independant module when CONFIG_MODVERSION is set in
+* Makefile fix for independent module when CONFIG_MODVERSION is set in
 kernel config [Pavel Roskin <proski@gnu.org>]
 
 * Compile warning fix for namei.c. [Sergey S. Kostyliov <rathamahata@php4.ru>]

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 0d214c4a54fd..a95b66f560c7 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1058,7 +1058,7 @@ static inline void fill_note(struct memelfnote *note, const char *name, int type
 
 /*
 * fill up all the fields in prstatus from the given task struct, except registers
- * which need to be filled up seperately.
+ * which need to be filled up separately.
 */
 static inline void fill_prstatus(struct elf_prstatus *prstatus, struct task_struct *p, long signr)
 {

diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 7f088639326e..7975056a6995 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -403,7 +403,7 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 	set_personality(PER_LINUX);
 
 	/*
-	 * there are a couple of cases here,  the seperate code/data
+	 * there are a couple of cases here,  the separate code/data
 	 * case,  and then the fully copied to RAM case which lumps
 	 * it all together.
 	 */

diff --git a/fs/buffer.c b/fs/buffer.c
index 3fc9e47c5a0a..bf6ae714c730 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -127,9 +127,10 @@ void __wait_on_buffer(struct buffer_head * bh)
 	get_bh(bh);
 	do {
 		prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
-		blk_run_queues();
-		if (buffer_locked(bh))
+		if (buffer_locked(bh)) {
+			blk_run_queues();
 			io_schedule();
+		}
 	} while (buffer_locked(bh));
 	put_bh(bh);
 	finish_wait(wqh, &wait);
@@ -959,8 +960,6 @@ no_grow:
 	 * the reserve list is empty, we're sure there are
 	 * async buffer heads in use.
 	 */
-	blk_run_queues();
-
 	free_more_memory();
 	goto try_again;
 }

diff --git a/fs/char_dev.c b/fs/char_dev.c
index ff34b5e336cd..ec9489c3a387 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -1,5 +1,5 @@
 /*
- *  linux/fs/block_dev.c
+ *  linux/fs/char_dev.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */
@@ -38,16 +38,13 @@ static kmem_cache_t * cdev_cachep;
 	((struct char_device *) kmem_cache_alloc(cdev_cachep, SLAB_KERNEL))
 #define destroy_cdev(cdev) kmem_cache_free(cdev_cachep, (cdev))
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
 {
-	struct char_device * cdev = (struct char_device *) foo;
+	struct char_device *cdev = (struct char_device *) foo;
 
 	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
 	    SLAB_CTOR_CONSTRUCTOR)
-	{
 		memset(cdev, 0, sizeof(*cdev));
-		sema_init(&cdev->sem, 1);
-	}
 }
 
 void __init cdev_cache_init(void)

diff --git a/fs/exec.c b/fs/exec.c
index 8be3fa7c0ff2..a63d5c43da1f 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -300,6 +300,8 @@ void put_dirty_page(struct task_struct * tsk, struct page *page, unsigned long a
 	pgd = pgd_offset(tsk->mm, address);
 
 	pte_chain = pte_chain_alloc(GFP_KERNEL);
+	if (!pte_chain)
+		goto out_sig;
 	spin_lock(&tsk->mm->page_table_lock);
 	pmd = pmd_alloc(tsk->mm, pgd, address);
 	if (!pmd)
@@ -325,6 +327,7 @@ void put_dirty_page(struct task_struct * tsk, struct page *page, unsigned long a
 	return;
 out:
 	spin_unlock(&tsk->mm->page_table_lock);
+out_sig:
 	__free_page(page);
 	force_sig(SIGKILL, tsk);
 	pte_chain_free(pte_chain);
@@ -556,35 +559,65 @@ static inline void put_proc_dentry(struct dentry *dentry)
 * disturbing other processes.  (Other processes might share the signal
 * table via the CLONE_SIGHAND option to clone().)
 */
-static inline int de_thread(struct signal_struct *oldsig)
+static inline int de_thread(struct task_struct *tsk)
 {
-	struct signal_struct *newsig;
+	struct signal_struct *newsig, *oldsig = tsk->signal;
+	struct sighand_struct *newsighand, *oldsighand = tsk->sighand;
+	spinlock_t *lock = &oldsighand->siglock;
 	int count;
 
-	if (atomic_read(&current->sig->count) <= 1)
+	/*
+	 * If we don't share sighandlers, then we aren't sharing anything
+	 * and we can just re-use it all.
+	 */
+	if (atomic_read(&oldsighand->count) <= 1)
 		return 0;
 
-	newsig = kmem_cache_alloc(sigact_cachep, GFP_KERNEL);
-	if (!newsig)
+	newsighand = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
+	if (!newsighand)
 		return -ENOMEM;
 
+	spin_lock_init(&newsighand->siglock);
+	atomic_set(&newsighand->count, 1);
+	memcpy(newsighand->action, oldsighand->action, sizeof(newsighand->action));
+
+	/*
+	 * See if we need to allocate a new signal structure
+	 */
+	newsig = NULL;
+	if (atomic_read(&oldsig->count) > 1) {
+		newsig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
+		if (!newsig) {
+			kmem_cache_free(sighand_cachep, newsighand);
+			return -ENOMEM;
+		}
+		atomic_set(&newsig->count, 1);
+		newsig->group_exit = 0;
+		newsig->group_exit_code = 0;
+		newsig->group_exit_task = NULL;
+		newsig->group_stop_count = 0;
+		init_sigpending(&newsig->shared_pending);
+	}
+
 	if (thread_group_empty(current))
-		goto out;
+		goto no_thread_group;
 
 	/*
 	 * Kill all other threads in the thread group:
 	 */
-	spin_lock_irq(&oldsig->siglock);
+	spin_lock_irq(lock);
 	if (oldsig->group_exit) {
 		/*
 		 * Another group action in progress, just
 		 * return so that the signal is processed.
 		 */
-		spin_unlock_irq(&oldsig->siglock);
-		kmem_cache_free(sigact_cachep, newsig);
+		spin_unlock_irq(lock);
+		kmem_cache_free(sighand_cachep, newsighand);
+		if (newsig)
+			kmem_cache_free(signal_cachep, newsig);
 		return -EAGAIN;
 	}
 	oldsig->group_exit = 1;
-	__broadcast_thread_group(current, SIGKILL);
+	zap_other_threads(current);
 
 	/*
 	 * Account for the thread group leader hanging around:
@@ -595,13 +628,13 @@ static inline int de_thread(struct signal_struct *oldsig)
 	while (atomic_read(&oldsig->count) > count) {
 		oldsig->group_exit_task = current;
 		current->state = TASK_UNINTERRUPTIBLE;
-		spin_unlock_irq(&oldsig->siglock);
+		spin_unlock_irq(lock);
 		schedule();
-		spin_lock_irq(&oldsig->siglock);
+		spin_lock_irq(lock);
 		if (oldsig->group_exit_task)
 			BUG();
 	}
-	spin_unlock_irq(&oldsig->siglock);
+	spin_unlock_irq(lock);
 
 	/*
 	 * At this point all other threads have exited, all we have to
@@ -656,7 +689,8 @@ static inline int de_thread(struct signal_struct *oldsig)
 		current->ptrace = ptrace;
 		__ptrace_link(current, parent);
 	}
-
+
+	list_del(&current->tasks);
 	list_add_tail(&current->tasks, &init_task.tasks);
 	current->exit_signal = SIGCHLD;
 	state = leader->state;
@@ -671,31 +705,29 @@ static inline int de_thread(struct signal_struct *oldsig)
 		release_task(leader);
 	}
 
-out:
-	spin_lock_init(&newsig->siglock);
-	atomic_set(&newsig->count, 1);
-	newsig->group_exit = 0;
-	newsig->group_exit_code = 0;
-	newsig->group_exit_task = NULL;
-	memcpy(newsig->action, current->sig->action, sizeof(newsig->action));
-	init_sigpending(&newsig->shared_pending);
+no_thread_group:
 
 	write_lock_irq(&tasklist_lock);
-	spin_lock(&oldsig->siglock);
-	spin_lock(&newsig->siglock);
+	spin_lock(&oldsighand->siglock);
+	spin_lock(&newsighand->siglock);
 
 	if (current == oldsig->curr_target)
 		oldsig->curr_target = next_thread(current);
-	current->sig = newsig;
+	if (newsig)
+		current->signal = newsig;
+	current->sighand = newsighand;
 	init_sigpending(&current->pending);
 	recalc_sigpending();
 
-	spin_unlock(&newsig->siglock);
-	spin_unlock(&oldsig->siglock);
+	spin_unlock(&newsighand->siglock);
+	spin_unlock(&oldsighand->siglock);
 	write_unlock_irq(&tasklist_lock);
 
-	if (atomic_dec_and_test(&oldsig->count))
-		kmem_cache_free(sigact_cachep, oldsig);
+	if (newsig && atomic_dec_and_test(&oldsig->count))
+		kmem_cache_free(signal_cachep, oldsig);
+
+	if (atomic_dec_and_test(&oldsighand->count))
+		kmem_cache_free(sighand_cachep, oldsighand);
 
 	if (!thread_group_empty(current))
 		BUG();
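The reworked de_thread() splits the old signal_struct into a shared signal part and a sighand part, and only copies whichever is actually shared, dropping the old reference afterwards. A minimal user-space sketch of that copy-on-unshare refcount pattern, using C11 atomics; the struct and all names here are hypothetical stand-ins, not the kernel's:

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct shared_table {
	atomic_int count;
	int action[64];		/* stand-in for the signal handler table */
};

/* Give the caller a private copy iff the table is shared (count > 1). */
static struct shared_table *unshare(struct shared_table *old)
{
	struct shared_table *new;

	if (atomic_load(&old->count) <= 1)
		return old;			/* already private: reuse it */

	new = malloc(sizeof(*new));
	if (!new)
		return NULL;
	memcpy(new->action, old->action, sizeof(new->action));
	atomic_init(&new->count, 1);

	/* drop our reference to the shared copy; free it if it was the last */
	if (atomic_fetch_sub(&old->count, 1) == 1)
		free(old);
	return new;
}

int main(void)
{
	struct shared_table *old = malloc(sizeof(*old));
	struct shared_table *mine;

	atomic_init(&old->count, 2);	/* pretend one other thread shares it */
	old->action[0] = 7;

	mine = unshare(old);		/* we now own a private copy */
	printf("private copy, action[0] = %d\n", mine->action[0]);

	free(mine);
	free(old);			/* the "other thread's" reference */
	return 0;
}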
@@ -741,21 +773,20 @@ int flush_old_exec(struct linux_binprm * bprm)
 {
 	char * name;
 	int i, ch, retval;
-	struct signal_struct * oldsig = current->sig;
 
 	/*
 	 * Release all of the old mmap stuff
 	 */
 	retval = exec_mmap(bprm->mm);
 	if (retval)
-		goto mmap_failed;
+		goto out;
 
 	/*
 	 * Make sure we have a private signal table and that
 	 * we are unassociated from the previous thread group.
 	 */
-	retval = de_thread(oldsig);
+	retval = de_thread(current);
 	if (retval)
-		goto flush_failed;
+		goto out;
 
 	/* This is the point of no return */
@@ -789,14 +820,7 @@ int flush_old_exec(struct linux_binprm * bprm)
 
 	return 0;
 
-mmap_failed:
-flush_failed:
-	spin_lock_irq(&current->sig->siglock);
-	if (current->sig != oldsig) {
-		kmem_cache_free(sigact_cachep, current->sig);
-		current->sig = oldsig;
-	}
-	spin_unlock_irq(&current->sig->siglock);
+out:
 	return retval;
 }
@@ -880,7 +904,7 @@ void compute_creds(struct linux_binprm *bprm)
 	if (must_not_trace_exec(current)
 	    || atomic_read(&current->fs->count) > 1
 	    || atomic_read(&current->files->count) > 1
-	    || atomic_read(&current->sig->count) > 1) {
+	    || atomic_read(&current->sighand->count) > 1) {
 		if(!capable(CAP_SETUID)) {
 			bprm->e_uid = current->uid;
 			bprm->e_gid = current->gid;
@@ -1297,8 +1321,8 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
 	}
 	mm->dumpable = 0;
 	init_completion(&mm->core_done);
-	current->sig->group_exit = 1;
-	current->sig->group_exit_code = exit_code;
+	current->signal->group_exit = 1;
+	current->signal->group_exit_code = exit_code;
 	coredump_wait(mm);
 
 	if (current->rlim[RLIMIT_CORE].rlim_cur < binfmt->min_coredump)
@@ -1325,7 +1349,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
 
 	retval = binfmt->core_dump(signr, regs, file);
 
-	current->sig->group_exit_code |= 0x80;
+	current->signal->group_exit_code |= 0x80;
 
 close_fail:
 	filp_close(file, NULL);
 fail_unlock:
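The `|= 0x80` at the end of do_coredump() sets the conventional "core dumped" bit in the group exit code, which is what eventually reaches the parent in the wait status. A small demonstration from the other side of that convention; note that WCOREDUMP() is a common BSD/glibc extension rather than plain POSIX, so treat this as a hedged sketch:

#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t pid = fork();
	int status;

	if (pid == 0)
		abort();	/* die with SIGABRT, possibly dumping core */

	waitpid(pid, &status, 0);
	if (WIFSIGNALED(status))
		printf("killed by signal %d, core dumped: %s\n",
		       WTERMSIG(status),
		       WCOREDUMP(status) ? "yes" : "no");
	return 0;
}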
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index ccdb52c9cc77..24897acf33da 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -99,6 +99,34 @@ int ext3_forget(handle_t *handle, int is_metadata,
 	return err;
 }
 
+/*
+ * Work out how many blocks we need to progress with the next chunk of a
+ * truncate transaction.
+ */
+static unsigned long blocks_for_truncate(struct inode *inode)
+{
+	unsigned long needed;
+
+	needed = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9);
+
+	/* Give ourselves just enough room to cope with inodes in which
+	 * i_blocks is corrupt: we've seen disk corruptions in the past
+	 * which resulted in random data in an inode which looked enough
+	 * like a regular file for ext3 to try to delete it.  Things
+	 * will go a bit crazy if that happens, but at least we should
+	 * try not to panic the whole kernel. */
+	if (needed < 2)
+		needed = 2;
+
+	/* But we need to bound the transaction so we don't overflow the
+	 * journal. */
+	if (needed > EXT3_MAX_TRANS_DATA)
+		needed = EXT3_MAX_TRANS_DATA;
+
+	return EXT3_DATA_TRANS_BLOCKS + needed;
+}
+
 /*
 * Truncate transactions can be complex and absolutely huge.  So we need to
 * be able to restart the transaction at a conventient checkpoint to make
@@ -112,14 +140,9 @@ int ext3_forget(handle_t *handle, int is_metadata,
 
 static handle_t *start_transaction(struct inode *inode)
 {
-	long needed;
 	handle_t *result;
 
-	needed = inode->i_blocks;
-	if (needed > EXT3_MAX_TRANS_DATA)
-		needed = EXT3_MAX_TRANS_DATA;
-
-	result = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS + needed);
+	result = ext3_journal_start(inode, blocks_for_truncate(inode));
 	if (!IS_ERR(result))
 		return result;
@@ -135,14 +158,9 @@ static handle_t *start_transaction(struct inode *inode)
 */
static int try_to_extend_transaction(handle_t *handle, struct inode *inode)
{
-	long needed;
-
 	if (handle->h_buffer_credits > EXT3_RESERVE_TRANS_BLOCKS)
 		return 0;
-	needed = inode->i_blocks;
-	if (needed > EXT3_MAX_TRANS_DATA)
-		needed = EXT3_MAX_TRANS_DATA;
-	if (!ext3_journal_extend(handle, EXT3_RESERVE_TRANS_BLOCKS + needed))
+	if (!ext3_journal_extend(handle, blocks_for_truncate(inode)))
 		return 0;
 	return 1;
}
@@ -154,11 +172,8 @@ static int try_to_extend_transaction(handle_t *handle, struct inode *inode)
 */
static int ext3_journal_test_restart(handle_t *handle, struct inode *inode)
{
-	long needed = inode->i_blocks;
-	if (needed > EXT3_MAX_TRANS_DATA)
-		needed = EXT3_MAX_TRANS_DATA;
 	jbd_debug(2, "restarting handle %p\n", handle);
-	return ext3_journal_restart(handle, EXT3_DATA_TRANS_BLOCKS + needed);
+	return ext3_journal_restart(handle, blocks_for_truncate(inode));
}
 
 /*
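Besides factoring out the clamp, the new helper converts units: i_blocks counts 512-byte sectors, while the old code clamped raw i_blocks, so the shift by (s_blocksize_bits - 9) turns sectors into filesystem blocks before the bounds apply. A standalone sketch of the same arithmetic with assumed constants (the real EXT3_DATA_TRANS_BLOCKS and EXT3_MAX_TRANS_DATA live in the ext3 headers and depend on configuration):

#include <stdio.h>

/* Assumed values for illustration only. */
#define EXT3_DATA_TRANS_BLOCKS	12
#define EXT3_MAX_TRANS_DATA	64

static unsigned long blocks_for_truncate(unsigned long i_blocks,
					 unsigned int blocksize_bits)
{
	/* i_blocks is in 512-byte sectors; convert to fs blocks */
	unsigned long needed = i_blocks >> (blocksize_bits - 9);

	if (needed < 2)
		needed = 2;			/* floor for corrupt inodes */
	if (needed > EXT3_MAX_TRANS_DATA)
		needed = EXT3_MAX_TRANS_DATA;	/* don't overflow the journal */
	return EXT3_DATA_TRANS_BLOCKS + needed;
}

int main(void)
{
	/* 1 MiB file, 4 KiB blocks: 2048 sectors -> 256 blocks -> clamped */
	printf("%lu\n", blocks_for_truncate(2048, 12));	/* 12 + 64 = 76 */
	/* tiny file: the floor of 2 applies */
	printf("%lu\n", blocks_for_truncate(1, 12));	/* 12 + 2 = 14 */
	return 0;
}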
diff --git a/fs/freevxfs/vxfs_fshead.c b/fs/freevxfs/vxfs_fshead.c
index f0d4e5a7f128..bf8f5e3f90da 100644
--- a/fs/freevxfs/vxfs_fshead.c
+++ b/fs/freevxfs/vxfs_fshead.c
@@ -111,13 +111,15 @@ vxfs_read_fshead(struct super_block *sbp)
 	struct vxfs_fsh *pfp, *sfp;
 	struct vxfs_inode_info *vip, *tip;
 
-	if (!(vip = vxfs_blkiget(sbp, infp->vsi_iext, infp->vsi_fshino))) {
+	vip = vxfs_blkiget(sbp, infp->vsi_iext, infp->vsi_fshino);
+	if (!vip) {
 		printk(KERN_ERR "vxfs: unabled to read fsh inode\n");
 		return -EINVAL;
-	} else if (!VXFS_ISFSH(vip)) {
+	}
+	if (!VXFS_ISFSH(vip)) {
 		printk(KERN_ERR "vxfs: fsh list inode is of wrong type (%x)\n",
 				vip->vii_mode & VXFS_TYPE_MASK);
-		return -EINVAL;
+		goto out_free_fship;
 	}
 
@@ -126,23 +128,26 @@ vxfs_read_fshead(struct super_block *sbp)
 	vxfs_dumpi(vip, infp->vsi_fshino);
 #endif
 
-	if (!(infp->vsi_fship = vxfs_get_fake_inode(sbp, vip))) {
+	infp->vsi_fship = vxfs_get_fake_inode(sbp, vip);
+	if (!infp->vsi_fship) {
 		printk(KERN_ERR "vxfs: unabled to get fsh inode\n");
-		return -EINVAL;
+		goto out_free_fship;
 	}
 
-	if (!(sfp = vxfs_getfsh(infp->vsi_fship, 0))) {
+	sfp = vxfs_getfsh(infp->vsi_fship, 0);
+	if (!sfp) {
 		printk(KERN_ERR "vxfs: unabled to get structural fsh\n");
-		return -EINVAL;
+		goto out_iput_fship;
 	}
 
 #ifdef DIAGNOSTIC
 	vxfs_dumpfsh(sfp);
 #endif
 
-	if (!(pfp = vxfs_getfsh(infp->vsi_fship, 1))) {
+	pfp = vxfs_getfsh(infp->vsi_fship, 1);
+	if (!pfp) {
 		printk(KERN_ERR "vxfs: unabled to get primary fsh\n");
-		return -EINVAL;
+		goto out_free_sfp;
 	}
 
 #ifdef DIAGNOSTIC
@@ -150,24 +155,50 @@ vxfs_read_fshead(struct super_block *sbp)
 #endif
 
 	tip = vxfs_blkiget(sbp, infp->vsi_iext, sfp->fsh_ilistino[0]);
-	if (!tip || ((infp->vsi_stilist = vxfs_get_fake_inode(sbp, tip)) == NULL)) {
+	if (!tip)
+		goto out_free_pfp;
+
+	infp->vsi_stilist = vxfs_get_fake_inode(sbp, tip);
+	if (!infp->vsi_stilist) {
 		printk(KERN_ERR "vxfs: unabled to get structual list inode\n");
-		return -EINVAL;
-	} else if (!VXFS_ISILT(VXFS_INO(infp->vsi_stilist))) {
+		kfree(tip);
+		goto out_free_pfp;
+	}
+	if (!VXFS_ISILT(VXFS_INO(infp->vsi_stilist))) {
 		printk(KERN_ERR "vxfs: structual list inode is of wrong type (%x)\n",
 				VXFS_INO(infp->vsi_stilist)->vii_mode & VXFS_TYPE_MASK);
-		return -EINVAL;
+		goto out_iput_stilist;
 	}
 
 	tip = vxfs_stiget(sbp, pfp->fsh_ilistino[0]);
-	if (!tip || ((infp->vsi_ilist = vxfs_get_fake_inode(sbp, tip)) == NULL)) {
+	if (!tip)
+		goto out_iput_stilist;
+	infp->vsi_ilist = vxfs_get_fake_inode(sbp, tip);
+	if (!infp->vsi_ilist) {
 		printk(KERN_ERR "vxfs: unabled to get inode list inode\n");
-		return -EINVAL;
-	} else if (!VXFS_ISILT(VXFS_INO(infp->vsi_ilist))) {
+		kfree(tip);
+		goto out_iput_stilist;
+	}
+	if (!VXFS_ISILT(VXFS_INO(infp->vsi_ilist))) {
 		printk(KERN_ERR "vxfs: inode list inode is of wrong type (%x)\n",
 				VXFS_INO(infp->vsi_ilist)->vii_mode & VXFS_TYPE_MASK);
-		return -EINVAL;
+		goto out_iput_ilist;
 	}
 
 	return 0;
+
+ out_iput_ilist:
+ 	iput(infp->vsi_ilist);
+ out_iput_stilist:
+ 	iput(infp->vsi_stilist);
+ out_free_pfp:
+ 	kfree(pfp);
+ out_free_sfp:
+ 	kfree(sfp);
+ out_iput_fship:
+ 	iput(infp->vsi_fship);
+	return -EINVAL;
+ out_free_fship:
+ 	kfree(vip);
+	return -EINVAL;
 }
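The vxfs_read_fshead() rewrite replaces a series of bare `return -EINVAL` statements, which leaked everything acquired up to that point, with a single goto unwind ladder that releases resources in reverse order of acquisition. A generic, self-contained sketch of the pattern (plain allocations stand in for the inodes and fsh buffers above):

#include <stdlib.h>

struct ctx { void *a, *b, *c; };

/* Acquire three resources; on failure release only what was taken. */
static int setup(struct ctx *ctx)
{
	ctx->a = malloc(16);
	if (!ctx->a)
		goto out;
	ctx->b = malloc(16);
	if (!ctx->b)
		goto out_free_a;
	ctx->c = malloc(16);
	if (!ctx->c)
		goto out_free_b;
	return 0;		/* success: caller owns a, b and c */

out_free_b:
	free(ctx->b);
out_free_a:
	free(ctx->a);
out:
	return -1;		/* -EINVAL in the kernel version */
}

int main(void)
{
	struct ctx c;

	if (setup(&c))
		return 1;
	free(c.c);
	free(c.b);
	free(c.a);
	return 0;
}

Each label undoes exactly one acquisition, so a failure at step N falls through labels N-1 down to 1; adding a new resource means adding one label rather than touching every error branch.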
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index d3db0faa9abe..ad8ef0487ad2 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -61,6 +61,12 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 			sb->s_op->dirty_inode(inode);
 	}
 
+	/*
+	 * make sure that changes are seen by all cpus before we test i_state
+	 * -- mikulas
+	 */
+	smp_mb();
+
 	/* avoid the locking if we can */
 	if ((inode->i_state & flags) == flags)
 		return;
@@ -137,6 +143,12 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc)
 	inode->i_state |= I_LOCK;
 	inode->i_state &= ~I_DIRTY;
 
+	/*
+	 * smp_rmb(); note: if you remove write_lock below, you must add this.
+	 * mark_inode_dirty doesn't take spinlock, make sure that inode is not
+	 * read speculatively by this cpu before &= ~I_DIRTY -- mikulas
+	 */
+
 	write_lock(&mapping->page_lock);
 	if (wait || !wbc->for_kupdate || list_empty(&mapping->io_pages))
 		list_splice_init(&mapping->dirty_pages, &mapping->io_pages);
@@ -334,7 +346,6 @@ writeback_inodes(struct writeback_control *wbc)
 	}
 	spin_unlock(&sb_lock);
 	spin_unlock(&inode_lock);
-	blk_run_queues();
 }
 
 /*
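The smp_mb() added to __mark_inode_dirty() orders the caller's earlier stores against the following unlocked read of i_state, so the lock-avoiding fast path cannot act on a stale flag. A rough user-space analogue of that store/fence/load shape using C11 fences; the flag name mirrors the kernel's but everything here is illustrative:

#include <stdatomic.h>
#include <stdio.h>

#define I_DIRTY 0x1

static atomic_int i_state;

static void mark_dirty(void)
{
	/* ... earlier stores that must be globally visible first ... */

	/* full fence, the analogue of smp_mb() above: order those stores
	 * before the unlocked test of the flag */
	atomic_thread_fence(memory_order_seq_cst);

	/* avoid the locking if we can */
	if (atomic_load_explicit(&i_state, memory_order_relaxed) & I_DIRTY)
		return;

	atomic_fetch_or(&i_state, I_DIRTY);	/* slow path: set the flag */
}

int main(void)
{
	mark_dirty();
	printf("i_state = %#x\n", atomic_load(&i_state));
	return 0;
}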
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index bb8bf302da95..5ce105bd3d1e 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -34,6 +34,7 @@ static struct super_operations hugetlbfs_ops;
 static struct address_space_operations hugetlbfs_aops;
 struct file_operations hugetlbfs_file_operations;
 static struct inode_operations hugetlbfs_dir_inode_operations;
+static struct inode_operations hugetlbfs_inode_operations;
 
 static struct backing_dev_info hugetlbfs_backing_dev_info = {
 	.ra_pages	= 0,	/* No readahead */
@@ -44,7 +45,6 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct inode *inode =file->f_dentry->d_inode;
 	struct address_space *mapping = inode->i_mapping;
-	size_t len;
 	int ret;
 
 	if (!capable(CAP_IPC_LOCK))
@@ -65,15 +65,52 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	vma->vm_flags |= VM_HUGETLB | VM_RESERVED;
 	vma->vm_ops = &hugetlb_vm_ops;
 	ret = hugetlb_prefault(mapping, vma);
-	len = (vma->vm_end - vma->vm_start) + (vma->vm_pgoff << PAGE_SHIFT);
-	if (inode->i_size < len)
-		inode->i_size = len;
 	up(&inode->i_sem);
 	return ret;
 }
 
 /*
+ * Called under down_write(mmap_sem), page_table_lock is not held
+ */
+#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+		unsigned long len, unsigned long pgoff, unsigned long flags);
+#else
+static unsigned long
+hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+		unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+
+	if (len & ~HPAGE_MASK)
+		return -EINVAL;
+	if (len > TASK_SIZE)
+		return -ENOMEM;
+
+	if (addr) {
+		addr = ALIGN(addr, HPAGE_SIZE);
+		vma = find_vma(mm, addr);
+		if (TASK_SIZE - len >= addr &&
+		    (!vma || addr + len <= vma->vm_start))
+			return addr;
+	}
+
+	addr = ALIGN(mm->free_area_cache, HPAGE_SIZE);
+
+	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
+		/* At this point:  (!vma || addr < vma->vm_end). */
+		if (TASK_SIZE - len < addr)
+			return -ENOMEM;
+		if (!vma || addr + len <= vma->vm_start)
+			return addr;
+		addr = ALIGN(vma->vm_end, HPAGE_SIZE);
+	}
+}
+#endif
+
+/*
 * Read a page. Again trivial. If it didn't already exist
 * in the page cache, it is zero-filled.
 */
@@ -83,12 +120,14 @@ static int hugetlbfs_readpage(struct file *file, struct page * page)
 	return -EINVAL;
 }
 
-static int hugetlbfs_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to)
+static int hugetlbfs_prepare_write(struct file *file,
+			struct page *page, unsigned offset, unsigned to)
 {
 	return -EINVAL;
 }
 
-static int hugetlbfs_commit_write(struct file *file, struct page *page, unsigned offset, unsigned to)
+static int hugetlbfs_commit_write(struct file *file,
+			struct page *page, unsigned offset, unsigned to)
 {
 	return -EINVAL;
 }
@@ -103,28 +142,8 @@ void huge_pagevec_release(struct pagevec *pvec)
 	pagevec_reinit(pvec);
 }
 
-void truncate_partial_hugepage(struct page *page, unsigned partial)
+void truncate_huge_page(struct page *page)
 {
-	int i;
-	const unsigned piece = partial & (PAGE_SIZE - 1);
-	const unsigned tailstart = PAGE_SIZE - piece;
-	const unsigned whole_pages = partial / PAGE_SIZE;
-	const unsigned last_page_offset = HPAGE_SIZE/PAGE_SIZE - whole_pages;
-
-	for (i = HPAGE_SIZE/PAGE_SIZE - 1; i >= last_page_offset; ++i)
-		memclear_highpage_flush(&page[i], 0, PAGE_SIZE);
-
-	if (!piece)
-		return;
-
-	memclear_highpage_flush(&page[last_page_offset - 1], tailstart, piece);
-}
-
-void truncate_huge_page(struct address_space *mapping, struct page *page)
-{
-	if (page->mapping != mapping)
-		return;
-
 	clear_page_dirty(page);
 	ClearPageUptodate(page);
 	remove_from_page_cache(page);
@@ -133,52 +152,13 @@ void truncate_hugepages(struct address_space *mapping, loff_t lstart)
 {
-	const pgoff_t start = (lstart + HPAGE_SIZE - 1) >> HPAGE_SHIFT;
-	const unsigned partial = lstart & (HPAGE_SIZE - 1);
+	const pgoff_t start = lstart >> HPAGE_SHIFT;
 	struct pagevec pvec;
 	pgoff_t next;
 	int i;
 
 	pagevec_init(&pvec, 0);
 	next = start;
-
-	while (pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
-		for (i = 0; i < pagevec_count(&pvec); ++i) {
-			struct page *page = pvec.pages[i];
-			pgoff_t page_index = page->index;
-
-			if (page_index > next)
-				next = page_index;
-
-			++next;
-
-			if (TestSetPageLocked(page))
-				continue;
-
-			if (PageWriteback(page)) {
-				unlock_page(page);
-				continue;
-			}
-
-			truncate_huge_page(mapping, page);
-			unlock_page(page);
-		}
-		huge_pagevec_release(&pvec);
-		cond_resched();
-	}
-
-	if (partial) {
-		struct page *page = find_lock_page(mapping, start - 1);
-		if (page) {
-			wait_on_page_writeback(page);
-			truncate_partial_hugepage(page, partial);
-			unlock_page(page);
-			huge_page_release(page);
-		}
-	}
-
-	next = start;
-
 	while (1) {
 		if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
 			if (next == start)
@@ -191,11 +171,10 @@ void truncate_hugepages(struct address_space *mapping, loff_t lstart)
 			struct page *page = pvec.pages[i];
 
 			lock_page(page);
-			wait_on_page_writeback(page);
 			if (page->index > next)
 				next = page->index;
 			++next;
-			truncate_huge_page(mapping, page);
+			truncate_huge_page(page);
 			unlock_page(page);
 		}
 		huge_pagevec_release(&pvec);
@@ -259,70 +238,73 @@ static void hugetlbfs_drop_inode(struct inode *inode)
 		hugetlbfs_forget_inode(inode);
 }
 
-static void hugetlb_vmtruncate_list(struct list_head *list, unsigned long pgoff)
+/*
+ * h_pgoff is in HPAGE_SIZE units.
+ * vma->vm_pgoff is in PAGE_SIZE units.
+ */
+static void
+hugetlb_vmtruncate_list(struct list_head *list, unsigned long h_pgoff)
 {
-	unsigned long start, end, length, delta;
 	struct vm_area_struct *vma;
 
 	list_for_each_entry(vma, list, shared) {
-		start = vma->vm_start;
-		end = vma->vm_end;
-		length = end - start;
-
-		if (vma->vm_pgoff >= pgoff) {
-			zap_hugepage_range(vma, start, length);
+		unsigned long h_vm_pgoff;
+		unsigned long v_length;
+		unsigned long h_length;
+		unsigned long v_offset;
+
+		h_vm_pgoff = vma->vm_pgoff << (HPAGE_SHIFT - PAGE_SHIFT);
+		v_length = vma->vm_end - vma->vm_start;
+		h_length = v_length >> HPAGE_SHIFT;
+		v_offset = (h_pgoff - h_vm_pgoff) << HPAGE_SHIFT;
+
+		/*
+		 * Is this VMA fully outside the truncation point?
+		 */
+		if (h_vm_pgoff >= h_pgoff) {
+			zap_hugepage_range(vma, vma->vm_start, v_length);
 			continue;
 		}
 
-		length >>= PAGE_SHIFT;
-		delta = pgoff - vma->vm_pgoff;
-		if (delta >= length)
+		/*
+		 * Is this VMA fully inside the truncaton point?
+		 */
+		if (h_vm_pgoff + (v_length >> HPAGE_SHIFT) <= h_pgoff)
 			continue;
 
-		start += delta << PAGE_SHIFT;
-		length = (length - delta) << PAGE_SHIFT;
-		zap_hugepage_range(vma, start, length);
+		/*
+		 * The VMA straddles the truncation point.  v_offset is the
+		 * offset (in bytes) into the VMA where the point lies.
+		 */
+		zap_hugepage_range(vma,
+				vma->vm_start + v_offset,
+				v_length - v_offset);
 	}
 }
 
+/*
+ * Expanding truncates are not allowed.
+ */
 static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
 {
 	unsigned long pgoff;
 	struct address_space *mapping = inode->i_mapping;
-	unsigned long limit;
 
-	pgoff = (offset + HPAGE_SIZE - 1) >> HPAGE_SHIFT;
+	if (offset > inode->i_size)
+		return -EINVAL;
 
-	if (inode->i_size < offset)
-		goto do_expand;
+	BUG_ON(offset & ~HPAGE_MASK);
+	pgoff = offset >> HPAGE_SHIFT;
 
 	inode->i_size = offset;
 	down(&mapping->i_shared_sem);
-	if (list_empty(&mapping->i_mmap) && list_empty(&mapping->i_mmap_shared))
-		goto out_unlock;
 	if (!list_empty(&mapping->i_mmap))
 		hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff);
 	if (!list_empty(&mapping->i_mmap_shared))
 		hugetlb_vmtruncate_list(&mapping->i_mmap_shared, pgoff);
-
-out_unlock:
 	up(&mapping->i_shared_sem);
 	truncate_hugepages(mapping, offset);
 	return 0;
-
-do_expand:
-	limit = current->rlim[RLIMIT_FSIZE].rlim_cur;
-	if (limit != RLIM_INFINITY && offset > limit)
-		goto out_sig;
-	if (offset > inode->i_sb->s_maxbytes)
-		goto out;
-	inode->i_size = offset;
-	return 0;
-
-out_sig:
-	send_sig(SIGXFSZ, current, 0);
-out:
-	return -EFBIG;
 }
 
@@ -341,15 +323,10 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
 	error = security_inode_setattr(dentry, attr);
 	if (error)
 		goto out;
-
-	if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
-	    (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid))
-		error = DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0;
-	if (error)
-		goto out;
-
 	if (ia_valid & ATTR_SIZE) {
-		error = hugetlb_vmtruncate(inode, attr->ia_size);
+		error = -EINVAL;
+		if (!(attr->ia_size & ~HPAGE_MASK))
+			error = hugetlb_vmtruncate(inode, attr->ia_size);
 		if (error)
 			goto out;
 		attr->ia_valid &= ~ATTR_SIZE;
@@ -364,8 +341,8 @@ out:
 	return error;
 }
-static struct inode *
-hugetlbfs_get_inode(struct super_block *sb, int mode, dev_t dev)
+static struct inode *hugetlbfs_get_inode(struct super_block *sb,
+					int mode, dev_t dev)
 {
 	struct inode * inode = new_inode(sb);
@@ -377,13 +354,14 @@ hugetlbfs_get_inode(struct super_block *sb, int mode, dev_t dev)
 		inode->i_blocks = 0;
 		inode->i_rdev = NODEV;
 		inode->i_mapping->a_ops = &hugetlbfs_aops;
-		inode->i_mapping->backing_dev_info = &hugetlbfs_backing_dev_info;
+		inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info;
 		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 		switch (mode & S_IFMT) {
 		default:
 			init_special_inode(inode, mode, dev);
 			break;
 		case S_IFREG:
+			inode->i_op = &hugetlbfs_inode_operations;
 			inode->i_fop = &hugetlbfs_file_operations;
 			break;
 		case S_IFDIR:
@@ -405,8 +383,8 @@ hugetlbfs_get_inode(struct super_block *sb, int mode, dev_t dev)
 * File creation. Allocate an inode, and we're done..
 */
 /* SMP-safe */
-static int
-hugetlbfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
+static int hugetlbfs_mknod(struct inode *dir,
+			struct dentry *dentry, int mode, dev_t dev)
 {
 	struct inode * inode = hugetlbfs_get_inode(dir->i_sb, mode, dev);
 	int error = -ENOSPC;
@@ -419,7 +397,7 @@ hugetlbfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
 	return error;
 }
 
-static int hugetlbfs_mkdir(struct inode * dir, struct dentry * dentry, int mode)
+static int hugetlbfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 {
 	int retval = hugetlbfs_mknod(dir, dentry, mode | S_IFDIR, 0);
 	if (!retval)
@@ -432,7 +410,8 @@ static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, int mode)
 	return hugetlbfs_mknod(dir, dentry, mode | S_IFREG, 0);
 }
 
-static int hugetlbfs_symlink(struct inode * dir, struct dentry *dentry, const char * symname)
+static int hugetlbfs_symlink(struct inode *dir,
+			struct dentry *dentry, const char *symname)
 {
 	struct inode *inode;
 	int error = -ENOSPC;
@@ -450,15 +429,25 @@ static int hugetlbfs_symlink(struct inode * dir, struct dentry *dentry, const ch
 	return error;
 }
 
+/*
+ * For direct-IO reads into hugetlb pages
+ */
+int hugetlbfs_set_page_dirty(struct page *page)
+{
+	return 0;
+}
+
 static struct address_space_operations hugetlbfs_aops = {
 	.readpage	= hugetlbfs_readpage,
 	.prepare_write	= hugetlbfs_prepare_write,
-	.commit_write	= hugetlbfs_commit_write
+	.commit_write	= hugetlbfs_commit_write,
+	.set_page_dirty	= hugetlbfs_set_page_dirty,
 };
 
 struct file_operations hugetlbfs_file_operations = {
-	.mmap		= hugetlbfs_file_mmap,
-	.fsync		= simple_sync_file,
+	.mmap			= hugetlbfs_file_mmap,
+	.fsync			= simple_sync_file,
+	.get_unmapped_area	= hugetlb_get_unmapped_area,
 };
 
 static struct inode_operations hugetlbfs_dir_inode_operations = {
@@ -474,12 +463,17 @@ static struct inode_operations hugetlbfs_dir_inode_operations = {
 	.setattr	= hugetlbfs_setattr,
 };
 
+static struct inode_operations hugetlbfs_inode_operations = {
+	.setattr	= hugetlbfs_setattr,
+};
+
 static struct super_operations hugetlbfs_ops = {
 	.statfs		= simple_statfs,
 	.drop_inode	= hugetlbfs_drop_inode,
 };
 
-static int hugetlbfs_fill_super(struct super_block * sb, void * data, int silent)
+static int
+hugetlbfs_fill_super(struct super_block * sb, void * data, int silent)
 {
 	struct inode * inode;
 	struct dentry * root;
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index d2f5935ef972..a106e23956f7 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -205,10 +205,10 @@ int kjournald(void *arg)
 	lock_kernel();
 	daemonize();
 
-	spin_lock_irq(&current->sig->siglock);
+	spin_lock_irq(&current->sighand->siglock);
 	sigfillset(&current->blocked);
 	recalc_sigpending();
-	spin_unlock_irq(&current->sig->siglock);
+	spin_unlock_irq(&current->sighand->siglock);
 
 	sprintf(current->comm, "kjournald");
@@ -732,14 +732,21 @@ fail:
 * need to set up all of the mapping information to tell the journaling
 * system where the journal blocks are.
 *
- * journal_init_dev creates a journal which maps a fixed contiguous
- * range of blocks on an arbitrary block device.
- *
- * journal_init_inode creates a journal which maps an on-disk inode as
- * the journal.  The inode must exist already, must support bmap() and
- * must have all data blocks preallocated.
 */
 
+/**
+ *  journal_t * journal_init_dev() - creates an initialises a journal structure
+ *  @bdev: Block device on which to create the journal
+ *  @fs_dev: Device which hold journalled filesystem for this journal.
+ *  @start: Block nr Start of journal.
+ *  @len:  Lenght of the journal in blocks.
+ *  @blocksize: blocksize of journalling device
+ *  @returns: a newly created journal_t *
+ *
+ *  journal_init_dev creates a journal which maps a fixed contiguous
+ *  range of blocks on an arbitrary block device.
+ *
+ */
 journal_t * journal_init_dev(struct block_device *bdev,
 			struct block_device *fs_dev,
 			int start, int len, int blocksize)
@@ -763,7 +770,15 @@ journal_t * journal_init_dev(struct block_device *bdev,
 
 	return journal;
 }
- 
+
+/**
+ *  journal_t * journal_init_inode () - creates a journal which maps to a inode.
+ *  @inode: An inode to create the journal in
+ *
+ *  journal_init_inode creates a journal which maps an on-disk inode as
+ *  the journal.  The inode must exist already, must support bmap() and
+ *  must have all data blocks preallocated.
+ */
 journal_t * journal_init_inode (struct inode *inode)
 {
 	struct buffer_head *bh;
@@ -852,12 +867,15 @@ static int journal_reset (journal_t *journal)
 	return 0;
 }
 
-/*
+/**
+ * int journal_create() - Initialise the new journal file
+ * @journal: Journal to create. This structure must have been initialised
+ *
 * Given a journal_t structure which tells us which disk blocks we can
 * use, create a new journal superblock and initialise all of the
- * journal fields from scratch.  */
-
-int journal_create (journal_t *journal)
+ * journal fields from scratch.
+ **/
+int journal_create(journal_t *journal)
 {
 	unsigned long blocknr;
 	struct buffer_head *bh;
@@ -920,11 +938,14 @@ int journal_create (journal_t *journal)
 	return journal_reset(journal);
 }
 
-/*
+/**
+ * void journal_update_superblock() - Update journal sb on disk.
+ * @journal: The journal to update.
+ * @wait: Set to '0' if you don't want to wait for IO completion.
+ *
 * Update a journal's dynamic superblock fields and write it to disk,
 * optionally waiting for the IO to complete.
-*/
-
+ */
 void journal_update_superblock(journal_t *journal, int wait)
 {
 	journal_superblock_t *sb = journal->j_superblock;
@@ -1040,12 +1061,14 @@ static int load_superblock(journal_t *journal)
 }
 
 
-/*
+/**
+ * int journal_load() - Read journal from disk.
+ * @journal: Journal to act on.
+ *
 * Given a journal_t structure which tells us which disk blocks contain
 * a journal, read the journal from disk to initialise the in-memory
 * structures.
 */
-
 int journal_load(journal_t *journal)
 {
 	int err;
@@ -1090,11 +1113,13 @@ recovery_error:
 	return -EIO;
 }
 
-/*
+/**
+ * void journal_destroy() - Release a journal_t structure.
+ * @journal: Journal to act on.
+*
 * Release a journal_t structure once it is no longer in use by the
 * journaled object.
 */
-
 void journal_destroy (journal_t *journal)
 {
 	/* Wait for the commit thread to wake up and die. */
@@ -1131,8 +1156,12 @@ void journal_destroy (journal_t *journal)
 }
 
 
-/* Published API: Check whether the journal uses all of a given set of
- * features.  Return true (non-zero) if it does. */
+/**
+ *int journal_check_used_features () - Check if features specified are used.
+ *
+ * Check whether the journal uses all of a given set of
+ * features.  Return true (non-zero) if it does.
+ **/
 int journal_check_used_features (journal_t *journal, unsigned long compat,
 				 unsigned long ro, unsigned long incompat)
@@ -1154,7 +1183,10 @@ int journal_check_used_features (journal_t *journal, unsigned long compat,
 	return 0;
 }
 
-/* Published API: Check whether the journaling code supports the use of
+/**
+ * int journal_check_available_features() - Check feature set in journalling layer
+ *
+ * Check whether the journaling code supports the use of
 * all of a given set of features on this journal.  Return true
 * (non-zero) if it can. */
@@ -1183,8 +1215,13 @@ int journal_check_available_features (journal_t *journal, unsigned long compat,
 	return 0;
 }
 
-/* Published API: Mark a given journal feature as present on the
- * superblock.  Returns true if the requested features could be set. */
+/**
+ * int journal_set_features () - Mark a given journal feature in the superblock
+ *
+ * Mark a given journal feature as present on the
+ * superblock.  Returns true if the requested features could be set.
+ *
+ */
 int journal_set_features (journal_t *journal, unsigned long compat,
 			  unsigned long ro, unsigned long incompat)
@@ -1210,12 +1247,12 @@ int journal_set_features (journal_t *journal, unsigned long compat,
 }
 
 
-/*
- * Published API:
+/**
+ * int journal_update_format () - Update on-disk journal structure.
+ *
 * Given an initialised but unloaded journal struct, poke about in the
 * on-disk structure to update it to the most recent supported version.
 */
-
 int journal_update_format (journal_t *journal)
 {
 	journal_superblock_t *sb;
@@ -1265,7 +1302,10 @@ static int journal_convert_superblock_v1(journal_t *journal,
 }
 
 
-/*
+/**
+ * int journal_flush () - Flush journal
+ * @journal: Journal to act on.
+ *
 * Flush all data for a given journal to disk and empty the journal.
 * Filesystems can use this when remounting readonly to ensure that
 * recovery does not need to happen on remount.
@@ -1319,12 +1359,16 @@ int journal_flush (journal_t *journal)
 	return err;
 }
 
-/*
+/**
+ * int journal_wipe() - Wipe journal contents
+ * @journal: Journal to act on.
+ * @write: flag (see below)
+ *
 * Wipe out all of the contents of a journal, safely.  This will produce
 * a warning if the journal contains any valid recovery information.
 * Must be called between journal_init_*() and journal_load().
 *
- * If (write) is non-zero, then we wipe out the journal on disk; otherwise
+ * If 'write' is non-zero, then we wipe out the journal on disk; otherwise
 * we merely suppress recovery.
 */
@@ -1373,43 +1417,11 @@ const char * journal_dev_name(journal_t *journal)
 }
 
 /*
- * journal_abort: perform a complete, immediate shutdown of the ENTIRE
- * journal (not of a single transaction).  This operation cannot be
- * undone without closing and reopening the journal.
- *
- * The journal_abort function is intended to support higher level error
- * recovery mechanisms such as the ext2/ext3 remount-readonly error
- * mode.
- *
- * Journal abort has very specific semantics.  Any existing dirty,
- * unjournaled buffers in the main filesystem will still be written to
- * disk by bdflush, but the journaling mechanism will be suspended
- * immediately and no further transaction commits will be honoured.
- *
- * Any dirty, journaled buffers will be written back to disk without
- * hitting the journal.  Atomicity cannot be guaranteed on an aborted
- * filesystem, but we _do_ attempt to leave as much data as possible
- * behind for fsck to use for cleanup.
- *
- * Any attempt to get a new transaction handle on a journal which is in
- * ABORT state will just result in an -EROFS error return.  A
- * journal_stop on an existing handle will return -EIO if we have
- * entered abort state during the update.
+ * Journal abort has very specific semantics, which we describe
+ * for journal abort.
 *
- * Recursive transactions are not disturbed by journal abort until the
- * final journal_stop, which will receive the -EIO error.
- *
- * Finally, the journal_abort call allows the caller to supply an errno
- * which will be recored (if possible) in the journal superblock.  This
- * allows a client to record failure conditions in the middle of a
- * transaction without having to complete the transaction to record the
- * failure to disk.  ext3_error, for example, now uses this
- * functionality.
- *
- * Errors which originate from within the journaling layer will NOT
- * supply an errno; a null errno implies that absolutely no further
- * writes are done to the journal (unless there are any already in
- * progress).
+ * Two internal function, which provide abort to te jbd layer
+ * itself are here.
 */
 
 /* Quick version for internal journal use (doesn't lock the journal).
@@ -1447,7 +1459,52 @@ void __journal_abort_soft (journal_t *journal, int errno)
 	journal_update_superblock(journal, 1);
 }
 
-/* Full version for external use */
+/**
+ * void journal_abort () - Shutdown the journal immediately.
+ * @journal: the journal to shutdown.
+ * @errno:   an error number to record in the journal indicating
+ *           the reason for the shutdown.
+ *
+ * Perform a complete, immediate shutdown of the ENTIRE
+ * journal (not of a single transaction).  This operation cannot be
+ * undone without closing and reopening the journal.
+ *
+ * The journal_abort function is intended to support higher level error
+ * recovery mechanisms such as the ext2/ext3 remount-readonly error
+ * mode.
+ *
+ * Journal abort has very specific semantics.  Any existing dirty,
+ * unjournaled buffers in the main filesystem will still be written to
+ * disk by bdflush, but the journaling mechanism will be suspended
+ * immediately and no further transaction commits will be honoured.
+ *
+ * Any dirty, journaled buffers will be written back to disk without
+ * hitting the journal.  Atomicity cannot be guaranteed on an aborted
+ * filesystem, but we _do_ attempt to leave as much data as possible
+ * behind for fsck to use for cleanup.
+ *
+ * Any attempt to get a new transaction handle on a journal which is in
+ * ABORT state will just result in an -EROFS error return.  A
+ * journal_stop on an existing handle will return -EIO if we have
+ * entered abort state during the update.
+ *
+ * Recursive transactions are not disturbed by journal abort until the
+ * final journal_stop, which will receive the -EIO error.
+ *
+ * Finally, the journal_abort call allows the caller to supply an errno
+ * which will be recorded (if possible) in the journal superblock.  This
+ * allows a client to record failure conditions in the middle of a
+ * transaction without having to complete the transaction to record the
+ * failure to disk.  ext3_error, for example, now uses this
+ * functionality.
+ *
+ * Errors which originate from within the journaling layer will NOT
+ * supply an errno; a null errno implies that absolutely no further
+ * writes are done to the journal (unless there are any already in
+ * progress).
+ *
+ */
+
 void journal_abort (journal_t *journal, int errno)
 {
 	lock_journal(journal);
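As the relocated comment says, journal_abort() exists so a client filesystem can record a failure in mid-transaction without having to complete it. A hedged sketch of the calling side, loosely modelled on the ext3_error() pattern the comment mentions; journal_abort() and journal_errno() are the real jbd entry points, while the wrapper functions and their policy are hypothetical:

/*
 * Sketch only, not from this patch: how a client filesystem might
 * react to a fatal metadata error.
 */
static void example_handle_fatal_error(journal_t *journal, int errno)
{
	/* Record the failure in the journal superblock and stop
	 * honouring further commits; existing handles will start
	 * seeing -EIO from journal_stop(). */
	journal_abort(journal, errno);
}

static int example_is_aborted(journal_t *journal)
{
	/* Non-zero after an abort (-EROFS if aborted this mount),
	 * until journal_clear_err()/journal_ack_err() take the
	 * filesystem out of the error state. */
	return journal_errno(journal) != 0;
}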
 	__journal_abort_hard(journal, errno);
@@ -1455,6 +1512,17 @@ void journal_abort (journal_t *journal, int errno)
 	unlock_journal(journal);
 }
 
+/**
+ * int journal_errno () - returns the journal's error state.
+ * @journal: journal to examine.
+ *
+ * This is the errno numbet set with journal_abort(), the last
+ * time the journal was mounted - if the journal was stopped
+ * without calling abort this will be 0.
+ *
+ * If the journal has been aborted on this mount time -EROFS will
+ * be returned.
+ */
 int journal_errno (journal_t *journal)
 {
 	int err;
@@ -1468,6 +1536,14 @@ int journal_errno (journal_t *journal)
 	return err;
 }
 
+
+
+/**
+ * int journal_clear_err () - clears the journal's error state
+ *
+ * An error must be cleared or Acked to take a FS out of readonly
+ * mode.
+ */
 int journal_clear_err (journal_t *journal)
 {
 	int err = 0;
@@ -1481,6 +1557,13 @@ int journal_clear_err (journal_t *journal)
 	return err;
 }
 
+
+/**
+ * void journal_ack_err() - Ack journal err.
+ *
+ * An error must be cleared or Acked to take a FS out of readonly
+ * mode.
+ */
 void journal_ack_err (journal_t *journal)
 {
 	lock_journal(journal);

diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c
index e6a96d3c30ce..f82d7f3cc507 100644
--- a/fs/jbd/recovery.c
+++ b/fs/jbd/recovery.c
@@ -206,20 +206,22 @@ do {									\
 		var -= ((journal)->j_last - (journal)->j_first);	\
 } while (0)
 
-/*
- * journal_recover
- *
+/**
+ * int journal_recover(journal_t *journal) - recovers a on-disk journal
+ * @journal: the journal to recover
+ *
 * The primary function for recovering the log contents when mounting a
 * journaled device.
- *
+ */
+int journal_recover(journal_t *journal)
+{
+/*
 * Recovery is done in three passes.  In the first pass, we look for the
 * end of the log.  In the second, we assemble the list of revoke
 * blocks.  In the third and final pass, we replay any un-revoked blocks
 * in the log.
 */
-int journal_recover(journal_t *journal)
-{
 	int			err;
 	journal_superblock_t *	sb;
@@ -263,20 +265,23 @@ int journal_recover(journal_t *journal)
 	return err;
 }
 
-/*
- * journal_skip_recovery
- *
+/**
+ * int journal_skip_recovery() - Start journal and wipe exiting records
+ * @journal: journal to startup
+ *
 * Locate any valid recovery information from the journal and set up the
 * journal structures in memory to ignore it (presumably because the
 * caller has evidence that it is out of date).
- *
+ * This function does'nt appear to be exorted..
+ */
+int journal_skip_recovery(journal_t *journal)
+{
+/*
 * We perform one pass over the journal to allow us to tell the user how
 * much recovery information is being erased, and to let us initialise
 * the journal transaction sequence numbers to the next unused ID.
 */
-int journal_skip_recovery(journal_t *journal)
-{
 	int			err;
 	journal_superblock_t *	sb;
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 597562cf47fe..14ca5228e9d6 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -222,19 +222,20 @@ static handle_t *new_handle(int nblocks)
 	return handle;
 }
 
-/*
- * Obtain a new handle.
+/**
+ * handle_t *journal_start() - Obtain a new handle.
+ * @journal: Journal to start transaction on.
+ * @nblocks: number of block buffer we might modify
 *
 * We make sure that the transaction can guarantee at least nblocks of
 * modified buffers in the log.  We block until the log can guarantee
 * that much space.
 *
- * This function is visible to journal users (like ext2fs), so is not
+ * This function is visible to journal users (like ext3fs), so is not
 * called with the journal already locked.
 *
 * Return a pointer to a newly allocated handle, or NULL on failure
 */
-
 handle_t *journal_start(journal_t *journal, int nblocks)
 {
 	handle_t *handle = journal_current_handle();
@@ -324,7 +325,11 @@ fail_unlock:
 	return ret;
 }
 
-/*
+/**
+ * handle_t *journal_try_start() - Don't block, but try and get a handle
+ * @journal: Journal to start transaction on.
+ * @nblocks: number of block buffer we might modify
+ *
 * Try to start a handle, but non-blockingly.  If we weren't able
 * to, return an ERR_PTR value.
 */
@@ -368,16 +373,18 @@ handle_t *journal_try_start(journal_t *journal, int nblocks)
 	return handle;
 }
 
-/*
- * journal_extend: extend buffer credits.
- *
+/**
+ * int journal_extend() - extend buffer credits.
+ * @handle:  handle to 'extend'
+ * @nblocks: nr blocks to try to extend by.
+ *
 * Some transactions, such as large extends and truncates, can be done
 * atomically all at once or in several stages.  The operation requests
 * a credit for a number of buffer modications in advance, but can
 * extend its credit if it needs more.
 *
 * journal_extend tries to give the running handle more buffer credits.
- * It does not guarantee that allocation: this is a best-effort only.
+ * It does not guarantee that allocation - this is a best-effort only.
 * The calling process MUST be able to deal cleanly with a failure to
 * extend here.
 *
@@ -386,7 +393,6 @@ handle_t *journal_try_start(journal_t *journal, int nblocks)
 * return code < 0 implies an error
 * return code > 0 implies normal transaction-full status.
 */
-
 int journal_extend (handle_t *handle, int nblocks)
 {
 	transaction_t *transaction = handle->h_transaction;
@@ -435,8 +441,12 @@ error_out:
 }
 
 
-/*
- * journal_restart: restart a handle for a multi-transaction filesystem
+/**
+ * int journal_restart() - restart a handle .
+ * @handle:  handle to restart
+ * @nblocks: nr credits requested
+ *
+ * Restart a handle for a multi-transaction filesystem
 * operation.
 *
 * If the journal_extend() call above fails to grant new buffer credits
@@ -478,8 +488,9 @@ int journal_restart(handle_t *handle, int nblocks)
 }
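journal_start(), journal_extend() and journal_restart() combine into a standard pattern that the ext3 truncate changes earlier in this diff also follow: reserve credits, try to extend cheaply when running low, and fall back to restarting the transaction at a safe point. A hedged sketch of that pattern; the jbd calls are real, but the helper, the more_work_to_do() predicate and the CREDITS value are illustrative assumptions:

/* Sketch only: a multi-chunk operation under jbd handles. */
#define CREDITS 38	/* assumed per-chunk credit estimate */

static int example_do_chunks(journal_t *journal)
{
	handle_t *handle = journal_start(journal, CREDITS);
	int err = 0;

	if (IS_ERR(handle))
		return PTR_ERR(handle);

	while (more_work_to_do()) {		/* hypothetical predicate */
		/* ... modify up to CREDITS buffers under this handle ... */

		if (handle->h_buffer_credits < CREDITS &&
		    journal_extend(handle, CREDITS) != 0) {
			/* couldn't extend: commit what we have and start
			 * a fresh transaction at a restartable point */
			err = journal_restart(handle, CREDITS);
			if (err)
				break;
		}
	}
	journal_stop(handle);
	return err;
}

The key contract, spelled out in the journal_extend() comment, is that extension may fail for reasons outside the caller's control, so every caller needs the journal_restart() fallback.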
 
-/*
- * Barrier operation: establish a transaction barrier.
+/**
+ * void journal_lock_updates () - establish a transaction barrier.
+ * @journal: Journal to establish a barrier on.
 *
 * This locks out any further updates from being started, and blocks
 * until all existing updates have completed, returning only once the
@@ -487,7 +498,6 @@ int journal_restart(handle_t *handle, int nblocks)
 * journal is in a quiescent state with no updates running.
 *
 * The journal lock should not be held on entry.
 */
-
 void journal_lock_updates (journal_t *journal)
 {
 	lock_journal(journal);
@@ -515,12 +525,14 @@ void journal_lock_updates (journal_t *journal)
 	down(&journal->j_barrier);
 }
 
-/*
+/**
+ * void journal_unlock_updates (journal_t* journal) - release barrier
+ * @journal: Journal to release the barrier on.
+ *
 * Release a transaction barrier obtained with journal_lock_updates().
 *
 * Should be called without the journal lock held.
 */
-
 void journal_unlock_updates (journal_t *journal)
 {
 	lock_journal(journal);
@@ -566,9 +578,6 @@ static void jbd_unexpected_dirty_buffer(struct journal_head *jh)
 }
 
 /*
- * journal_get_write_access: notify intent to modify a buffer for metadata
- * (not data) update.
- *
 * If the buffer is already part of the current transaction, then there
 * is nothing we need to do.  If it is already part of a prior
 * transaction which we are still committing to disk, then we need to
@@ -577,7 +586,6 @@ static void jbd_unexpected_dirty_buffer(struct journal_head *jh)
 * the handle's metadata buffer credits (unless the buffer is already
 * part of the transaction, that is).
 *
- * Returns an error code or 0 on success.
 */
 
 static int
@@ -786,6 +794,17 @@ out_unlocked:
 	return error;
 }
 
+/**
+ * int journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update.
+ * @handle: transaction to add buffer modifications to
+ * @bh:     bh to be used for metadata writes
+ *
+ * Returns an error code or 0 on success.
+ *
+ * In full data journalling mode the buffer may be of type BJ_AsyncData,
+ * because we're write()ing a buffer which is also part of a shared mapping.
+ */
+
 int journal_get_write_access (handle_t *handle, struct buffer_head *bh)
 {
 	transaction_t *transaction = handle->h_transaction;
@@ -816,6 +835,13 @@ int journal_get_write_access (handle_t *handle, struct buffer_head *bh)
 * There is no lock ranking violation: it was a newly created,
 * unlocked buffer beforehand. */
+/**
+ * int journal_get_create_access () - notify intent to use newly created bh
+ * @handle: transaction to new buffer to
+ * @bh: new buffer.
+ *
+ * Call this if you create a new bh.
+ */
 int journal_get_create_access (handle_t *handle, struct buffer_head *bh)
 {
 	transaction_t *transaction = handle->h_transaction;
@@ -875,13 +901,14 @@ out:
 
 
 
-/*
- * journal_get_undo_access: Notify intent to modify metadata with non-
- * rewindable consequences
- *
+/**
+ * int journal_get_undo_access() -  Notify intent to modify metadata with non-rewindable consequences
+ * @handle: transaction
+ * @bh: buffer to undo
+ *
 * Sometimes there is a need to distinguish between metadata which has
 * been committed to disk and that which has not.  The ext3fs code uses
- * this for freeing and allocating space: we have to make sure that we
+ * this for freeing and allocating space, we have to make sure that we
 * do not reuse freed space until the deallocation has been committed,
 * since if we overwrote that space we would make the delete
 * un-rewindable in case of a crash.
@@ -893,13 +920,12 @@ out:
 * as we know that the buffer has definitely been committed to disk.
 *
 * We never need to know which transaction the committed data is part
- * of: buffers touched here are guaranteed to be dirtied later and so
+ * of, buffers touched here are guaranteed to be dirtied later and so
 * will be committed to a new transaction in due course, at which point
 * we can discard the old committed data pointer.
 *
 * Returns error number or 0 on success.
 */
-
 int journal_get_undo_access (handle_t *handle, struct buffer_head *bh)
 {
 	journal_t *journal = handle->h_transaction->t_journal;
@@ -942,21 +968,23 @@ out:
 	return err;
 }
 
-/*
- * journal_dirty_data: mark a buffer as containing dirty data which
- * needs to be flushed before we can commit the current transaction.
- *
+/**
+ * int journal_dirty_data() -  mark a buffer as containing dirty data which needs to be flushed before we can commit the current transaction.
+ * @handle: transaction
+ * @bh: bufferhead to mark
+ *
 * The buffer is placed on the transaction's data list and is marked as
 * belonging to the transaction.
 *
 * Returns error number or 0 on success.
- *
+ */
+int journal_dirty_data (handle_t *handle, struct buffer_head *bh)
+{
+/*
 * journal_dirty_data() can be called via page_launder->ext3_writepage
 * by kswapd.  So it cannot block.  Happily, there's nothing here
 * which needs lock_journal if `async' is set.
 */
-int journal_dirty_data (handle_t *handle, struct buffer_head *bh)
-{
 	journal_t *journal = handle->h_transaction->t_journal;
 	int need_brelse = 0;
 	struct journal_head *jh;
@@ -1097,24 +1125,28 @@ no_journal:
 	return 0;
 }
 
-/*
- * journal_dirty_metadata: mark a buffer as containing dirty metadata
- * which needs to be journaled as part of the current transaction.
+/**
+ * int journal_dirty_metadata() -  mark a buffer as containing dirty metadata
+ * @handle: transaction to add buffer to.
+ * @bh: buffer to mark
+ *
+ * mark dirty metadata which needs to be journaled as part of the current transaction.
 *
 * The buffer is placed on the transaction's metadata list and is marked
 * as belonging to the transaction.
 *
+ * Returns error number or 0 on success.
+ */
+int journal_dirty_metadata (handle_t *handle, struct buffer_head *bh)
+{
+/*
 * Special care needs to be taken if the buffer already belongs to the
 * current committing transaction (in which case we should have frozen
 * data present for that commit).  In that case, we don't relink the
 * buffer: that only gets done when the old transaction finally
 * completes its commit.
 *
- * Returns error number or 0 on success.
 */
-
-int journal_dirty_metadata (handle_t *handle, struct buffer_head *bh)
-{
 	transaction_t *transaction = handle->h_transaction;
 	journal_t *journal = transaction->t_journal;
 	struct journal_head *jh = bh2jh(bh);
@@ -1199,9 +1231,12 @@ void journal_release_buffer (handle_t *handle, struct buffer_head *bh)
 }
 #endif
 
-/*
- * journal_forget: bforget() for potentially-journaled buffers.  We can
- * only do the bforget if there are no commits pending against the
+/**
+ * void journal_forget() - bforget() for potentially-journaled buffers.
+ * @handle: transaction handle
+ * @bh:     bh to 'forget'
+ *
+ * We can only do the bforget if there are no commits pending against the
 * buffer.  If the buffer is dirty in the current running transaction we
 * can safely unlink it.
 *
@@ -1213,7 +1248,6 @@ void journal_release_buffer (handle_t *handle, struct buffer_head *bh)
 * Allow this call even if the handle has aborted --- it may be part of
 * the caller's cleanup after an abort.
 */
-
 void journal_forget (handle_t *handle, struct buffer_head *bh)
 {
 	transaction_t *transaction = handle->h_transaction;
@@ -1352,8 +1386,14 @@ out:
 }
 #endif
 
-/*
- * Register a callback function for this handle.  The function will be
+/**
+ * void journal_callback_set() -  Register a callback function for this handle.
+ * @handle: handle to attach the callback to.
+ * @func: function to callback.
+ * @jcb:  structure with additional information required by func() , and
+ * some space for jbd internal information.
+ *
+ * The function will be
 * called when the transaction that this handle is part of has been
 * committed to disk with the original callback data struct and the
 * error status of the journal as parameters.  There is no guarantee of
@@ -1374,7 +1414,11 @@ void journal_callback_set(handle_t *handle,
 	jcb->jcb_func = func;
 }
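journal_callback_set() takes a caller-supplied struct journal_callback, so the usual idiom is to embed it in a larger private record and cast back inside the callback, which per the comment runs after commit with the journal's error status. A hedged sketch; only jcb_func and the described parameters are taken from the patch, and the wrapper struct, callback body and signature details are hypothetical:

/* Sketch only: per-operation record with an embedded callback. */
struct example_done {
	struct journal_callback jcb;	/* must come first for the cast */
	int cookie;
};

static void example_commit_done(struct journal_callback *jcb, int error)
{
	struct example_done *d = (struct example_done *)jcb;

	/* runs once the handle's transaction is on disk; 'error' is the
	 * journal's error status at commit time */
	printk(KERN_INFO "commit done, cookie %d, error %d\n",
	       d->cookie, error);
}

static void example_register(handle_t *handle, struct example_done *d)
{
	d->cookie = 42;
	journal_callback_set(handle, example_commit_done, &d->jcb);
}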
*/ - int journal_stop(handle_t *handle) { transaction_t *transaction = handle->h_transaction; @@ -1473,8 +1516,10 @@ int journal_stop(handle_t *handle) return err; } -/* - * For synchronous operations: force any uncommitted trasnactions +/** + * int journal_force_commit() - force any uncommitted transactions + * @journal: journal to force + * + * For synchronous operations: force any uncommitted transactions * to disk. May seem kludgy, but it reuses all the handle batching * code in a very simple manner. */ @@ -1667,6 +1712,26 @@ out: return 0; } + +/** + * int journal_try_to_free_buffers() - try to free page buffers. + * @journal: journal for operation + * @page: page to try and free + * @gfp_mask: 'IO' mode for try_to_free_buffers() + * + * For all the buffers on this page, + * if they are fully written out ordered data, move them onto BUF_CLEAN + * so try_to_free_buffers() can reap them. + * + * This function returns non-zero if we wish try_to_free_buffers() + * to be called. We do this if the page is releasable by try_to_free_buffers(). + * We also do it if the page has locked or dirty buffers and the caller wants + * us to perform sync or async writeout. + */ +int journal_try_to_free_buffers(journal_t *journal, + struct page *page, int unused_gfp_mask) +{ /* * journal_try_to_free_buffers(). Try to remove all this page's buffers * from the journal. @@ -1689,9 +1754,6 @@ out: * cannot happen because we never reallocate freed data as metadata * while the data is part of a transaction. Yes? */ -int journal_try_to_free_buffers(journal_t *journal, - struct page *page, int unused_gfp_mask) -{ struct buffer_head *head; struct buffer_head *bh; int ret = 0; @@ -1886,8 +1948,15 @@ zap_buffer: return may_free; } -/* - * Return non-zero if the page's buffers were successfully reaped +/** + * int journal_invalidatepage() + * @journal: journal to use for flush... + * @page: page to flush + * @offset: offset in the page from which to start invalidating. + * + * Reap page buffers containing data after offset in page. + * + * Return non-zero if the page's buffers were successfully reaped.
*/ int journal_invalidatepage(journal_t *journal, struct page *page, diff --git a/fs/jffs/intrep.c b/fs/jffs/intrep.c index d5b053e5b73a..6cf3d86a5d79 100644 --- a/fs/jffs/intrep.c +++ b/fs/jffs/intrep.c @@ -3347,10 +3347,10 @@ jffs_garbage_collect_thread(void *ptr) current->session = 1; current->pgrp = 1; init_completion(&c->gc_thread_comp); /* barrier */ - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); siginitsetinv (&current->blocked, sigmask(SIGHUP) | sigmask(SIGKILL) | sigmask(SIGSTOP) | sigmask(SIGCONT)); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); strcpy(current->comm, "jffs_gcd"); D1(printk (KERN_NOTICE "jffs_garbage_collect_thread(): Starting infinite loop.\n")); @@ -3378,9 +3378,9 @@ jffs_garbage_collect_thread(void *ptr) siginfo_t info; unsigned long signr = 0; - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); signr = dequeue_signal(&current->blocked, &info); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); switch(signr) { case SIGSTOP: diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index b1654cff562b..a5c35fdb51c8 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h @@ -54,7 +54,7 @@ #if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,40) #define current_sig_lock current->sigmask_lock #else -#define current_sig_lock current->sig->siglock +#define current_sig_lock current->sighand->siglock #endif static inline void jffs2_init_inode_info(struct jffs2_inode_info *f) diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index e3d931ff7ca2..360139794557 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -2139,10 +2139,10 @@ int jfsIOWait(void *arg) unlock_kernel(); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); sigfillset(&current->blocked); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); complete(&jfsIOwait); diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index 58df2f7c38cd..6af148d0387c 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c @@ -2780,10 +2780,10 @@ int jfs_lazycommit(void *arg) jfsCommitTask = current; - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); sigfillset(&current->blocked); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); LAZY_LOCK_INIT(); TxAnchor.unlock_queue = TxAnchor.unlock_tail = 0; @@ -2815,7 +2815,7 @@ restart: txLazyCommit(tblk); /* - * We can be running indefinately if other processors + * We can be running indefinitely if other processors * are adding transactions to this list */ cond_resched(); @@ -2985,10 +2985,10 @@ int jfs_sync(void *arg) unlock_kernel(); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); sigfillset(&current->blocked); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); complete(&jfsIOwait); diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 404ac2d3a95b..c4c4e0595163 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -139,7 +139,7 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl) } /* Keep the old signal mask */ - spin_lock_irqsave(&current->sig->siglock, flags); + spin_lock_irqsave(&current->sighand->siglock, flags); oldset = current->blocked; /* If we're cleaning up locks because the process is exiting, @@ -149,7 +149,7 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl) && (current->flags & PF_EXITING)) { sigfillset(&current->blocked);
/* Mask all signals */ recalc_sigpending(); - spin_unlock_irqrestore(&current->sig->siglock, flags); + spin_unlock_irqrestore(&current->sighand->siglock, flags); call = nlmclnt_alloc_call(); if (!call) { @@ -158,7 +158,7 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl) } call->a_flags = RPC_TASK_ASYNC; } else { - spin_unlock_irqrestore(&current->sig->siglock, flags); + spin_unlock_irqrestore(&current->sighand->siglock, flags); memset(call, 0, sizeof(*call)); locks_init_lock(&call->a_args.lock.fl); locks_init_lock(&call->a_res.lock.fl); @@ -183,10 +183,10 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl) kfree(call); out_restore: - spin_lock_irqsave(&current->sig->siglock, flags); + spin_lock_irqsave(&current->sighand->siglock, flags); current->blocked = oldset; recalc_sigpending(); - spin_unlock_irqrestore(&current->sig->siglock, flags); + spin_unlock_irqrestore(&current->sighand->siglock, flags); done: dprintk("lockd: clnt proc returns %d\n", status); @@ -588,11 +588,11 @@ nlmclnt_cancel(struct nlm_host *host, struct file_lock *fl) int status; /* Block all signals while setting up call */ - spin_lock_irqsave(&current->sig->siglock, flags); + spin_lock_irqsave(&current->sighand->siglock, flags); oldset = current->blocked; sigfillset(&current->blocked); recalc_sigpending(); - spin_unlock_irqrestore(&current->sig->siglock, flags); + spin_unlock_irqrestore(&current->sighand->siglock, flags); req = nlmclnt_alloc_call(); if (!req) @@ -607,10 +607,10 @@ nlmclnt_cancel(struct nlm_host *host, struct file_lock *fl) if (status < 0) kfree(req); - spin_lock_irqsave(&current->sig->siglock, flags); + spin_lock_irqsave(&current->sighand->siglock, flags); current->blocked = oldset; recalc_sigpending(); - spin_unlock_irqrestore(&current->sig->siglock, flags); + spin_unlock_irqrestore(&current->sighand->siglock, flags); return status; } diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index f608fbc8354b..a0cafbdfbb0a 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -101,10 +101,10 @@ lockd(struct svc_rqst *rqstp) sprintf(current->comm, "lockd"); /* Process request with signals blocked. */ - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); siginitsetinv(&current->blocked, sigmask(SIGKILL)); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); /* kick rpciod */ rpciod_up(); @@ -126,9 +126,9 @@ lockd(struct svc_rqst *rqstp) { long timeout = MAX_SCHEDULE_TIMEOUT; if (signalled()) { - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); flush_signals(current); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); if (nlmsvc_ops) { nlmsvc_invalidate_all(); grace_period_expire = set_grace_period(); @@ -297,9 +297,9 @@ lockd_down(void) "lockd_down: lockd failed to exit, clearing pid\n"); nlmsvc_pid = 0; } - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); out: up(&nlmsvc_sema); } diff --git a/fs/mpage.c b/fs/mpage.c index a44993cd7927..3460144c1894 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -116,6 +116,49 @@ mpage_alloc(struct block_device *bdev, return bio; } +/* + * support function for mpage_readpages. The fs-supplied get_block might + * return an up-to-date buffer. This is used to map that buffer into + * the page, which allows readpage to avoid triggering a duplicate call + * to get_block. + * + * The idea is to avoid adding buffers to pages that don't already have + * them.
So when the buffer is up to date and the page size == block size, + * this marks the page up to date instead of adding new buffers. + */ +static void +map_buffer_to_page(struct page *page, struct buffer_head *bh, int page_block) +{ + struct inode *inode = page->mapping->host; + struct buffer_head *page_bh, *head; + int block = 0; + + if (!page_has_buffers(page)) { + /* + * don't make any buffers if there is only one buffer on + * the page and the page just needs to be set up to date + */ + if (inode->i_blkbits == PAGE_CACHE_SHIFT && + buffer_uptodate(bh)) { + SetPageUptodate(page); + return; + } + create_empty_buffers(page, 1 << inode->i_blkbits, 0); + } + head = page_buffers(page); + page_bh = head; + do { + if (block == page_block) { + page_bh->b_state = bh->b_state; + page_bh->b_bdev = bh->b_bdev; + page_bh->b_blocknr = bh->b_blocknr; + break; + } + page_bh = page_bh->b_this_page; + block++; + } while (page_bh != head); +} + /** * mpage_readpages - populate an address space with some pages, and * start reads against them. @@ -186,6 +229,7 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, block_in_file = page->index << (PAGE_CACHE_SHIFT - blkbits); last_block = (inode->i_size + blocksize - 1) >> blkbits; + bh.b_page = page; for (page_block = 0; page_block < blocks_per_page; page_block++, block_in_file++) { bh.b_state = 0; @@ -200,6 +244,17 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, first_hole = page_block; continue; } + + /* some filesystems will copy data into the page during + * the get_block call, in which case we don't want to + * read it again. map_buffer_to_page copies the data + * we just collected from get_block into the page's buffers + * so readpage doesn't have to repeat the get_block call + */ + if (buffer_uptodate(&bh)) { + map_buffer_to_page(page, &bh, page_block); + goto confused; + } if (first_hole != blocks_per_page) goto confused; /* hole -> non-hole */ @@ -256,7 +311,10 @@ out: confused: if (bio) bio = mpage_bio_submit(READ, bio); - block_read_full_page(page, get_block); + if (!PageUptodate(page)) + block_read_full_page(page, get_block); + else + unlock_page(page); goto out; } @@ -344,6 +402,7 @@ mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block, sector_t boundary_block = 0; struct block_device *boundary_bdev = NULL; int length; + struct buffer_head map_bh; if (page_has_buffers(page)) { struct buffer_head *head = page_buffers(page); @@ -401,8 +460,8 @@ mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block, BUG_ON(!PageUptodate(page)); block_in_file = page->index << (PAGE_CACHE_SHIFT - blkbits); last_block = (inode->i_size - 1) >> blkbits; + map_bh.b_page = page; for (page_block = 0; page_block < blocks_per_page; ) { - struct buffer_head map_bh; map_bh.b_state = 0; if (get_block(inode, block_in_file, &map_bh, 1)) @@ -559,7 +618,6 @@ mpage_writepages(struct address_space *mapping, int (*writepage)(struct page *page, struct writeback_control *wbc); if (wbc->nonblocking && bdi_write_congested(bdi)) { - blk_run_queues(); wbc->encountered_congestion = 1; return 0; } @@ -614,7 +672,6 @@ mpage_writepages(struct address_space *mapping, if (ret || (--(wbc->nr_to_write) <= 0)) done = 1; if (wbc->nonblocking && bdi_write_congested(bdi)) { - blk_run_queues(); wbc->encountered_congestion = 1; done = 1; } diff --git a/fs/ncpfs/sock.c b/fs/ncpfs/sock.c index 93ba7610dde0..f01c538eb282 100644 --- a/fs/ncpfs/sock.c +++ b/fs/ncpfs/sock.c @@ -745,7 +745,7 @@ static int 
ncp_do_request(struct ncp_server *server, int size, sigset_t old_set; unsigned long mask, flags; - spin_lock_irqsave(&current->sig->siglock, flags); + spin_lock_irqsave(&current->sighand->siglock, flags); old_set = current->blocked; if (current->flags & PF_EXITING) mask = 0; @@ -764,7 +764,7 @@ static int ncp_do_request(struct ncp_server *server, int size, } siginitsetinv(&current->blocked, mask); recalc_sigpending(); - spin_unlock_irqrestore(&current->sig->siglock, flags); + spin_unlock_irqrestore(&current->sighand->siglock, flags); fs = get_fs(); set_fs(get_ds()); @@ -773,10 +773,10 @@ static int ncp_do_request(struct ncp_server *server, int size, set_fs(fs); - spin_lock_irqsave(&current->sig->siglock, flags); + spin_lock_irqsave(&current->sighand->siglock, flags); current->blocked = old_set; recalc_sigpending(); - spin_unlock_irqrestore(&current->sig->siglock, flags); + spin_unlock_irqrestore(&current->sighand->siglock, flags); } DDPRINTK("do_ncp_rpc_call returned %d\n", result); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7cfd12436c79..1f1ab0213a87 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -743,8 +743,8 @@ nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, goto out; /* - * Now we do a seperate LOOKUP for each component of the mount path. - * The LOOKUPs are done seperately so that we can conveniently + * Now we do a separate LOOKUP for each component of the mount path. + * The LOOKUPs are done separately so that we can conveniently * catch an ERR_WRONGSEC if it occurs along the way... */ p = server->mnt_path; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 94f48ae35e95..3919e77036e3 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -189,10 +189,10 @@ nfsd(struct svc_rqst *rqstp) */ for (;;) { /* Block all but the shutdown signals */ - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); siginitsetinv(&current->blocked, SHUTDOWN_SIGS); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); /* * Find a socket with data available and call its @@ -210,10 +210,10 @@ nfsd(struct svc_rqst *rqstp) exp_readlock(); /* Process request with signals blocked. */ - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); siginitsetinv(&current->blocked, ALLOWED_SIGS); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); svc_process(serv, rqstp); diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c index 9a33c941cd5c..e0d448b3a6c9 100644 --- a/fs/partitions/ldm.c +++ b/fs/partitions/ldm.c @@ -510,7 +510,7 @@ static BOOL ldm_validate_vmdb (struct block_device *bdev, unsigned long base, /* Are there uncommitted transactions? */ if (BE16(data + 0x10) != 0x01) { - ldm_crit ("Database is not in a consistant state. Aborting."); + ldm_crit ("Database is not in a consistent state.
Aborting."); goto out; } diff --git a/fs/proc/array.c b/fs/proc/array.c index e135ac5a1080..df1501a0f332 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -190,16 +190,16 @@ static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign, sigemptyset(catch); read_lock(&tasklist_lock); - if (p->sig) { - spin_lock_irq(&p->sig->siglock); - k = p->sig->action; + if (p->sighand) { + spin_lock_irq(&p->sighand->siglock); + k = p->sighand->action; for (i = 1; i <= _NSIG; ++i, ++k) { if (k->sa.sa_handler == SIG_IGN) sigaddset(ign, i); else if (k->sa.sa_handler != SIG_DFL) sigaddset(catch, i); } - spin_unlock_irq(&p->sig->siglock); + spin_unlock_irq(&p->sighand->siglock); } read_unlock(&tasklist_lock); } diff --git a/fs/quota_v2.c b/fs/quota_v2.c index 64811521d0ce..c051de09c559 100644 --- a/fs/quota_v2.c +++ b/fs/quota_v2.c @@ -306,6 +306,7 @@ static uint find_free_dqentry(struct dquot *dquot, int *err) blk = get_free_dqblk(filp, info); if ((int)blk < 0) { *err = blk; + freedqbuf(buf); return 0; } memset(buf, 0, V2_DQBLKSIZE); diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 817c5c465d19..a9e5003b3589 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -11,6 +11,8 @@ #include <asm/uaccess.h> #include <asm/unaligned.h> #include <linux/buffer_head.h> +#include <linux/mpage.h> +#include <linux/writeback.h> /* args for the create parameter of reiserfs_get_block */ #define GET_BLOCK_NO_CREATE 0 /* don't create new blocks or convert tails */ @@ -262,7 +264,10 @@ research: blocknr = get_block_num(ind_item, path.pos_in_item) ; ret = 0 ; if (blocknr) { - map_bh(bh_result, inode->i_sb, blocknr); + map_bh(bh_result, inode->i_sb, blocknr); + if (path.pos_in_item == ((ih_item_len(ih) / UNFM_P_SIZE) - 1)) { + set_buffer_boundary(bh_result); + } } else // We do not return -ENOENT if there is a hole but page is uptodate, because it means // That there is some MMAPED data associated with it that is yet to be written to disk. @@ -286,7 +291,7 @@ research: return -ENOENT; } - /* if we've got a direct item, and the buffer was uptodate, + /* if we've got a direct item, and the buffer or page was uptodate, ** we don't want to pull data off disk again. skip to the ** end, where we map the buffer and return */ @@ -367,7 +372,9 @@ research: finished: pathrelse (&path); - /* I _really_ doubt that you want it. Chris? */ + /* this buffer has valid data, but isn't valid for io. 
mapping it to + * block #0 tells the rest of reiserfs it just has a tail in it + */ map_bh(bh_result, inode->i_sb, 0); set_buffer_uptodate (bh_result); return 0; @@ -842,6 +849,12 @@ int reiserfs_get_block (struct inode * inode, sector_t block, return retval; } +static int +reiserfs_readpages(struct file *file, struct address_space *mapping, + struct list_head *pages, unsigned nr_pages) +{ + return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_block); +} // // BAD: new directories have stat data of new type and all other items @@ -1809,13 +1822,19 @@ static int map_block_for_writepage(struct inode *inode, int use_get_block = 0 ; int bytes_copied = 0 ; int copy_size ; + int trans_running = 0; + + /* catch places below that try to log something without starting a trans */ + th.t_trans_id = 0; + + if (!buffer_uptodate(bh_result)) { + buffer_error(); + return -EIO; + } kmap(bh_result->b_page) ; start_over: reiserfs_write_lock(inode->i_sb); - journal_begin(&th, inode->i_sb, jbegin_count) ; - reiserfs_update_inode_transaction(inode) ; - make_cpu_key(&key, inode, byte_offset, TYPE_ANY, 3) ; research: @@ -1841,7 +1860,6 @@ research: goto out ; } set_block_dev_mapped(bh_result, get_block_num(item,pos_in_item),inode); - set_buffer_uptodate(bh_result); } else if (is_direct_le_ih(ih)) { char *p ; p = page_address(bh_result->b_page) ; @@ -1850,7 +1868,20 @@ research: fs_gen = get_generation(inode->i_sb) ; copy_item_head(&tmp_ih, ih) ; + + if (!trans_running) { + /* vs-3050 is gone, no need to drop the path */ + journal_begin(&th, inode->i_sb, jbegin_count) ; + reiserfs_update_inode_transaction(inode) ; + trans_running = 1; + if (fs_changed(fs_gen, inode->i_sb) && item_moved(&tmp_ih, &path)) { + reiserfs_restore_prepared_buffer(inode->i_sb, bh) ; + goto research; + } + } + reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ; + if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) { reiserfs_restore_prepared_buffer(inode->i_sb, bh) ; goto research; @@ -1861,7 +1892,6 @@ research: journal_mark_dirty(&th, inode->i_sb, bh) ; bytes_copied += copy_size ; set_block_dev_mapped(bh_result, 0, inode); - set_buffer_uptodate(bh_result); /* are there still bytes left? */ if (bytes_copied < bh_result->b_size && @@ -1878,7 +1908,10 @@ research: out: pathrelse(&path) ; - journal_end(&th, inode->i_sb, jbegin_count) ; + if (trans_running) { + journal_end(&th, inode->i_sb, jbegin_count) ; + trans_running = 0; + } reiserfs_write_unlock(inode->i_sb); /* this is where we fill in holes in the file. */ @@ -1894,49 +1927,77 @@ out: } } kunmap(bh_result->b_page) ; + + if (!retval && buffer_mapped(bh_result) && bh_result->b_blocknr == 0) { + /* we've copied data from the page into the direct item, so the + * buffer in the page is now clean, mark it to reflect that. + */ + lock_buffer(bh_result); + clear_buffer_dirty(bh_result); + unlock_buffer(bh_result); + } return retval ; } -/* helper func to get a buffer head ready for writepage to send to -** ll_rw_block -*/ -static inline void submit_bh_for_writepage(struct buffer_head **bhp, int nr) { - struct buffer_head *bh ; - int i; - for(i = 0 ; i < nr ; i++) { - bh = bhp[i] ; - lock_buffer(bh) ; - mark_buffer_async_write(bh) ; - /* submit_bh doesn't care if the buffer is dirty, but nobody - ** later on in the call chain will be cleaning it. So, we - ** clean the buffer here, it still gets written either way. 
- */ - clear_buffer_dirty(bh) ; - set_buffer_uptodate(bh) ; - submit_bh(WRITE, bh) ; +/* + * does the right thing for deciding when to lock a buffer and + * mark it for io during a writepage. make sure the buffer is + * dirty before sending it here though. + */ +static void lock_buffer_for_writepage(struct page *page, + struct writeback_control *wbc, + struct buffer_head *bh) +{ + if (wbc->sync_mode != WB_SYNC_NONE) { + lock_buffer(bh); + } else { + if (test_set_buffer_locked(bh)) { + __set_page_dirty_nobuffers(page); + return; + } + } + if (test_clear_buffer_dirty(bh)) { + if (!buffer_uptodate(bh)) + buffer_error(); + mark_buffer_async_write(bh); + } else { + unlock_buffer(bh); } } +/* + * mason@suse.com: updated in 2.5.54 to follow the same general io + * start/recovery path as __block_write_full_page, along with special + * code to handle reiserfs tails. + */ static int reiserfs_write_full_page(struct page *page, struct writeback_control *wbc) { struct inode *inode = page->mapping->host ; unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT ; - unsigned last_offset = PAGE_CACHE_SIZE; int error = 0; unsigned long block ; - unsigned cur_offset = 0 ; - struct buffer_head *head, *bh ; + struct buffer_head *head, *bh; int partial = 0 ; - struct buffer_head *arr[PAGE_CACHE_SIZE/512] ; - int nr = 0 ; + int nr = 0; - if (!page_has_buffers(page)) - block_prepare_write(page, 0, 0, NULL) ; + /* The page dirty bit is cleared before writepage is called, which + * means we have to tell create_empty_buffers to make dirty buffers + * The page really should be up to date at this point, so tossing + * in the BH_Uptodate is just a sanity check. + */ + if (!page_has_buffers(page)) { + if (!PageUptodate(page)) + buffer_error(); + create_empty_buffers(page, inode->i_sb->s_blocksize, + (1 << BH_Dirty) | (1 << BH_Uptodate)); + } + head = page_buffers(page) ; /* last page in the file, zero out any contents past the ** last byte in the file */ if (page->index >= end_index) { char *kaddr; + unsigned last_offset; last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1) ; /* no file contents in this page */ @@ -1949,66 +2010,107 @@ static int reiserfs_write_full_page(struct page *page, struct writeback_control flush_dcache_page(page) ; kunmap_atomic(kaddr, KM_USER0) ; } - head = page_buffers(page) ; bh = head ; block = page->index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits) ; do { - /* if this offset in the page is outside the file */ - if (cur_offset >= last_offset) { - if (!buffer_uptodate(bh)) - partial = 1 ; - } else { - /* fast path, buffer mapped to an unformatted node */ + get_bh(bh); + if (buffer_dirty(bh)) { if (buffer_mapped(bh) && bh->b_blocknr != 0) { - arr[nr++] = bh ; + /* buffer mapped to an unformatted node */ + lock_buffer_for_writepage(page, wbc, bh); } else { - /* buffer not mapped yet, or points to a direct item. - ** search and dirty or log - */ + /* not mapped yet, or it points to a direct item, search + * the btree for the mapping info, and log any direct + * items found + */ if ((error = map_block_for_writepage(inode, bh, block))) { goto fail ; } - /* map_block_for_writepage either found an unformatted node - ** and mapped it for us, or it found a direct item - ** and logged the changes. 
- */ - if (buffer_mapped(bh) && bh->b_blocknr != 0) { - arr[nr++] = bh ; - } + if (buffer_mapped(bh) && bh->b_blocknr != 0) { + /* buffer mapped to an unformatted node */ + lock_buffer_for_writepage(page, wbc, bh); + } } } - bh = bh->b_this_page ; - cur_offset += bh->b_size ; - block++ ; + bh = bh->b_this_page; + block++; } while(bh != head) ; - if (!partial) - SetPageUptodate(page) ; BUG_ON(PageWriteback(page)); SetPageWriteback(page); unlock_page(page); - /* if this page only had a direct item, it is very possible for - ** nr == 0 without there being any kind of error. - */ - if (nr) { - submit_bh_for_writepage(arr, nr) ; - } else { - end_page_writeback(page) ; + /* + * since any buffer might be the only dirty buffer on the page, + * the first submit_bh can bring the page out of writeback. + * be careful with the buffers. + */ + do { + struct buffer_head *next = bh->b_this_page; + if (buffer_async_write(bh)) { + submit_bh(WRITE, bh); + nr++; + } + put_bh(bh); + bh = next; + } while(bh != head); + + error = 0; +done: + if (nr == 0) { + /* + * if this page only had a direct item, it is very possible for + * no io to be required without there being an error. Or, + * someone else could have locked them and sent them down the + * pipe without locking the page + */ + bh = head; + do { + if (!buffer_uptodate(bh)) { + partial = 1; + break; + } + bh = bh->b_this_page; + } while(bh != head); + if (!partial) + SetPageUptodate(page); + end_page_writeback(page); } - - return 0 ; + return error; fail: - if (nr) { - SetPageWriteback(page); - unlock_page(page); - submit_bh_for_writepage(arr, nr) ; - } else { - unlock_page(page) ; - } - ClearPageUptodate(page) ; - return error ; + /* catches various errors; we need to make sure any valid dirty blocks + * get to the media. The page is currently locked and not marked for + * writeback + */ + ClearPageUptodate(page); + bh = head; + do { + get_bh(bh); + if (buffer_mapped(bh) && buffer_dirty(bh) && bh->b_blocknr) { + lock_buffer(bh); + mark_buffer_async_write(bh); + } else { + /* + * clear any dirty bits that might have come from getting + * attached to a dirty page + */ + clear_buffer_dirty(bh); + } + bh = bh->b_this_page; + } while(bh != head); + SetPageError(page); + BUG_ON(PageWriteback(page)); + SetPageWriteback(page); + unlock_page(page); + do { + struct buffer_head *next = bh->b_this_page; + if (buffer_async_write(bh)) { + clear_buffer_dirty(bh); + submit_bh(WRITE, bh); + nr++; + } + put_bh(bh); + bh = next; + } while(bh != head); + goto done; } @@ -2115,6 +2217,7 @@ static int reiserfs_releasepage(struct page *page, int unused_gfp_flags) struct address_space_operations reiserfs_address_space_operations = { .writepage = reiserfs_writepage, .readpage = reiserfs_readpage, + .readpages = reiserfs_readpages, .releasepage = reiserfs_releasepage, .sync_page = block_sync_page, .prepare_write = reiserfs_prepare_write, diff --git a/fs/smbfs/smbiod.c b/fs/smbfs/smbiod.c index 5f7d1d5969fe..41d5bbd8a334 100644 --- a/fs/smbfs/smbiod.c +++ b/fs/smbfs/smbiod.c @@ -285,10 +285,10 @@ static int smbiod(void *unused) MOD_INC_USE_COUNT; daemonize(); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); siginitsetinv(&current->blocked, sigmask(SIGKILL)); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); strcpy(current->comm, "smbiod"); diff --git a/fs/xfs/linux/xfs_aops.c b/fs/xfs/linux/xfs_aops.c index 9398993ec4d4..5505483ca88c 100644 --- a/fs/xfs/linux/xfs_aops.c +++ b/fs/xfs/linux/xfs_aops.c @@ -50,7 +50,7 @@ map_blocks( if (((flags & (PBF_DIRECT|PBF_SYNC)) == PBF_DIRECT) && (offset >=
inode->i_size)) - count = max(count, XFS_WRITE_IO_LOG); + count = max_t(ssize_t, count, XFS_WRITE_IO_LOG); retry: VOP_BMAP(vp, offset, count, flags, pbmapp, &nmaps, error); if (flags & PBF_WRITE) { diff --git a/fs/xfs/pagebuf/page_buf.c b/fs/xfs/pagebuf/page_buf.c index 4c60a8799fcb..d6b027eb2022 100644 --- a/fs/xfs/pagebuf/page_buf.c +++ b/fs/xfs/pagebuf/page_buf.c @@ -1581,10 +1581,10 @@ pagebuf_daemon( daemonize(); /* Avoid signals */ - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); sigfillset(&current->blocked); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); strcpy(current->comm, "pagebufd"); current->flags |= PF_MEMALLOC; diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 27d7013b4d80..8a20f1cfc415 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -5522,7 +5522,7 @@ xfs_getbmap( int prealloced; /* this is a file with * preallocated data space */ int sh_unwritten; /* true, if unwritten */ - /* extents listed seperately */ + /* extents listed separately */ int bmapi_flags; /* flags for xfs_bmapi */ __int32_t oflags; /* getbmapx bmv_oflags field */
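
The kerneldoc blocks added to fs/jbd/transaction.c above document the handle API one call at a time. For orientation, here is a minimal sketch of how a caller typically strings those calls together for a single metadata update. It is not part of the patch; it assumes <linux/jbd.h>, an already-running journal_t, and a buffer_head the caller holds a reference on, and the function name is made up.

/*
 * Sketch only: bracket one metadata modification with a jbd handle,
 * using the calls documented in the hunks above.
 */
static int modify_one_metadata_block(journal_t *journal,
				     struct buffer_head *bh)
{
	handle_t *handle;
	int err;

	/* reserve credit for one metadata buffer in the transaction */
	handle = journal_start(journal, 1);
	if (IS_ERR(handle))
		return PTR_ERR(handle);

	/* declare intent to modify bh before touching bh->b_data */
	err = journal_get_write_access(handle, bh);
	if (!err) {
		/* ... modify bh->b_data here ... */
		err = journal_dirty_metadata(handle, bh);
	}

	/* drop the handle; -EIO here means the journal was aborted */
	if (journal_stop(handle) == -EIO && !err)
		err = -EIO;
	return err;
}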
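
journal_get_undo_access() is the variant for changes that must not become visible before commit; the comment above names block deallocation as the motivating case. A hypothetical sketch of that shape follows. The helper is illustrative, not ext3's actual bitmap code.

/*
 * Sketch only: clear a bit in an allocation bitmap while jbd keeps the
 * committed copy of the buffer, so the freed space cannot be reused
 * until the deallocation itself has committed.
 */
static int free_bitmap_bit(handle_t *handle, struct buffer_head *bitmap_bh,
			   int bit)
{
	int err = journal_get_undo_access(handle, bitmap_bh);

	if (err)
		return err;
	clear_bit(bit, (unsigned long *)bitmap_bh->b_data);
	return journal_dirty_metadata(handle, bitmap_bh);
}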
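
journal_callback_set() links the caller-allocated struct journal_callback into the handle, so the usual pattern is to embed it in a larger struct and recover that struct in the callback. A hypothetical embedding; the struct name, payload and kfree() policy are all illustrative.

struct my_commit_info {
	struct journal_callback jcb;	/* jbd's linkage and function pointer */
	sector_t blocknr;		/* caller's payload */
};

/* runs once the handle's transaction has committed; error is the
 * journal's error status at that point */
static void my_commit_done(struct journal_callback *jcb, int error)
{
	struct my_commit_info *info = (struct my_commit_info *)jcb;

	if (!error)
		printk(KERN_DEBUG "block %llu is on disk\n",
		       (unsigned long long)info->blocknr);
	kfree(info);
}

While the handle is still open, the caller would register this with journal_callback_set(handle, my_commit_done, &info->jcb).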
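
journal_try_to_free_buffers() and journal_invalidatepage(), documented above, are meant to sit behind the corresponding address_space operations of a journalled filesystem. A rough sketch of that glue for a hypothetical filesystem; myfs_journal() is a stand-in for however the fs locates its journal_t, and the operation signatures are the 2.5-era ones used elsewhere in this diff.

static journal_t *myfs_journal(struct inode *inode);	/* assumed helper */

static int myfs_releasepage(struct page *page, int gfp_mask)
{
	journal_t *journal = myfs_journal(page->mapping->host);

	/* non-zero tells the VM that try_to_free_buffers() may proceed */
	return journal_try_to_free_buffers(journal, page, gfp_mask);
}

static int myfs_invalidatepage(struct page *page, unsigned long offset)
{
	journal_t *journal = myfs_journal(page->mapping->host);

	/* reap buffers containing data after offset in the page */
	return journal_invalidatepage(journal, page, offset);
}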
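
Away from jbd, most of this diff is the mechanical current->sig to current->sighand rename, and nearly every touched site is a variant of one idiom: take the per-process signal lock, adjust current->blocked, recompute the pending flag, unlock. Two sketches of the idiom as stand-alone helpers; the helper names are hypothetical, but the calls are exactly those in the hunks.

/* Sketch: a kernel thread blocks every signal, as the jfs, pagebuf
 * and smbfs daemons above do at startup. */
static void daemon_block_all_signals(void)
{
	spin_lock_irq(&current->sighand->siglock);
	sigfillset(&current->blocked);
	recalc_sigpending();
	spin_unlock_irq(&current->sighand->siglock);
}

/* Sketch: block everything except SIGKILL, as lockd and smbiod do. */
static void daemon_allow_only_sigkill(void)
{
	spin_lock_irq(&current->sighand->siglock);
	siginitsetinv(&current->blocked, sigmask(SIGKILL));
	recalc_sigpending();
	spin_unlock_irq(&current->sighand->siglock);
}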
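
Paths that only need signals masked temporarily (autofs, the lockd client calls, ncpfs) add a save and restore step around the blocking work. A sketch of that shape, with the blocking operation passed in as a hypothetical callback:

/*
 * Sketch only: run fn(arg) with all signals masked, then restore the
 * caller's mask.  The irqsave variants mirror the converted call sites.
 */
static int run_with_signals_masked(int (*fn)(void *), void *arg)
{
	sigset_t oldset;
	unsigned long flags;
	int status;

	spin_lock_irqsave(&current->sighand->siglock, flags);
	oldset = current->blocked;		/* remember the caller's mask */
	sigfillset(&current->blocked);
	recalc_sigpending();
	spin_unlock_irqrestore(&current->sighand->siglock, flags);

	status = fn(arg);

	spin_lock_irqsave(&current->sighand->siglock, flags);
	current->blocked = oldset;		/* put the mask back */
	recalc_sigpending();
	spin_unlock_irqrestore(&current->sighand->siglock, flags);
	return status;
}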
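
The new map_buffer_to_page() in fs/mpage.c covers get_block callbacks that hand back an already up-to-date buffer, which happens when the filesystem copies packed or tail data into the buffer during the lookup (reiserfs tails being the case this patch cares about). A hypothetical get_block showing how such a buffer would be produced; myfs_read_packed_tail() and myfs_lookup_block() are assumed helpers, not real interfaces.

static int myfs_read_packed_tail(struct inode *inode, sector_t iblock,
				 char *buf, int size);		/* assumed */
static sector_t myfs_lookup_block(struct inode *inode, sector_t iblock);

static int myfs_get_block(struct inode *inode, sector_t iblock,
			  struct buffer_head *bh_result, int create)
{
	int len = myfs_read_packed_tail(inode, iblock, bh_result->b_data,
					bh_result->b_size);

	if (len > 0) {
		/* data was copied during the lookup: zero the remainder
		 * and mark the buffer up to date so mpage skips the read */
		memset(bh_result->b_data + len, 0, bh_result->b_size - len);
		set_buffer_uptodate(bh_result);
		return 0;
	}
	map_bh(bh_result, inode->i_sb, myfs_lookup_block(inode, iblock));
	return 0;
}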
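
Finally, the reiserfs hunk wires the new ->readpages operation straight through to mpage_readpages(), and any get_block-based filesystem can use the same two-liner. A sketch reusing the hypothetical myfs_get_block above:

/* Sketch only: let fs/mpage.c batch contiguous blocks from the whole
 * page list into large BIOs; do_mpage_readpage falls back to
 * block_read_full_page() for pages it cannot handle simply. */
static int myfs_readpages(struct file *file, struct address_space *mapping,
			  struct list_head *pages, unsigned nr_pages)
{
	return mpage_readpages(mapping, pages, nr_pages, myfs_get_block);
}

It would then be hooked up next to .readpage in the filesystem's address_space_operations table, exactly as the reiserfs hunk does.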
