Diffstat (limited to 'fs')
37 files changed, 889 insertions, 500 deletions
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index b3eca7db8051..99960d002026 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -127,10 +127,10 @@ static int kafscmd(void *arg)
 	complete(&kafscmd_alive);
 
 	/* only certain signals are of interest */
-	spin_lock_irq(&current->sig->siglock);
+	spin_lock_irq(&current->sighand->siglock);
 	siginitsetinv(&current->blocked,0);
 	recalc_sigpending();
-	spin_unlock_irq(&current->sig->siglock);
+	spin_unlock_irq(&current->sighand->siglock);
 
 	/* loop around looking for things to attend to */
 	do {

diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index a875684e3d4b..7de072e495c0 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -46,9 +46,9 @@ static inline void afs_discard_my_signals(void)
 	while (signal_pending(current)) {
 		siginfo_t sinfo;
 
-		spin_lock_irq(&current->sig->siglock);
+		spin_lock_irq(&current->sighand->siglock);
 		dequeue_signal(&current->blocked,&sinfo);
-		spin_unlock_irq(&current->sig->siglock);
+		spin_unlock_irq(&current->sighand->siglock);
 	}
 }

diff --git a/fs/afs/kafsasyncd.c b/fs/afs/kafsasyncd.c
index caedebc20095..e546a6da5015 100644
--- a/fs/afs/kafsasyncd.c
+++ b/fs/afs/kafsasyncd.c
@@ -101,10 +101,10 @@ static int kafsasyncd(void *arg)
 	complete(&kafsasyncd_alive);
 
 	/* only certain signals are of interest */
-	spin_lock_irq(&current->sig->siglock);
+	spin_lock_irq(&current->sighand->siglock);
 	siginitsetinv(&current->blocked,0);
 	recalc_sigpending();
-	spin_unlock_irq(&current->sig->siglock);
+	spin_unlock_irq(&current->sighand->siglock);
 
 	/* loop around looking for things to attend to */
 	do {

diff --git a/fs/afs/kafstimod.c b/fs/afs/kafstimod.c
index 0d3f30a73657..2b0f5a9d84e9 100644
--- a/fs/afs/kafstimod.c
+++ b/fs/afs/kafstimod.c
@@ -78,10 +78,10 @@ static int kafstimod(void *arg)
 	complete(&kafstimod_alive);
 
 	/* only certain signals are of interest */
-	spin_lock_irq(&current->sig->siglock);
+	spin_lock_irq(&current->sighand->siglock);
 	siginitsetinv(&current->blocked,0);
 	recalc_sigpending();
-	spin_unlock_irq(&current->sig->siglock);
+	spin_unlock_irq(&current->sighand->siglock);
 
 	/* loop around looking for things to attend to */
 loop:

diff --git a/fs/autofs/waitq.c b/fs/autofs/waitq.c
index c212015631b9..6c82dc144b33 100644
--- a/fs/autofs/waitq.c
+++ b/fs/autofs/waitq.c
@@ -70,10 +70,10 @@ static int autofs_write(struct file *file, const void *addr, int bytes)
 	/* Keep the currently executing process from receiving a
 	   SIGPIPE unless it was already supposed to get one */
 	if (wr == -EPIPE && !sigpipe) {
-		spin_lock_irqsave(&current->sig->siglock, flags);
+		spin_lock_irqsave(&current->sighand->siglock, flags);
 		sigdelset(&current->pending.signal, SIGPIPE);
 		recalc_sigpending();
-		spin_unlock_irqrestore(&current->sig->siglock, flags);
+		spin_unlock_irqrestore(&current->sighand->siglock, flags);
 	}
 
 	return (bytes > 0);
@@ -161,18 +161,18 @@ int autofs_wait(struct autofs_sb_info *sbi, struct qstr *name)
 		sigset_t oldset;
 		unsigned long irqflags;
 
-		spin_lock_irqsave(&current->sig->siglock, irqflags);
+		spin_lock_irqsave(&current->sighand->siglock, irqflags);
 		oldset = current->blocked;
 		siginitsetinv(&current->blocked, SHUTDOWN_SIGS & ~oldset.sig[0]);
 		recalc_sigpending();
-		spin_unlock_irqrestore(&current->sig->siglock, irqflags);
+		spin_unlock_irqrestore(&current->sighand->siglock, irqflags);
 
 		interruptible_sleep_on(&wq->queue);
 
-		spin_lock_irqsave(&current->sig->siglock, irqflags);
+		spin_lock_irqsave(&current->sighand->siglock, irqflags);
 		current->blocked = oldset;
 		recalc_sigpending();
-		spin_unlock_irqrestore(&current->sig->siglock, irqflags);
+		spin_unlock_irqrestore(&current->sighand->siglock, irqflags);
 	} else {
 		DPRINTK(("autofs_wait: skipped sleeping\n"));
 	}
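The AFS and autofs hunks above are the same mechanical rename (current->sig becomes current->sighand) applied to a common kernel-thread idiom: take the signal lock, choose which signals the daemon thread is willing to see, recompute pending state, drop the lock. A rough user-space analogue of that idiom, using POSIX thread signal masks instead of the kernel's siglock (all names here are illustrative, not from the patch):

/* sketch.c - block everything except the shutdown signals in a worker
 * thread, then wait for them synchronously; compile with -lpthread. */
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static void *worker(void *arg)
{
	sigset_t interesting;
	int sig;

	/* only certain signals are of interest */
	sigemptyset(&interesting);
	sigaddset(&interesting, SIGTERM);
	sigaddset(&interesting, SIGINT);

	/* loop around looking for things to attend to */
	if (sigwait(&interesting, &sig) == 0)
		printf("worker: got signal %d, shutting down\n", sig);
	return NULL;
}

int main(void)
{
	pthread_t t;
	sigset_t all;

	/* block everything in main so the worker inherits the mask */
	sigfillset(&all);
	pthread_sigmask(SIG_SETMASK, &all, NULL);

	pthread_create(&t, NULL, worker, NULL);
	sleep(1);
	pthread_kill(t, SIGTERM);	/* simulate the shutdown request */
	pthread_join(t, NULL);
	return 0;
}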
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 7af5f71e16b9..c1b7279cae81 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -74,10 +74,10 @@ static int autofs4_write(struct file *file, const void *addr, int bytes)
 	/* Keep the currently executing process from receiving a
 	   SIGPIPE unless it was already supposed to get one */
 	if (wr == -EPIPE && !sigpipe) {
-		spin_lock_irqsave(&current->sig->siglock, flags);
+		spin_lock_irqsave(&current->sighand->siglock, flags);
 		sigdelset(&current->pending.signal, SIGPIPE);
 		recalc_sigpending();
-		spin_unlock_irqrestore(&current->sig->siglock, flags);
+		spin_unlock_irqrestore(&current->sighand->siglock, flags);
 	}
 
 	return (bytes > 0);
@@ -198,18 +198,18 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct qstr *name,
 		sigset_t oldset;
 		unsigned long irqflags;
 
-		spin_lock_irqsave(&current->sig->siglock, irqflags);
+		spin_lock_irqsave(&current->sighand->siglock, irqflags);
 		oldset = current->blocked;
 		siginitsetinv(&current->blocked, SHUTDOWN_SIGS & ~oldset.sig[0]);
 		recalc_sigpending();
-		spin_unlock_irqrestore(&current->sig->siglock, irqflags);
+		spin_unlock_irqrestore(&current->sighand->siglock, irqflags);
 
 		interruptible_sleep_on(&wq->queue);
 
-		spin_lock_irqsave(&current->sig->siglock, irqflags);
+		spin_lock_irqsave(&current->sighand->siglock, irqflags);
 		current->blocked = oldset;
 		recalc_sigpending();
-		spin_unlock_irqrestore(&current->sig->siglock, irqflags);
+		spin_unlock_irqrestore(&current->sighand->siglock, irqflags);
 	} else {
 		DPRINTK(("autofs_wait: skipped sleeping\n"));
 	}

diff --git a/fs/befs/ChangeLog b/fs/befs/ChangeLog
index 8e09a0bd8ebb..6774a4e815b2 100644
--- a/fs/befs/ChangeLog
+++ b/fs/befs/ChangeLog
@@ -60,7 +60,7 @@ Version 0.63 (2002-01-31)
 * Documentation improvements in source. [WD]
 
-* Makefile fix for independant module when CONFIG_MODVERSION is set in
+* Makefile fix for independent module when CONFIG_MODVERSION is set in
 kernel config [Pavel Roskin <proski@gnu.org>]
 
 * Compile warning fix for namei.c. [Sergey S. Kostyliov <rathamahata@php4.ru>]

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 0d214c4a54fd..a95b66f560c7 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1058,7 +1058,7 @@ static inline void fill_note(struct memelfnote *note, const char *name, int type
 
 /*
 * fill up all the fields in prstatus from the given task struct, except registers
- * which need to be filled up seperately.
+ * which need to be filled up separately.
 */
 static inline void fill_prstatus(struct elf_prstatus *prstatus, struct task_struct *p, long signr)
 {

diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 7f088639326e..7975056a6995 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -403,7 +403,7 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 	set_personality(PER_LINUX);
 
 	/*
-	 * there are a couple of cases here,  the seperate code/data
+	 * there are a couple of cases here,  the separate code/data
 	 * case,  and then the fully copied to RAM case which lumps
 	 * it all together.
 	 */

diff --git a/fs/buffer.c b/fs/buffer.c
index 3fc9e47c5a0a..bf6ae714c730 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -127,9 +127,10 @@ void __wait_on_buffer(struct buffer_head * bh)
 	get_bh(bh);
 	do {
 		prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
-		blk_run_queues();
-		if (buffer_locked(bh))
+		if (buffer_locked(bh)) {
+			blk_run_queues();
 			io_schedule();
+		}
 	} while (buffer_locked(bh));
 	put_bh(bh);
 	finish_wait(wqh, &wait);
@@ -959,8 +960,6 @@ no_grow:
 	 * the reserve list is empty, we're sure there are
 	 * async buffer heads in use.
 	 */
-	blk_run_queues();
-
 	free_more_memory();
 	goto try_again;
 }

diff --git a/fs/char_dev.c b/fs/char_dev.c
index ff34b5e336cd..ec9489c3a387 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -1,5 +1,5 @@
 /*
- *  linux/fs/block_dev.c
+ *  linux/fs/char_dev.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */
@@ -38,16 +38,13 @@ static kmem_cache_t * cdev_cachep;
 	((struct char_device *) kmem_cache_alloc(cdev_cachep, SLAB_KERNEL))
 #define destroy_cdev(cdev) kmem_cache_free(cdev_cachep, (cdev))
 
-static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
+static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
 {
-	struct char_device * cdev = (struct char_device *) foo;
+	struct char_device *cdev = (struct char_device *) foo;
 
 	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
 	    SLAB_CTOR_CONSTRUCTOR)
-	{
 		memset(cdev, 0, sizeof(*cdev));
-		sema_init(&cdev->sem, 1);
-	}
 }
 
 void __init cdev_cache_init(void)

diff --git a/fs/exec.c b/fs/exec.c
index 8be3fa7c0ff2..a63d5c43da1f 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -300,6 +300,8 @@ void put_dirty_page(struct task_struct * tsk, struct page *page, unsigned long a
 	pgd = pgd_offset(tsk->mm, address);
 
 	pte_chain = pte_chain_alloc(GFP_KERNEL);
+	if (!pte_chain)
+		goto out_sig;
 	spin_lock(&tsk->mm->page_table_lock);
 	pmd = pmd_alloc(tsk->mm, pgd, address);
 	if (!pmd)
@@ -325,6 +327,7 @@ void put_dirty_page(struct task_struct * tsk, struct page *page, unsigned long a
 	return;
 out:
 	spin_unlock(&tsk->mm->page_table_lock);
+out_sig:
 	__free_page(page);
 	force_sig(SIGKILL, tsk);
 	pte_chain_free(pte_chain);
@@ -556,35 +559,65 @@ static inline void put_proc_dentry(struct dentry *dentry)
 * disturbing other processes.  (Other processes might share the signal
 * table via the CLONE_SIGHAND option to clone().)
 */
-static inline int de_thread(struct signal_struct *oldsig)
+static inline int de_thread(struct task_struct *tsk)
 {
-	struct signal_struct *newsig;
+	struct signal_struct *newsig, *oldsig = tsk->signal;
+	struct sighand_struct *newsighand, *oldsighand = tsk->sighand;
+	spinlock_t *lock = &oldsighand->siglock;
 	int count;
 
-	if (atomic_read(&current->sig->count) <= 1)
+	/*
+	 * If we don't share sighandlers, then we aren't sharing anything
+	 * and we can just re-use it all.
+	 */
+	if (atomic_read(&oldsighand->count) <= 1)
 		return 0;
 
-	newsig = kmem_cache_alloc(sigact_cachep, GFP_KERNEL);
-	if (!newsig)
+	newsighand = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
+	if (!newsighand)
 		return -ENOMEM;
 
+	spin_lock_init(&newsighand->siglock);
+	atomic_set(&newsighand->count, 1);
+	memcpy(newsighand->action, oldsighand->action, sizeof(newsighand->action));
+
+	/*
+	 * See if we need to allocate a new signal structure
+	 */
+	newsig = NULL;
+	if (atomic_read(&oldsig->count) > 1) {
+		newsig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
+		if (!newsig) {
+			kmem_cache_free(sighand_cachep, newsighand);
+			return -ENOMEM;
+		}
+		atomic_set(&newsig->count, 1);
+		newsig->group_exit = 0;
+		newsig->group_exit_code = 0;
+		newsig->group_exit_task = NULL;
+		newsig->group_stop_count = 0;
+		init_sigpending(&newsig->shared_pending);
+	}
+
 	if (thread_group_empty(current))
-		goto out;
+		goto no_thread_group;
 
 	/*
 	 * Kill all other threads in the thread group:
 	 */
-	spin_lock_irq(&oldsig->siglock);
+	spin_lock_irq(lock);
 	if (oldsig->group_exit) {
 		/*
 		 * Another group action in progress, just
 		 * return so that the signal is processed.
 		 */
-		spin_unlock_irq(&oldsig->siglock);
-		kmem_cache_free(sigact_cachep, newsig);
+		spin_unlock_irq(lock);
+		kmem_cache_free(sighand_cachep, newsighand);
+		if (newsig)
+			kmem_cache_free(signal_cachep, newsig);
 		return -EAGAIN;
 	}
 	oldsig->group_exit = 1;
-	__broadcast_thread_group(current, SIGKILL);
+	zap_other_threads(current);
 
 	/*
 	 * Account for the thread group leader hanging around:
@@ -595,13 +628,13 @@ static inline int de_thread(struct signal_struct *oldsig)
 	while (atomic_read(&oldsig->count) > count) {
 		oldsig->group_exit_task = current;
 		current->state = TASK_UNINTERRUPTIBLE;
-		spin_unlock_irq(&oldsig->siglock);
+		spin_unlock_irq(lock);
 		schedule();
-		spin_lock_irq(&oldsig->siglock);
+		spin_lock_irq(lock);
 		if (oldsig->group_exit_task)
 			BUG();
 	}
-	spin_unlock_irq(&oldsig->siglock);
+	spin_unlock_irq(lock);
 
 	/*
 	 * At this point all other threads have exited, all we have to
@@ -656,7 +689,8 @@ static inline int de_thread(struct signal_struct *oldsig)
 		current->ptrace = ptrace;
 		__ptrace_link(current, parent);
 	}
-
+
+	list_del(&current->tasks);
 	list_add_tail(&current->tasks, &init_task.tasks);
 	current->exit_signal = SIGCHLD;
 	state = leader->state;
@@ -671,31 +705,29 @@ static inline int de_thread(struct signal_struct *oldsig)
 		release_task(leader);
 	}
 
-out:
-	spin_lock_init(&newsig->siglock);
-	atomic_set(&newsig->count, 1);
-	newsig->group_exit = 0;
-	newsig->group_exit_code = 0;
-	newsig->group_exit_task = NULL;
-	memcpy(newsig->action, current->sig->action, sizeof(newsig->action));
-	init_sigpending(&newsig->shared_pending);
+no_thread_group:
 
 	write_lock_irq(&tasklist_lock);
-	spin_lock(&oldsig->siglock);
-	spin_lock(&newsig->siglock);
+	spin_lock(&oldsighand->siglock);
+	spin_lock(&newsighand->siglock);
 
 	if (current == oldsig->curr_target)
 		oldsig->curr_target = next_thread(current);
-	current->sig = newsig;
+	if (newsig)
+		current->signal = newsig;
+	current->sighand = newsighand;
 	init_sigpending(&current->pending);
 	recalc_sigpending();
 
-	spin_unlock(&newsig->siglock);
-	spin_unlock(&oldsig->siglock);
+	spin_unlock(&newsighand->siglock);
+	spin_unlock(&oldsighand->siglock);
 	write_unlock_irq(&tasklist_lock);
 
-	if (atomic_dec_and_test(&oldsig->count))
-		kmem_cache_free(sigact_cachep, oldsig);
+	if (newsig && atomic_dec_and_test(&oldsig->count))
+		kmem_cache_free(signal_cachep, oldsig);
+
+	if (atomic_dec_and_test(&oldsighand->count))
+		kmem_cache_free(sighand_cachep, oldsighand);
 
 	if (!thread_group_empty(current))
 		BUG();
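The reworked de_thread() splits the old signal_struct into a shared signal part and a sighand part, and only copies whichever is actually shared, dropping the old reference afterwards. A minimal user-space sketch of that copy-on-unshare refcount pattern, using C11 atomics; the struct and all names here are hypothetical stand-ins, not the kernel's:

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct shared_table {
	atomic_int count;
	int action[64];		/* stand-in for the signal handler table */
};

/* Give the caller a private copy iff the table is shared (count > 1). */
static struct shared_table *unshare(struct shared_table *old)
{
	struct shared_table *new;

	if (atomic_load(&old->count) <= 1)
		return old;			/* already private: reuse it */

	new = malloc(sizeof(*new));
	if (!new)
		return NULL;
	memcpy(new->action, old->action, sizeof(new->action));
	atomic_init(&new->count, 1);

	/* drop our reference to the shared copy; free it if it was the last */
	if (atomic_fetch_sub(&old->count, 1) == 1)
		free(old);
	return new;
}

int main(void)
{
	struct shared_table *old = malloc(sizeof(*old));
	struct shared_table *mine;

	atomic_init(&old->count, 2);	/* pretend one other thread shares it */
	old->action[0] = 7;

	mine = unshare(old);		/* we now own a private copy */
	printf("private copy, action[0] = %d\n", mine->action[0]);

	free(mine);
	free(old);			/* the "other thread's" reference */
	return 0;
}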
@@ -741,21 +773,20 @@ int flush_old_exec(struct linux_binprm * bprm)
 {
 	char * name;
 	int i, ch, retval;
-	struct signal_struct * oldsig = current->sig;
 
 	/*
 	 * Release all of the old mmap stuff
 	 */
 	retval = exec_mmap(bprm->mm);
 	if (retval)
-		goto mmap_failed;
+		goto out;
 
 	/*
 	 * Make sure we have a private signal table and that
 	 * we are unassociated from the previous thread group.
 	 */
-	retval = de_thread(oldsig);
+	retval = de_thread(current);
 	if (retval)
-		goto flush_failed;
+		goto out;
 
 	/* This is the point of no return */
@@ -789,14 +820,7 @@ int flush_old_exec(struct linux_binprm * bprm)
 
 	return 0;
 
-mmap_failed:
-flush_failed:
-	spin_lock_irq(&current->sig->siglock);
-	if (current->sig != oldsig) {
-		kmem_cache_free(sigact_cachep, current->sig);
-		current->sig = oldsig;
-	}
-	spin_unlock_irq(&current->sig->siglock);
+out:
 	return retval;
 }
@@ -880,7 +904,7 @@ void compute_creds(struct linux_binprm *bprm)
 	if (must_not_trace_exec(current)
 	    || atomic_read(&current->fs->count) > 1
 	    || atomic_read(&current->files->count) > 1
-	    || atomic_read(&current->sig->count) > 1) {
+	    || atomic_read(&current->sighand->count) > 1) {
 		if(!capable(CAP_SETUID)) {
 			bprm->e_uid = current->uid;
 			bprm->e_gid = current->gid;
@@ -1297,8 +1321,8 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
 	}
 	mm->dumpable = 0;
 	init_completion(&mm->core_done);
-	current->sig->group_exit = 1;
-	current->sig->group_exit_code = exit_code;
+	current->signal->group_exit = 1;
+	current->signal->group_exit_code = exit_code;
 	coredump_wait(mm);
 
 	if (current->rlim[RLIMIT_CORE].rlim_cur < binfmt->min_coredump)
@@ -1325,7 +1349,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
 
 	retval = binfmt->core_dump(signr, regs, file);
 
-	current->sig->group_exit_code |= 0x80;
+	current->signal->group_exit_code |= 0x80;
 
 close_fail:
 	filp_close(file, NULL);
 fail_unlock:
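The `|= 0x80` at the end of do_coredump() sets the conventional "core dumped" bit in the group exit code, which is what eventually reaches the parent in the wait status. A small demonstration from the other side of that convention; note that WCOREDUMP() is a common BSD/glibc extension rather than plain POSIX, so treat this as a hedged sketch:

#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t pid = fork();
	int status;

	if (pid == 0)
		abort();	/* die with SIGABRT, possibly dumping core */

	waitpid(pid, &status, 0);
	if (WIFSIGNALED(status))
		printf("killed by signal %d, core dumped: %s\n",
		       WTERMSIG(status),
		       WCOREDUMP(status) ? "yes" : "no");
	return 0;
}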
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index ccdb52c9cc77..24897acf33da 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -99,6 +99,34 @@ int ext3_forget(handle_t *handle, int is_metadata,
 	return err;
 }
 
+/*
+ * Work out how many blocks we need to progress with the next chunk of a
+ * truncate transaction.
+ */
+static unsigned long blocks_for_truncate(struct inode *inode)
+{
+	unsigned long needed;
+
+	needed = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9);
+
+	/* Give ourselves just enough room to cope with inodes in which
+	 * i_blocks is corrupt: we've seen disk corruptions in the past
+	 * which resulted in random data in an inode which looked enough
+	 * like a regular file for ext3 to try to delete it.  Things
+	 * will go a bit crazy if that happens, but at least we should
+	 * try not to panic the whole kernel. */
+	if (needed < 2)
+		needed = 2;
+
+	/* But we need to bound the transaction so we don't overflow the
+	 * journal. */
+	if (needed > EXT3_MAX_TRANS_DATA)
+		needed = EXT3_MAX_TRANS_DATA;
+
+	return EXT3_DATA_TRANS_BLOCKS + needed;
+}
+
 /*
 * Truncate transactions can be complex and absolutely huge.  So we need to
 * be able to restart the transaction at a conventient checkpoint to make
@@ -112,14 +140,9 @@ int ext3_forget(handle_t *handle, int is_metadata,
 
 static handle_t *start_transaction(struct inode *inode)
 {
-	long needed;
 	handle_t *result;
 
-	needed = inode->i_blocks;
-	if (needed > EXT3_MAX_TRANS_DATA)
-		needed = EXT3_MAX_TRANS_DATA;
-
-	result = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS + needed);
+	result = ext3_journal_start(inode, blocks_for_truncate(inode));
 	if (!IS_ERR(result))
 		return result;
@@ -135,14 +158,9 @@ static handle_t *start_transaction(struct inode *inode)
 */
static int try_to_extend_transaction(handle_t *handle, struct inode *inode)
{
-	long needed;
-
 	if (handle->h_buffer_credits > EXT3_RESERVE_TRANS_BLOCKS)
 		return 0;
-	needed = inode->i_blocks;
-	if (needed > EXT3_MAX_TRANS_DATA)
-		needed = EXT3_MAX_TRANS_DATA;
-	if (!ext3_journal_extend(handle, EXT3_RESERVE_TRANS_BLOCKS + needed))
+	if (!ext3_journal_extend(handle, blocks_for_truncate(inode)))
 		return 0;
 	return 1;
}
@@ -154,11 +172,8 @@ static int try_to_extend_transaction(handle_t *handle, struct inode *inode)
 */
static int ext3_journal_test_restart(handle_t *handle, struct inode *inode)
{
-	long needed = inode->i_blocks;
-	if (needed > EXT3_MAX_TRANS_DATA)
-		needed = EXT3_MAX_TRANS_DATA;
 	jbd_debug(2, "restarting handle %p\n", handle);
-	return ext3_journal_restart(handle, EXT3_DATA_TRANS_BLOCKS + needed);
+	return ext3_journal_restart(handle, blocks_for_truncate(inode));
}
 
 /*
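Besides factoring out the clamp, the new helper converts units: i_blocks counts 512-byte sectors, while the old code clamped raw i_blocks, so the shift by (s_blocksize_bits - 9) turns sectors into filesystem blocks before the bounds apply. A standalone sketch of the same arithmetic with assumed constants (the real EXT3_DATA_TRANS_BLOCKS and EXT3_MAX_TRANS_DATA live in the ext3 headers and depend on configuration):

#include <stdio.h>

/* Assumed values for illustration only. */
#define EXT3_DATA_TRANS_BLOCKS	12
#define EXT3_MAX_TRANS_DATA	64

static unsigned long blocks_for_truncate(unsigned long i_blocks,
					 unsigned int blocksize_bits)
{
	/* i_blocks is in 512-byte sectors; convert to fs blocks */
	unsigned long needed = i_blocks >> (blocksize_bits - 9);

	if (needed < 2)
		needed = 2;			/* floor for corrupt inodes */
	if (needed > EXT3_MAX_TRANS_DATA)
		needed = EXT3_MAX_TRANS_DATA;	/* don't overflow the journal */
	return EXT3_DATA_TRANS_BLOCKS + needed;
}

int main(void)
{
	/* 1 MiB file, 4 KiB blocks: 2048 sectors -> 256 blocks -> clamped */
	printf("%lu\n", blocks_for_truncate(2048, 12));	/* 12 + 64 = 76 */
	/* tiny file: the floor of 2 applies */
	printf("%lu\n", blocks_for_truncate(1, 12));	/* 12 + 2 = 14 */
	return 0;
}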
diff --git a/fs/freevxfs/vxfs_fshead.c b/fs/freevxfs/vxfs_fshead.c
index f0d4e5a7f128..bf8f5e3f90da 100644
--- a/fs/freevxfs/vxfs_fshead.c
+++ b/fs/freevxfs/vxfs_fshead.c
@@ -111,13 +111,15 @@ vxfs_read_fshead(struct super_block *sbp)
 	struct vxfs_fsh *pfp, *sfp;
 	struct vxfs_inode_info *vip, *tip;
 
-	if (!(vip = vxfs_blkiget(sbp, infp->vsi_iext, infp->vsi_fshino))) {
+	vip = vxfs_blkiget(sbp, infp->vsi_iext, infp->vsi_fshino);
+	if (!vip) {
 		printk(KERN_ERR "vxfs: unabled to read fsh inode\n");
 		return -EINVAL;
-	} else if (!VXFS_ISFSH(vip)) {
+	}
+	if (!VXFS_ISFSH(vip)) {
 		printk(KERN_ERR "vxfs: fsh list inode is of wrong type (%x)\n",
 				vip->vii_mode & VXFS_TYPE_MASK);
-		return -EINVAL;
+		goto out_free_fship;
 	}
 
@@ -126,23 +128,26 @@ vxfs_read_fshead(struct super_block *sbp)
 	vxfs_dumpi(vip, infp->vsi_fshino);
 #endif
 
-	if (!(infp->vsi_fship = vxfs_get_fake_inode(sbp, vip))) {
+	infp->vsi_fship = vxfs_get_fake_inode(sbp, vip);
+	if (!infp->vsi_fship) {
 		printk(KERN_ERR "vxfs: unabled to get fsh inode\n");
-		return -EINVAL;
+		goto out_free_fship;
 	}
 
-	if (!(sfp = vxfs_getfsh(infp->vsi_fship, 0))) {
+	sfp = vxfs_getfsh(infp->vsi_fship, 0);
+	if (!sfp) {
 		printk(KERN_ERR "vxfs: unabled to get structural fsh\n");
-		return -EINVAL;
+		goto out_iput_fship;
 	}
 
 #ifdef DIAGNOSTIC
 	vxfs_dumpfsh(sfp);
 #endif
 
-	if (!(pfp = vxfs_getfsh(infp->vsi_fship, 1))) {
+	pfp = vxfs_getfsh(infp->vsi_fship, 1);
+	if (!pfp) {
 		printk(KERN_ERR "vxfs: unabled to get primary fsh\n");
-		return -EINVAL;
+		goto out_free_sfp;
 	}
 
 #ifdef DIAGNOSTIC
@@ -150,24 +155,50 @@ vxfs_read_fshead(struct super_block *sbp)
 #endif
 
 	tip = vxfs_blkiget(sbp, infp->vsi_iext, sfp->fsh_ilistino[0]);
-	if (!tip || ((infp->vsi_stilist = vxfs_get_fake_inode(sbp, tip)) == NULL)) {
+	if (!tip)
+		goto out_free_pfp;
+
+	infp->vsi_stilist = vxfs_get_fake_inode(sbp, tip);
+	if (!infp->vsi_stilist) {
 		printk(KERN_ERR "vxfs: unabled to get structual list inode\n");
-		return -EINVAL;
-	} else if (!VXFS_ISILT(VXFS_INO(infp->vsi_stilist))) {
+		kfree(tip);
+		goto out_free_pfp;
+	}
+	if (!VXFS_ISILT(VXFS_INO(infp->vsi_stilist))) {
 		printk(KERN_ERR "vxfs: structual list inode is of wrong type (%x)\n",
 				VXFS_INO(infp->vsi_stilist)->vii_mode & VXFS_TYPE_MASK);
-		return -EINVAL;
+		goto out_iput_stilist;
 	}
 
 	tip = vxfs_stiget(sbp, pfp->fsh_ilistino[0]);
-	if (!tip || ((infp->vsi_ilist = vxfs_get_fake_inode(sbp, tip)) == NULL)) {
+	if (!tip)
+		goto out_iput_stilist;
+	infp->vsi_ilist = vxfs_get_fake_inode(sbp, tip);
+	if (!infp->vsi_ilist) {
 		printk(KERN_ERR "vxfs: unabled to get inode list inode\n");
-		return -EINVAL;
-	} else if (!VXFS_ISILT(VXFS_INO(infp->vsi_ilist))) {
+		kfree(tip);
+		goto out_iput_stilist;
+	}
+	if (!VXFS_ISILT(VXFS_INO(infp->vsi_ilist))) {
 		printk(KERN_ERR "vxfs: inode list inode is of wrong type (%x)\n",
 				VXFS_INO(infp->vsi_ilist)->vii_mode & VXFS_TYPE_MASK);
-		return -EINVAL;
+		goto out_iput_ilist;
 	}
 
 	return 0;
+
+ out_iput_ilist:
+ 	iput(infp->vsi_ilist);
+ out_iput_stilist:
+ 	iput(infp->vsi_stilist);
+ out_free_pfp:
+ 	kfree(pfp);
+ out_free_sfp:
+ 	kfree(sfp);
+ out_iput_fship:
+ 	iput(infp->vsi_fship);
+	return -EINVAL;
+ out_free_fship:
+ 	kfree(vip);
+	return -EINVAL;
 }
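The vxfs_read_fshead() rewrite replaces a series of bare `return -EINVAL` statements, which leaked everything acquired up to that point, with a single goto unwind ladder that releases resources in reverse order of acquisition. A generic, self-contained sketch of the pattern (plain allocations stand in for the inodes and fsh buffers above):

#include <stdlib.h>

struct ctx { void *a, *b, *c; };

/* Acquire three resources; on failure release only what was taken. */
static int setup(struct ctx *ctx)
{
	ctx->a = malloc(16);
	if (!ctx->a)
		goto out;
	ctx->b = malloc(16);
	if (!ctx->b)
		goto out_free_a;
	ctx->c = malloc(16);
	if (!ctx->c)
		goto out_free_b;
	return 0;		/* success: caller owns a, b and c */

out_free_b:
	free(ctx->b);
out_free_a:
	free(ctx->a);
out:
	return -1;		/* -EINVAL in the kernel version */
}

int main(void)
{
	struct ctx c;

	if (setup(&c))
		return 1;
	free(c.c);
	free(c.b);
	free(c.a);
	return 0;
}

Each label undoes exactly one acquisition, so a failure at step N falls through labels N-1 down to 1; adding a new resource means adding one label rather than touching every error branch.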
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index d3db0faa9abe..ad8ef0487ad2 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -61,6 +61,12 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 			sb->s_op->dirty_inode(inode);
 	}
 
+	/*
+	 * make sure that changes are seen by all cpus before we test i_state
+	 * -- mikulas
+	 */
+	smp_mb();
+
 	/* avoid the locking if we can */
 	if ((inode->i_state & flags) == flags)
 		return;
@@ -137,6 +143,12 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc)
 	inode->i_state |= I_LOCK;
 	inode->i_state &= ~I_DIRTY;
 
+	/*
+	 * smp_rmb(); note: if you remove write_lock below, you must add this.
+	 * mark_inode_dirty doesn't take spinlock, make sure that inode is not
+	 * read speculatively by this cpu before &= ~I_DIRTY -- mikulas
+	 */
+
 	write_lock(&mapping->page_lock);
 	if (wait || !wbc->for_kupdate || list_empty(&mapping->io_pages))
 		list_splice_init(&mapping->dirty_pages, &mapping->io_pages);
@@ -334,7 +346,6 @@ writeback_inodes(struct writeback_control *wbc)
 	}
 	spin_unlock(&sb_lock);
 	spin_unlock(&inode_lock);
-	blk_run_queues();
 }
 
 /*
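The smp_mb() added to __mark_inode_dirty() orders the caller's earlier stores against the following unlocked read of i_state, so the lock-avoiding fast path cannot act on a stale flag. A rough user-space analogue of that store/fence/load shape using C11 fences; the flag name mirrors the kernel's but everything here is illustrative:

#include <stdatomic.h>
#include <stdio.h>

#define I_DIRTY 0x1

static atomic_int i_state;

static void mark_dirty(void)
{
	/* ... earlier stores that must be globally visible first ... */

	/* full fence, the analogue of smp_mb() above: order those stores
	 * before the unlocked test of the flag */
	atomic_thread_fence(memory_order_seq_cst);

	/* avoid the locking if we can */
	if (atomic_load_explicit(&i_state, memory_order_relaxed) & I_DIRTY)
		return;

	atomic_fetch_or(&i_state, I_DIRTY);	/* slow path: set the flag */
}

int main(void)
{
	mark_dirty();
	printf("i_state = %#x\n", atomic_load(&i_state));
	return 0;
}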
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index bb8bf302da95..5ce105bd3d1e 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -34,6 +34,7 @@ static struct super_operations hugetlbfs_ops;
 static struct address_space_operations hugetlbfs_aops;
 struct file_operations hugetlbfs_file_operations;
 static struct inode_operations hugetlbfs_dir_inode_operations;
+static struct inode_operations hugetlbfs_inode_operations;
 
 static struct backing_dev_info hugetlbfs_backing_dev_info = {
 	.ra_pages	= 0,	/* No readahead */
@@ -44,7 +45,6 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct inode *inode =file->f_dentry->d_inode;
 	struct address_space *mapping = inode->i_mapping;
-	size_t len;
 	int ret;
 
 	if (!capable(CAP_IPC_LOCK))
@@ -65,15 +65,52 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	vma->vm_flags |= VM_HUGETLB | VM_RESERVED;
 	vma->vm_ops = &hugetlb_vm_ops;
 	ret = hugetlb_prefault(mapping, vma);
-	len = (vma->vm_end - vma->vm_start) + (vma->vm_pgoff << PAGE_SHIFT);
-	if (inode->i_size < len)
-		inode->i_size = len;
 	up(&inode->i_sem);
 	return ret;
 }
 
 /*
+ * Called under down_write(mmap_sem), page_table_lock is not held
+ */
+#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+		unsigned long len, unsigned long pgoff, unsigned long flags);
+#else
+static unsigned long
+hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+		unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+
+	if (len & ~HPAGE_MASK)
+		return -EINVAL;
+	if (len > TASK_SIZE)
+		return -ENOMEM;
+
+	if (addr) {
+		addr = ALIGN(addr, HPAGE_SIZE);
+		vma = find_vma(mm, addr);
+		if (TASK_SIZE - len >= addr &&
+		    (!vma || addr + len <= vma->vm_start))
+			return addr;
+	}
+
+	addr = ALIGN(mm->free_area_cache, HPAGE_SIZE);
+
+	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
+		/* At this point:  (!vma || addr < vma->vm_end). */
+		if (TASK_SIZE - len < addr)
+			return -ENOMEM;
+		if (!vma || addr + len <= vma->vm_start)
+			return addr;
+		addr = ALIGN(vma->vm_end, HPAGE_SIZE);
+	}
+}
+#endif
+
+/*
 * Read a page. Again trivial. If it didn't already exist
 * in the page cache, it is zero-filled.
 */
@@ -83,12 +120,14 @@ static int hugetlbfs_readpage(struct file *file, struct page * page)
 	return -EINVAL;
 }
 
-static int hugetlbfs_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to)
+static int hugetlbfs_prepare_write(struct file *file,
+			struct page *page, unsigned offset, unsigned to)
 {
 	return -EINVAL;
 }
 
-static int hugetlbfs_commit_write(struct file *file, struct page *page, unsigned offset, unsigned to)
+static int hugetlbfs_commit_write(struct file *file,
+			struct page *page, unsigned offset, unsigned to)
 {
 	return -EINVAL;
 }
@@ -103,28 +142,8 @@ void huge_pagevec_release(struct pagevec *pvec)
 	pagevec_reinit(pvec);
 }
 
-void truncate_partial_hugepage(struct page *page, unsigned partial)
+void truncate_huge_page(struct page *page)
 {
-	int i;
-	const unsigned piece = partial & (PAGE_SIZE - 1);
-	const unsigned tailstart = PAGE_SIZE - piece;
-	const unsigned whole_pages = partial / PAGE_SIZE;
-	const unsigned last_page_offset = HPAGE_SIZE/PAGE_SIZE - whole_pages;
-
-	for (i = HPAGE_SIZE/PAGE_SIZE - 1; i >= last_page_offset; ++i)
-		memclear_highpage_flush(&page[i], 0, PAGE_SIZE);
-
-	if (!piece)
-		return;
-
-	memclear_highpage_flush(&page[last_page_offset - 1], tailstart, piece);
-}
-
-void truncate_huge_page(struct address_space *mapping, struct page *page)
-{
-	if (page->mapping != mapping)
-		return;
-
 	clear_page_dirty(page);
 	ClearPageUptodate(page);
 	remove_from_page_cache(page);
@@ -133,52 +152,13 @@ void truncate_hugepages(struct address_space *mapping, loff_t lstart)
 {
-	const pgoff_t start = (lstart + HPAGE_SIZE - 1) >> HPAGE_SHIFT;
-	const unsigned partial = lstart & (HPAGE_SIZE - 1);
+	const pgoff_t start = lstart >> HPAGE_SHIFT;
 	struct pagevec pvec;
 	pgoff_t next;
 	int i;
 
 	pagevec_init(&pvec, 0);
 	next = start;
-
-	while (pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
-		for (i = 0; i < pagevec_count(&pvec); ++i) {
-			struct page *page = pvec.pages[i];
-			pgoff_t page_index = page->index;
-
-			if (page_index > next)
-				next = page_index;
-
-			++next;
-
-			if (TestSetPageLocked(page))
-				continue;
-
-			if (PageWriteback(page)) {
-				unlock_page(page);
-				continue;
-			}
-
-			truncate_huge_page(mapping, page);
-			unlock_page(page);
-		}
-		huge_pagevec_release(&pvec);
-		cond_resched();
-	}
-
-	if (partial) {
-		struct page *page = find_lock_page(mapping, start - 1);
-		if (page) {
-			wait_on_page_writeback(page);
-			truncate_partial_hugepage(page, partial);
-			unlock_page(page);
-			huge_page_release(page);
-		}
-	}
-
-	next = start;
-
 	while (1) {
 		if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
 			if (next == start)
@@ -191,11 +171,10 @@ void truncate_hugepages(struct address_space *mapping, loff_t lstart)
 			struct page *page = pvec.pages[i];
 
 			lock_page(page);
-			wait_on_page_writeback(page);
 			if (page->index > next)
 				next = page->index;
 			++next;
-			truncate_huge_page(mapping, page);
+			truncate_huge_page(page);
 			unlock_page(page);
 		}
 		huge_pagevec_release(&pvec);
@@ -259,70 +238,73 @@ static void hugetlbfs_drop_inode(struct inode *inode)
 		hugetlbfs_forget_inode(inode);
 }
 
-static void hugetlb_vmtruncate_list(struct list_head *list, unsigned long pgoff)
+/*
+ * h_pgoff is in HPAGE_SIZE units.
+ * vma->vm_pgoff is in PAGE_SIZE units.
+ */
+static void
+hugetlb_vmtruncate_list(struct list_head *list, unsigned long h_pgoff)
 {
-	unsigned long start, end, length, delta;
 	struct vm_area_struct *vma;
 
 	list_for_each_entry(vma, list, shared) {
-		start = vma->vm_start;
-		end = vma->vm_end;
-		length = end - start;
-
-		if (vma->vm_pgoff >= pgoff) {
-			zap_hugepage_range(vma, start, length);
+		unsigned long h_vm_pgoff;
+		unsigned long v_length;
+		unsigned long h_length;
+		unsigned long v_offset;
+
+		h_vm_pgoff = vma->vm_pgoff << (HPAGE_SHIFT - PAGE_SHIFT);
+		v_length = vma->vm_end - vma->vm_start;
+		h_length = v_length >> HPAGE_SHIFT;
+		v_offset = (h_pgoff - h_vm_pgoff) << HPAGE_SHIFT;
+
+		/*
+		 * Is this VMA fully outside the truncation point?
+		 */
+		if (h_vm_pgoff >= h_pgoff) {
+			zap_hugepage_range(vma, vma->vm_start, v_length);
 			continue;
 		}
 
-		length >>= PAGE_SHIFT;
-		delta = pgoff - vma->vm_pgoff;
-		if (delta >= length)
+		/*
+		 * Is this VMA fully inside the truncaton point?
+		 */
+		if (h_vm_pgoff + (v_length >> HPAGE_SHIFT) <= h_pgoff)
 			continue;
 
-		start += delta << PAGE_SHIFT;
-		length = (length - delta) << PAGE_SHIFT;
-		zap_hugepage_range(vma, start, length);
+		/*
+		 * The VMA straddles the truncation point.  v_offset is the
+		 * offset (in bytes) into the VMA where the point lies.
+		 */
+		zap_hugepage_range(vma,
+				vma->vm_start + v_offset,
+				v_length - v_offset);
 	}
 }
 
+/*
+ * Expanding truncates are not allowed.
+ */
 static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
 {
 	unsigned long pgoff;
 	struct address_space *mapping = inode->i_mapping;
-	unsigned long limit;
 
-	pgoff = (offset + HPAGE_SIZE - 1) >> HPAGE_SHIFT;
+	if (offset > inode->i_size)
+		return -EINVAL;
 
-	if (inode->i_size < offset)
-		goto do_expand;
+	BUG_ON(offset & ~HPAGE_MASK);
+	pgoff = offset >> HPAGE_SHIFT;
 
 	inode->i_size = offset;
 	down(&mapping->i_shared_sem);
-	if (list_empty(&mapping->i_mmap) && list_empty(&mapping->i_mmap_shared))
-		goto out_unlock;
 	if (!list_empty(&mapping->i_mmap))
 		hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff);
 	if (!list_empty(&mapping->i_mmap_shared))
 		hugetlb_vmtruncate_list(&mapping->i_mmap_shared, pgoff);
-
-out_unlock:
 	up(&mapping->i_shared_sem);
 	truncate_hugepages(mapping, offset);
 	return 0;
-
-do_expand:
-	limit = current->rlim[RLIMIT_FSIZE].rlim_cur;
-	if (limit != RLIM_INFINITY && offset > limit)
-		goto out_sig;
-	if (offset > inode->i_sb->s_maxbytes)
-		goto out;
-	inode->i_size = offset;
-	return 0;
-
-out_sig:
-	send_sig(SIGXFSZ, current, 0);
-out:
-	return -EFBIG;
 }
 
@@ -341,15 +323,10 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
 	error = security_inode_setattr(dentry, attr);
 	if (error)
 		goto out;
-
-	if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
-	    (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid))
-		error = DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0;
-	if (error)
-		goto out;
-
 	if (ia_valid & ATTR_SIZE) {
-		error = hugetlb_vmtruncate(inode, attr->ia_size);
+		error = -EINVAL;
+		if (!(attr->ia_size & ~HPAGE_MASK))
+			error = hugetlb_vmtruncate(inode, attr->ia_size);
 		if (error)
 			goto out;
 		attr->ia_valid &= ~ATTR_SIZE;
@@ -364,8 +341,8 @@ out:
 	return error;
 }
-static struct inode *
-hugetlbfs_get_inode(struct super_block *sb, int mode, dev_t dev)
+static struct inode *hugetlbfs_get_inode(struct super_block *sb,
+					int mode, dev_t dev)
 {
 	struct inode * inode = new_inode(sb);
@@ -377,13 +354,14 @@ hugetlbfs_get_inode(struct super_block *sb, int mode, dev_t dev)
 		inode->i_blocks = 0;
 		inode->i_rdev = NODEV;
 		inode->i_mapping->a_ops = &hugetlbfs_aops;
-		inode->i_mapping->backing_dev_info = &hugetlbfs_backing_dev_info;
+		inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info;
 		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 		switch (mode & S_IFMT) {
 		default:
 			init_special_inode(inode, mode, dev);
 			break;
 		case S_IFREG:
+			inode->i_op = &hugetlbfs_inode_operations;
 			inode->i_fop = &hugetlbfs_file_operations;
 			break;
 		case S_IFDIR:
@@ -405,8 +383,8 @@ hugetlbfs_get_inode(struct super_block *sb, int mode, dev_t dev)
 * File creation. Allocate an inode, and we're done..
 */
 /* SMP-safe */
-static int
-hugetlbfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
+static int hugetlbfs_mknod(struct inode *dir,
+			struct dentry *dentry, int mode, dev_t dev)
 {
 	struct inode * inode = hugetlbfs_get_inode(dir->i_sb, mode, dev);
 	int error = -ENOSPC;
@@ -419,7 +397,7 @@ hugetlbfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
 	return error;
 }
 
-static int hugetlbfs_mkdir(struct inode * dir, struct dentry * dentry, int mode)
+static int hugetlbfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 {
 	int retval = hugetlbfs_mknod(dir, dentry, mode | S_IFDIR, 0);
 	if (!retval)
@@ -432,7 +410,8 @@ static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, int mode)
 	return hugetlbfs_mknod(dir, dentry, mode | S_IFREG, 0);
 }
 
-static int hugetlbfs_symlink(struct inode * dir, struct dentry *dentry, const char * symname)
+static int hugetlbfs_symlink(struct inode *dir,
+			struct dentry *dentry, const char *symname)
 {
 	struct inode *inode;
 	int error = -ENOSPC;
@@ -450,15 +429,25 @@ static int hugetlbfs_symlink(struct inode * dir, struct dentry *dentry, const ch
 	return error;
 }
 
+/*
+ * For direct-IO reads into hugetlb pages
+ */
+int hugetlbfs_set_page_dirty(struct page *page)
+{
+	return 0;
+}
+
 static struct address_space_operations hugetlbfs_aops = {
 	.readpage	= hugetlbfs_readpage,
 	.prepare_write	= hugetlbfs_prepare_write,
-	.commit_write	= hugetlbfs_commit_write
+	.commit_write	= hugetlbfs_commit_write,
+	.set_page_dirty	= hugetlbfs_set_page_dirty,
 };
 
 struct file_operations hugetlbfs_file_operations = {
-	.mmap		= hugetlbfs_file_mmap,
-	.fsync		= simple_sync_file,
+	.mmap			= hugetlbfs_file_mmap,
+	.fsync			= simple_sync_file,
+	.get_unmapped_area	= hugetlb_get_unmapped_area,
 };
 
 static struct inode_operations hugetlbfs_dir_inode_operations = {
@@ -474,12 +463,17 @@ static struct inode_operations hugetlbfs_dir_inode_operations = {
 	.setattr	= hugetlbfs_setattr,
 };
 
+static struct inode_operations hugetlbfs_inode_operations = {
+	.setattr	= hugetlbfs_setattr,
+};
+
 static struct super_operations hugetlbfs_ops = {
 	.statfs		= simple_statfs,
 	.drop_inode	= hugetlbfs_drop_inode,
 };
 
-static int hugetlbfs_fill_super(struct super_block * sb, void * data, int silent)
+static int
+hugetlbfs_fill_super(struct super_block * sb, void * data, int silent)
 {
 	struct inode * inode;
 	struct dentry * root;
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index d2f5935ef972..a106e23956f7 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -205,10 +205,10 @@ int kjournald(void *arg)
 	lock_kernel();
 	daemonize();
 
-	spin_lock_irq(&current->sig->siglock);
+	spin_lock_irq(&current->sighand->siglock);
 	sigfillset(&current->blocked);
 	recalc_sigpending();
-	spin_unlock_irq(&current->sig->siglock);
+	spin_unlock_irq(&current->sighand->siglock);
 
 	sprintf(current->comm, "kjournald");
@@ -732,14 +732,21 @@ fail:
 * need to set up all of the mapping information to tell the journaling
 * system where the journal blocks are.
 *
- * journal_init_dev creates a journal which maps a fixed contiguous
- * range of blocks on an arbitrary block device.
- *
- * journal_init_inode creates a journal which maps an on-disk inode as
- * the journal.  The inode must exist already, must support bmap() and
- * must have all data blocks preallocated.
 */
 
+/**
+ *  journal_t * journal_init_dev() - creates an initialises a journal structure
+ *  @bdev: Block device on which to create the journal
+ *  @fs_dev: Device which hold journalled filesystem for this journal.
+ *  @start: Block nr Start of journal.
+ *  @len:  Lenght of the journal in blocks.
+ *  @blocksize: blocksize of journalling device
+ *  @returns: a newly created journal_t *
+ *
+ *  journal_init_dev creates a journal which maps a fixed contiguous
+ *  range of blocks on an arbitrary block device.
+ *
+ */
 journal_t * journal_init_dev(struct block_device *bdev,
 			struct block_device *fs_dev,
 			int start, int len, int blocksize)
@@ -763,7 +770,15 @@ journal_t * journal_init_dev(struct block_device *bdev,
 
 	return journal;
 }
- 
+
+/**
+ *  journal_t * journal_init_inode () - creates a journal which maps to a inode.
+ *  @inode: An inode to create the journal in
+ *
+ *  journal_init_inode creates a journal which maps an on-disk inode as
+ *  the journal.  The inode must exist already, must support bmap() and
+ *  must have all data blocks preallocated.
+ */
 journal_t * journal_init_inode (struct inode *inode)
 {
 	struct buffer_head *bh;
@@ -852,12 +867,15 @@ static int journal_reset (journal_t *journal)
 	return 0;
 }
 
-/*
+/**
+ * int journal_create() - Initialise the new journal file
+ * @journal: Journal to create. This structure must have been initialised
+ *
 * Given a journal_t structure which tells us which disk blocks we can
 * use, create a new journal superblock and initialise all of the
- * journal fields from scratch.  */
-
-int journal_create (journal_t *journal)
+ * journal fields from scratch.
+ **/
+int journal_create(journal_t *journal)
 {
 	unsigned long blocknr;
 	struct buffer_head *bh;
@@ -920,11 +938,14 @@ int journal_create (journal_t *journal)
 	return journal_reset(journal);
 }
 
-/*
+/**
+ * void journal_update_superblock() - Update journal sb on disk.
+ * @journal: The journal to update.
+ * @wait: Set to '0' if you don't want to wait for IO completion.
+ *
 * Update a journal's dynamic superblock fields and write it to disk,
 * optionally waiting for the IO to complete.
-*/
-
+ */
 void journal_update_superblock(journal_t *journal, int wait)
 {
 	journal_superblock_t *sb = journal->j_superblock;
@@ -1040,12 +1061,14 @@ static int load_superblock(journal_t *journal)
 }
 
 
-/*
+/**
+ * int journal_load() - Read journal from disk.
+ * @journal: Journal to act on.
+ *
 * Given a journal_t structure which tells us which disk blocks contain
 * a journal, read the journal from disk to initialise the in-memory
 * structures.
 */
-
 int journal_load(journal_t *journal)
 {
 	int err;
@@ -1090,11 +1113,13 @@ recovery_error:
 	return -EIO;
 }
 
-/*
+/**
+ * void journal_destroy() - Release a journal_t structure.
+ * @journal: Journal to act on.
+*
 * Release a journal_t structure once it is no longer in use by the
 * journaled object.
 */
-
 void journal_destroy (journal_t *journal)
 {
 	/* Wait for the commit thread to wake up and die. */
@@ -1131,8 +1156,12 @@ void journal_destroy (journal_t *journal)
 }
 
 
-/* Published API: Check whether the journal uses all of a given set of
- * features.  Return true (non-zero) if it does. */
+/**
+ *int journal_check_used_features () - Check if features specified are used.
+ *
+ * Check whether the journal uses all of a given set of
+ * features.  Return true (non-zero) if it does.
+ **/
 int journal_check_used_features (journal_t *journal, unsigned long compat,
 				 unsigned long ro, unsigned long incompat)
@@ -1154,7 +1183,10 @@ int journal_check_used_features (journal_t *journal, unsigned long compat,
 	return 0;
 }
 
-/* Published API: Check whether the journaling code supports the use of
+/**
+ * int journal_check_available_features() - Check feature set in journalling layer
+ *
+ * Check whether the journaling code supports the use of
 * all of a given set of features on this journal.  Return true
 * (non-zero) if it can. */
@@ -1183,8 +1215,13 @@ int journal_check_available_features (journal_t *journal, unsigned long compat,
 	return 0;
 }
 
-/* Published API: Mark a given journal feature as present on the
- * superblock.  Returns true if the requested features could be set. */
+/**
+ * int journal_set_features () - Mark a given journal feature in the superblock
+ *
+ * Mark a given journal feature as present on the
+ * superblock.  Returns true if the requested features could be set.
+ *
+ */
 int journal_set_features (journal_t *journal, unsigned long compat,
 			  unsigned long ro, unsigned long incompat)
@@ -1210,12 +1247,12 @@ int journal_set_features (journal_t *journal, unsigned long compat,
 }
 
 
-/*
- * Published API:
+/**
+ * int journal_update_format () - Update on-disk journal structure.
+ *
 * Given an initialised but unloaded journal struct, poke about in the
 * on-disk structure to update it to the most recent supported version.
 */
-
 int journal_update_format (journal_t *journal)
 {
 	journal_superblock_t *sb;
@@ -1265,7 +1302,10 @@ static int journal_convert_superblock_v1(journal_t *journal,
 }
 
 
-/*
+/**
+ * int journal_flush () - Flush journal
+ * @journal: Journal to act on.
+ *
 * Flush all data for a given journal to disk and empty the journal.
 * Filesystems can use this when remounting readonly to ensure that
 * recovery does not need to happen on remount.
@@ -1319,12 +1359,16 @@ int journal_flush (journal_t *journal)
 	return err;
 }
 
-/*
+/**
+ * int journal_wipe() - Wipe journal contents
+ * @journal: Journal to act on.
+ * @write: flag (see below)
+ *
 * Wipe out all of the contents of a journal, safely.  This will produce
 * a warning if the journal contains any valid recovery information.
 * Must be called between journal_init_*() and journal_load().
 *
- * If (write) is non-zero, then we wipe out the journal on disk; otherwise
+ * If 'write' is non-zero, then we wipe out the journal on disk; otherwise
 * we merely suppress recovery.
 */
@@ -1373,43 +1417,11 @@ const char * journal_dev_name(journal_t *journal)
 }
 
 /*
- * journal_abort: perform a complete, immediate shutdown of the ENTIRE
- * journal (not of a single transaction).  This operation cannot be
- * undone without closing and reopening the journal.
- *
- * The journal_abort function is intended to support higher level error
- * recovery mechanisms such as the ext2/ext3 remount-readonly error
- * mode.
- *
- * Journal abort has very specific semantics.  Any existing dirty,
- * unjournaled buffers in the main filesystem will still be written to
- * disk by bdflush, but the journaling mechanism will be suspended
- * immediately and no further transaction commits will be honoured.
- *
- * Any dirty, journaled buffers will be written back to disk without
- * hitting the journal.  Atomicity cannot be guaranteed on an aborted
- * filesystem, but we _do_ attempt to leave as much data as possible
- * behind for fsck to use for cleanup.
- *
- * Any attempt to get a new transaction handle on a journal which is in
- * ABORT state will just result in an -EROFS error return.  A
- * journal_stop on an existing handle will return -EIO if we have
- * entered abort state during the update.
+ * Journal abort has very specific semantics, which we describe
+ * for journal abort.
 *
- * Recursive transactions are not disturbed by journal abort until the
- * final journal_stop, which will receive the -EIO error.
- *
- * Finally, the journal_abort call allows the caller to supply an errno
- * which will be recored (if possible) in the journal superblock.  This
- * allows a client to record failure conditions in the middle of a
- * transaction without having to complete the transaction to record the
- * failure to disk.  ext3_error, for example, now uses this
- * functionality.
- *
- * Errors which originate from within the journaling layer will NOT
- * supply an errno; a null errno implies that absolutely no further
- * writes are done to the journal (unless there are any already in
- * progress).
+ * Two internal function, which provide abort to te jbd layer
+ * itself are here.
 */
 
 /* Quick version for internal journal use (doesn't lock the journal).
@@ -1447,7 +1459,52 @@ void __journal_abort_soft (journal_t *journal, int errno)
 	journal_update_superblock(journal, 1);
 }
 
-/* Full version for external use */
+/**
+ * void journal_abort () - Shutdown the journal immediately.
+ * @journal: the journal to shutdown.
+ * @errno:   an error number to record in the journal indicating
+ *           the reason for the shutdown.
+ *
+ * Perform a complete, immediate shutdown of the ENTIRE
+ * journal (not of a single transaction).  This operation cannot be
+ * undone without closing and reopening the journal.
+ *
+ * The journal_abort function is intended to support higher level error
+ * recovery mechanisms such as the ext2/ext3 remount-readonly error
+ * mode.
+ *
+ * Journal abort has very specific semantics.  Any existing dirty,
+ * unjournaled buffers in the main filesystem will still be written to
+ * disk by bdflush, but the journaling mechanism will be suspended
+ * immediately and no further transaction commits will be honoured.
+ *
+ * Any dirty, journaled buffers will be written back to disk without
+ * hitting the journal.  Atomicity cannot be guaranteed on an aborted
+ * filesystem, but we _do_ attempt to leave as much data as possible
+ * behind for fsck to use for cleanup.
+ *
+ * Any attempt to get a new transaction handle on a journal which is in
+ * ABORT state will just result in an -EROFS error return.  A
+ * journal_stop on an existing handle will return -EIO if we have
+ * entered abort state during the update.
+ *
+ * Recursive transactions are not disturbed by journal abort until the
+ * final journal_stop, which will receive the -EIO error.
+ *
+ * Finally, the journal_abort call allows the caller to supply an errno
+ * which will be recorded (if possible) in the journal superblock.  This
+ * allows a client to record failure conditions in the middle of a
+ * transaction without having to complete the transaction to record the
+ * failure to disk.  ext3_error, for example, now uses this
+ * functionality.
+ *
+ * Errors which originate from within the journaling layer will NOT
+ * supply an errno; a null errno implies that absolutely no further
+ * writes are done to the journal (unless there are any already in
+ * progress).
+ *
+ */
+
 void journal_abort (journal_t *journal, int errno)
 {
 	lock_journal(journal);
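As the relocated comment says, journal_abort() exists so a client filesystem can record a failure in mid-transaction without having to complete it. A hedged sketch of the calling side, loosely modelled on the ext3_error() pattern the comment mentions; journal_abort() and journal_errno() are the real jbd entry points, while the wrapper functions and their policy are hypothetical:

/*
 * Sketch only, not from this patch: how a client filesystem might
 * react to a fatal metadata error.
 */
static void example_handle_fatal_error(journal_t *journal, int errno)
{
	/* Record the failure in the journal superblock and stop
	 * honouring further commits; existing handles will start
	 * seeing -EIO from journal_stop(). */
	journal_abort(journal, errno);
}

static int example_is_aborted(journal_t *journal)
{
	/* Non-zero after an abort (-EROFS if aborted this mount),
	 * until journal_clear_err()/journal_ack_err() take the
	 * filesystem out of the error state. */
	return journal_errno(journal) != 0;
}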
 	__journal_abort_hard(journal, errno);
@@ -1455,6 +1512,17 @@ void journal_abort (journal_t *journal, int errno)
 	unlock_journal(journal);
 }
 
+/**
+ * int journal_errno () - returns the journal's error state.
+ * @journal: journal to examine.
+ *
+ * This is the errno numbet set with journal_abort(), the last
+ * time the journal was mounted - if the journal was stopped
+ * without calling abort this will be 0.
+ *
+ * If the journal has been aborted on this mount time -EROFS will
+ * be returned.
+ */
 int journal_errno (journal_t *journal)
 {
 	int err;
@@ -1468,6 +1536,14 @@ int journal_errno (journal_t *journal)
 	return err;
 }
 
+
+
+/**
+ * int journal_clear_err () - clears the journal's error state
+ *
+ * An error must be cleared or Acked to take a FS out of readonly
+ * mode.
+ */
 int journal_clear_err (journal_t *journal)
 {
 	int err = 0;
@@ -1481,6 +1557,13 @@ int journal_clear_err (journal_t *journal)
 	return err;
 }
 
+
+/**
+ * void journal_ack_err() - Ack journal err.
+ *
+ * An error must be cleared or Acked to take a FS out of readonly
+ * mode.
+ */
 void journal_ack_err (journal_t *journal)
 {
 	lock_journal(journal);

diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c
index e6a96d3c30ce..f82d7f3cc507 100644
--- a/fs/jbd/recovery.c
+++ b/fs/jbd/recovery.c
@@ -206,20 +206,22 @@ do {									\
 		var -= ((journal)->j_last - (journal)->j_first);	\
 } while (0)
 
-/*
- * journal_recover
- *
+/**
+ * int journal_recover(journal_t *journal) - recovers a on-disk journal
+ * @journal: the journal to recover
+ *
 * The primary function for recovering the log contents when mounting a
 * journaled device.
- *
+ */
+int journal_recover(journal_t *journal)
+{
+/*
 * Recovery is done in three passes.  In the first pass, we look for the
 * end of the log.  In the second, we assemble the list of revoke
 * blocks.  In the third and final pass, we replay any un-revoked blocks
 * in the log.
 */
-int journal_recover(journal_t *journal)
-{
 	int			err;
 	journal_superblock_t *	sb;
@@ -263,20 +265,23 @@ int journal_recover(journal_t *journal)
 	return err;
 }
 
-/*
- * journal_skip_recovery
- *
+/**
+ * int journal_skip_recovery() - Start journal and wipe exiting records
+ * @journal: journal to startup
+ *
 * Locate any valid recovery information from the journal and set up the
 * journal structures in memory to ignore it (presumably because the
 * caller has evidence that it is out of date).
- *
+ * This function does'nt appear to be exorted..
+ */
+int journal_skip_recovery(journal_t *journal)
+{
+/*
 * We perform one pass over the journal to allow us to tell the user how
 * much recovery information is being erased, and to let us initialise
 * the journal transaction sequence numbers to the next unused ID.
 */
-int journal_skip_recovery(journal_t *journal)
-{
 	int			err;
 	journal_superblock_t *	sb;
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 597562cf47fe..14ca5228e9d6 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -222,19 +222,20 @@ static handle_t *new_handle(int nblocks)
 	return handle;
 }
 
-/*
- * Obtain a new handle.
+/**
+ * handle_t *journal_start() - Obtain a new handle.
+ * @journal: Journal to start transaction on.
+ * @nblocks: number of block buffer we might modify
 *
 * We make sure that the transaction can guarantee at least nblocks of
 * modified buffers in the log.  We block until the log can guarantee
 * that much space.
 *
- * This function is visible to journal users (like ext2fs), so is not
+ * This function is visible to journal users (like ext3fs), so is not
 * called with the journal already locked.
 *
 * Return a pointer to a newly allocated handle, or NULL on failure
 */
-
 handle_t *journal_start(journal_t *journal, int nblocks)
 {
 	handle_t *handle = journal_current_handle();
@@ -324,7 +325,11 @@ fail_unlock:
 	return ret;
 }
 
-/*
+/**
+ * handle_t *journal_try_start() - Don't block, but try and get a handle
+ * @journal: Journal to start transaction on.
+ * @nblocks: number of block buffer we might modify
+ *
 * Try to start a handle, but non-blockingly.  If we weren't able
 * to, return an ERR_PTR value.
 */
@@ -368,16 +373,18 @@ handle_t *journal_try_start(journal_t *journal, int nblocks)
 	return handle;
 }
 
-/*
- * journal_extend: extend buffer credits.
- *
+/**
+ * int journal_extend() - extend buffer credits.
+ * @handle:  handle to 'extend'
+ * @nblocks: nr blocks to try to extend by.
+ *
 * Some transactions, such as large extends and truncates, can be done
 * atomically all at once or in several stages.  The operation requests
 * a credit for a number of buffer modications in advance, but can
 * extend its credit if it needs more.
 *
 * journal_extend tries to give the running handle more buffer credits.
- * It does not guarantee that allocation: this is a best-effort only.
+ * It does not guarantee that allocation - this is a best-effort only.
 * The calling process MUST be able to deal cleanly with a failure to
 * extend here.
 *
@@ -386,7 +393,6 @@ handle_t *journal_try_start(journal_t *journal, int nblocks)
 * return code < 0 implies an error
 * return code > 0 implies normal transaction-full status.
 */
-
 int journal_extend (handle_t *handle, int nblocks)
 {
 	transaction_t *transaction = handle->h_transaction;
@@ -435,8 +441,12 @@ error_out:
 }
 
 
-/*
- * journal_restart: restart a handle for a multi-transaction filesystem
+/**
+ * int journal_restart() - restart a handle .
+ * @handle:  handle to restart
+ * @nblocks: nr credits requested
+ *
+ * Restart a handle for a multi-transaction filesystem
 * operation.
 *
 * If the journal_extend() call above fails to grant new buffer credits
@@ -478,8 +488,9 @@ int journal_restart(handle_t *handle, int nblocks)
 }
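journal_start(), journal_extend() and journal_restart() combine into a standard pattern that the ext3 truncate changes earlier in this diff also follow: reserve credits, try to extend cheaply when running low, and fall back to restarting the transaction at a safe point. A hedged sketch of that pattern; the jbd calls are real, but the helper, the more_work_to_do() predicate and the CREDITS value are illustrative assumptions:

/* Sketch only: a multi-chunk operation under jbd handles. */
#define CREDITS 38	/* assumed per-chunk credit estimate */

static int example_do_chunks(journal_t *journal)
{
	handle_t *handle = journal_start(journal, CREDITS);
	int err = 0;

	if (IS_ERR(handle))
		return PTR_ERR(handle);

	while (more_work_to_do()) {		/* hypothetical predicate */
		/* ... modify up to CREDITS buffers under this handle ... */

		if (handle->h_buffer_credits < CREDITS &&
		    journal_extend(handle, CREDITS) != 0) {
			/* couldn't extend: commit what we have and start
			 * a fresh transaction at a restartable point */
			err = journal_restart(handle, CREDITS);
			if (err)
				break;
		}
	}
	journal_stop(handle);
	return err;
}

The key contract, spelled out in the journal_extend() comment, is that extension may fail for reasons outside the caller's control, so every caller needs the journal_restart() fallback.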
 
-/*
- * Barrier operation: establish a transaction barrier.
+/**
+ * void journal_lock_updates () - establish a transaction barrier.
+ * @journal: Journal to establish a barrier on.
 *
 * This locks out any further updates from being started, and blocks
 * until all existing updates have completed, returning only once the
@@ -487,7 +498,6 @@ int journal_restart(handle_t *handle, int nblocks)
 * journal is in a quiescent state with no updates running.
 *
 * The journal lock should not be held on entry.
 */
-
 void journal_lock_updates (journal_t *journal)
 {
 	lock_journal(journal);
@@ -515,12 +525,14 @@ void journal_lock_updates (journal_t *journal)
 	down(&journal->j_barrier);
 }
 
-/*
+/**
+ * void journal_unlock_updates (journal_t* journal) - release barrier
+ * @journal: Journal to release the barrier on.
+ *
 * Release a transaction barrier obtained with journal_lock_updates().
 *
 * Should be called without the journal lock held.
 */
-
 void journal_unlock_updates (journal_t *journal)
 {
 	lock_journal(journal);
@@ -566,9 +578,6 @@ static void jbd_unexpected_dirty_buffer(struct journal_head *jh)
 }
 
 /*
- * journal_get_write_access: notify intent to modify a buffer for metadata
- * (not data) update.
- *
 * If the buffer is already part of the current transaction, then there
 * is nothing we need to do.  If it is already part of a prior
 * transaction which we are still committing to disk, then we need to
@@ -577,7 +586,6 @@ static void jbd_unexpected_dirty_buffer(struct journal_head *jh)
 * the handle's metadata buffer credits (unless the buffer is already
 * part of the transaction, that is).
 *
- * Returns an error code or 0 on success.
 */
 
 static int
@@ -786,6 +794,17 @@ out_unlocked:
 	return error;
 }
 
+/**
+ * int journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update.
+ * @handle: transaction to add buffer modifications to
+ * @bh:     bh to be used for metadata writes
+ *
+ * Returns an error code or 0 on success.
+ *
+ * In full data journalling mode the buffer may be of type BJ_AsyncData,
+ * because we're write()ing a buffer which is also part of a shared mapping.
+ */
+
 int journal_get_write_access (handle_t *handle, struct buffer_head *bh)
 {
 	transaction_t *transaction = handle->h_transaction;
@@ -816,6 +835,13 @@ int journal_get_write_access (handle_t *handle, struct buffer_head *bh)
 * There is no lock ranking violation: it was a newly created,
 * unlocked buffer beforehand. */
+/**
+ * int journal_get_create_access () - notify intent to use newly created bh
+ * @handle: transaction to new buffer to
+ * @bh: new buffer.
+ *
+ * Call this if you create a new bh.
+ */
 int journal_get_create_access (handle_t *handle, struct buffer_head *bh)
 {
 	transaction_t *transaction = handle->h_transaction;
@@ -875,13 +901,14 @@ out:
 
 
 
-/*
- * journal_get_undo_access: Notify intent to modify metadata with non-
- * rewindable consequences
- *
+/**
+ * int journal_get_undo_access() -  Notify intent to modify metadata with non-rewindable consequences
+ * @handle: transaction
+ * @bh: buffer to undo
+ *
 * Sometimes there is a need to distinguish between metadata which has
 * been committed to disk and that which has not.  The ext3fs code uses
- * this for freeing and allocating space: we have to make sure that we
+ * this for freeing and allocating space, we have to make sure that we
 * do not reuse freed space until the deallocation has been committed,
 * since if we overwrote that space we would make the delete
 * un-rewindable in case of a crash.
@@ -893,13 +920,12 @@ out:
 * as we know that the buffer has definitely been committed to disk.
 *
 * We never need to know which transaction the committed data is part
- * of: buffers touched here are guaranteed to be dirtied later and so
+ * of, buffers touched here are guaranteed to be dirtied later and so
 * will be committed to a new transaction in due course, at which point
 * we can discard the old committed data pointer.
 *
 * Returns error number or 0 on success.
 */
-
 int journal_get_undo_access (handle_t *handle, struct buffer_head *bh)
 {
 	journal_t *journal = handle->h_transaction->t_journal;
@@ -942,21 +968,23 @@ out:
 	return err;
 }
 
-/*
- * journal_dirty_data: mark a buffer as containing dirty data which
- * needs to be flushed before we can commit the current transaction.
- *
+/**
+ * int journal_dirty_data() -  mark a buffer as containing dirty data which needs to be flushed before we can commit the current transaction.
+ * @handle: transaction
+ * @bh: bufferhead to mark
+ *
 * The buffer is placed on the transaction's data list and is marked as
 * belonging to the transaction.
 *
 * Returns error number or 0 on success.
- *
+ */
+int journal_dirty_data (handle_t *handle, struct buffer_head *bh)
+{
+/*
 * journal_dirty_data() can be called via page_launder->ext3_writepage
 * by kswapd.  So it cannot block.  Happily, there's nothing here
 * which needs lock_journal if `async' is set.
 */
-int journal_dirty_data (handle_t *handle, struct buffer_head *bh)
-{
 	journal_t *journal = handle->h_transaction->t_journal;
 	int need_brelse = 0;
 	struct journal_head *jh;
@@ -1097,24 +1125,28 @@ no_journal:
 	return 0;
 }
 
-/*
- * journal_dirty_metadata: mark a buffer as containing dirty metadata
- * which needs to be journaled as part of the current transaction.
+/**
+ * int journal_dirty_metadata() -  mark a buffer as containing dirty metadata
+ * @handle: transaction to add buffer to.
+ * @bh: buffer to mark
+ *
+ * mark dirty metadata which needs to be journaled as part of the current transaction.
 *
 * The buffer is placed on the transaction's metadata list and is marked
 * as belonging to the transaction.
 *
+ * Returns error number or 0 on success.
+ */
+int journal_dirty_metadata (handle_t *handle, struct buffer_head *bh)
+{
+/*
 * Special care needs to be taken if the buffer already belongs to the
 * current committing transaction (in which case we should have frozen
 * data present for that commit).  In that case, we don't relink the
 * buffer: that only gets done when the old transaction finally
 * completes its commit.
 *
- * Returns error number or 0 on success.
 */
-
-int journal_dirty_metadata (handle_t *handle, struct buffer_head *bh)
-{
 	transaction_t *transaction = handle->h_transaction;
 	journal_t *journal = transaction->t_journal;
 	struct journal_head *jh = bh2jh(bh);
@@ -1199,9 +1231,12 @@ void journal_release_buffer (handle_t *handle, struct buffer_head *bh)
 }
 #endif
 
-/*
- * journal_forget: bforget() for potentially-journaled buffers.  We can
- * only do the bforget if there are no commits pending against the
+/**
+ * void journal_forget() - bforget() for potentially-journaled buffers.
+ * @handle: transaction handle
+ * @bh:     bh to 'forget'
+ *
+ * We can only do the bforget if there are no commits pending against the
 * buffer.  If the buffer is dirty in the current running transaction we
 * can safely unlink it.
 *
@@ -1213,7 +1248,6 @@ void journal_release_buffer (handle_t *handle, struct buffer_head *bh)
 * Allow this call even if the handle has aborted --- it may be part of
 * the caller's cleanup after an abort.
 */
-
 void journal_forget (handle_t *handle, struct buffer_head *bh)
 {
 	transaction_t *transaction = handle->h_transaction;
@@ -1352,8 +1386,14 @@ out:
 }
 #endif
 
-/*
- * Register a callback function for this handle.  The function will be
+/**
+ * void journal_callback_set() -  Register a callback function for this handle.
+ * @handle: handle to attach the callback to.
+ * @func: function to callback.
+ * @jcb:  structure with additional information required by func() , and
+ * some space for jbd internal information.
+ *
+ * The function will be
 * called when the transaction that this handle is part of has been
 * committed to disk with the original callback data struct and the
 * error status of the journal as parameters.  There is no guarantee of
@@ -1374,7 +1414,11 @@ void journal_callback_set(handle_t *handle,
 	jcb->jcb_func = func;
 }
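journal_callback_set() takes a caller-supplied struct journal_callback, so the usual idiom is to embed it in a larger private record and cast back inside the callback, which per the comment runs after commit with the journal's error status. A hedged sketch; only jcb_func and the described parameters are taken from the patch, and the wrapper struct, callback body and signature details are hypothetical:

/* Sketch only: per-operation record with an embedded callback. */
struct example_done {
	struct journal_callback jcb;	/* must come first for the cast */
	int cookie;
};

static void example_commit_done(struct journal_callback *jcb, int error)
{
	struct example_done *d = (struct example_done *)jcb;

	/* runs once the handle's transaction is on disk; 'error' is the
	 * journal's error status at commit time */
	printk(KERN_INFO "commit done, cookie %d, error %d\n",
	       d->cookie, error);
}

static void example_register(handle_t *handle, struct example_done *d)
{
	d->cookie = 42;
	journal_callback_set(handle, example_commit_done, &d->jcb);
}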
*/ - int journal_stop(handle_t *handle) { transaction_t *transaction = handle->h_transaction; @@ -1473,8 +1516,10 @@ int journal_stop(handle_t *handle) return err; } -/* - * For synchronous operations: force any uncommitted trasnactions +/** + * int journal_force_commit() - force any uncommitted transactions + * @journal: journal to force + * + * For synchronous operations: force any uncommitted transactions * to disk. May seem kludgy, but it reuses all the handle batching * code in a very simple manner. */ @@ -1667,6 +1712,26 @@ out: return 0; } + +/** + * int journal_try_to_free_buffers() - try to free page buffers. + * @journal: journal for operation + * @page: page to try and free + * @gfp_mask: 'IO' mode for try_to_free_buffers() + * + * For all the buffers on this page, + * if they are fully written out ordered data, move them onto BUF_CLEAN + * so try_to_free_buffers() can reap them. + * + * This function returns non-zero if we wish try_to_free_buffers() + * to be called. We do this if the page is releasable by try_to_free_buffers(). + * We also do it if the page has locked or dirty buffers and the caller wants + * us to perform sync or async writeout. + */ +int journal_try_to_free_buffers(journal_t *journal, + struct page *page, int unused_gfp_mask) +{ /* * journal_try_to_free_buffers(). Try to remove all this page's buffers * from the journal. @@ -1689,9 +1754,6 @@ out: * cannot happen because we never reallocate freed data as metadata * while the data is part of a transaction. Yes? */ -int journal_try_to_free_buffers(journal_t *journal, - struct page *page, int unused_gfp_mask) -{ struct buffer_head *head; struct buffer_head *bh; int ret = 0; @@ -1886,8 +1948,15 @@ zap_buffer: return may_free; } -/* - * Return non-zero if the page's buffers were successfully reaped +/** + * int journal_invalidatepage() + * @journal: journal to use for flush... + * @page: page to flush + * @offset: offset in the page from which to start invalidating. + * + * Reap page buffers containing data after offset in page. + * + * Return non-zero if the page's buffers were successfully reaped.
*/ int journal_invalidatepage(journal_t *journal, struct page *page, diff --git a/fs/jffs/intrep.c b/fs/jffs/intrep.c index d5b053e5b73a..6cf3d86a5d79 100644 --- a/fs/jffs/intrep.c +++ b/fs/jffs/intrep.c @@ -3347,10 +3347,10 @@ jffs_garbage_collect_thread(void *ptr) current->session = 1; current->pgrp = 1; init_completion(&c->gc_thread_comp); /* barrier */ - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); siginitsetinv (&current->blocked, sigmask(SIGHUP) | sigmask(SIGKILL) | sigmask(SIGSTOP) | sigmask(SIGCONT)); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); strcpy(current->comm, "jffs_gcd"); D1(printk (KERN_NOTICE "jffs_garbage_collect_thread(): Starting infinite loop.\n")); @@ -3378,9 +3378,9 @@ jffs_garbage_collect_thread(void *ptr) siginfo_t info; unsigned long signr = 0; - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); signr = dequeue_signal(&current->blocked, &info); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); switch(signr) { case SIGSTOP: diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index b1654cff562b..a5c35fdb51c8 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h @@ -54,7 +54,7 @@ #if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,40) #define current_sig_lock current->sigmask_lock #else -#define current_sig_lock current->sig->siglock +#define current_sig_lock current->sighand->siglock #endif static inline void jffs2_init_inode_info(struct jffs2_inode_info *f) diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index e3d931ff7ca2..360139794557 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -2139,10 +2139,10 @@ int jfsIOWait(void *arg) unlock_kernel(); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); sigfillset(&current->blocked); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); complete(&jfsIOwait); diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index 58df2f7c38cd..6af148d0387c 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c @@ -2780,10 +2780,10 @@ int jfs_lazycommit(void *arg) jfsCommitTask = current; - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); sigfillset(&current->blocked); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); LAZY_LOCK_INIT(); TxAnchor.unlock_queue = TxAnchor.unlock_tail = 0; @@ -2815,7 +2815,7 @@ restart: txLazyCommit(tblk); /* - * We can be running indefinately if other processors + * We can be running indefinitely if other processors * are adding transactions to this list */ cond_resched(); @@ -2985,10 +2985,10 @@ int jfs_sync(void *arg) unlock_kernel(); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); sigfillset(&current->blocked); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); complete(&jfsIOwait); diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 404ac2d3a95b..c4c4e0595163 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -139,7 +139,7 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl) } /* Keep the old signal mask */ - spin_lock_irqsave(&current->sig->siglock, flags); + spin_lock_irqsave(&current->sighand->siglock, flags); oldset = current->blocked; /* If we're cleaning up locks because the process is exiting, @@ -149,7 +149,7 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl) && (current->flags & PF_EXITING)) { sigfillset(&current->blocked);
/* Mask all signals */ recalc_sigpending(); - spin_unlock_irqrestore(&current->sig->siglock, flags); + spin_unlock_irqrestore(&current->sighand->siglock, flags); call = nlmclnt_alloc_call(); if (!call) { @@ -158,7 +158,7 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl) } call->a_flags = RPC_TASK_ASYNC; } else { - spin_unlock_irqrestore(&current->sig->siglock, flags); + spin_unlock_irqrestore(&current->sighand->siglock, flags); memset(call, 0, sizeof(*call)); locks_init_lock(&call->a_args.lock.fl); locks_init_lock(&call->a_res.lock.fl); @@ -183,10 +183,10 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl) kfree(call); out_restore: - spin_lock_irqsave(&current->sig->siglock, flags); + spin_lock_irqsave(&current->sighand->siglock, flags); current->blocked = oldset; recalc_sigpending(); - spin_unlock_irqrestore(&current->sig->siglock, flags); + spin_unlock_irqrestore(&current->sighand->siglock, flags); done: dprintk("lockd: clnt proc returns %d\n", status); @@ -588,11 +588,11 @@ nlmclnt_cancel(struct nlm_host *host, struct file_lock *fl) int status; /* Block all signals while setting up call */ - spin_lock_irqsave(&current->sig->siglock, flags); + spin_lock_irqsave(&current->sighand->siglock, flags); oldset = current->blocked; sigfillset(&current->blocked); recalc_sigpending(); - spin_unlock_irqrestore(&current->sig->siglock, flags); + spin_unlock_irqrestore(&current->sighand->siglock, flags); req = nlmclnt_alloc_call(); if (!req) @@ -607,10 +607,10 @@ nlmclnt_cancel(struct nlm_host *host, struct file_lock *fl) if (status < 0) kfree(req); - spin_lock_irqsave(&current->sig->siglock, flags); + spin_lock_irqsave(&current->sighand->siglock, flags); current->blocked = oldset; recalc_sigpending(); - spin_unlock_irqrestore(&current->sig->siglock, flags); + spin_unlock_irqrestore(&current->sighand->siglock, flags); return status; } diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index f608fbc8354b..a0cafbdfbb0a 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -101,10 +101,10 @@ lockd(struct svc_rqst *rqstp) sprintf(current->comm, "lockd"); /* Process request with signals blocked. */ - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); siginitsetinv(&current->blocked, sigmask(SIGKILL)); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); /* kick rpciod */ rpciod_up(); @@ -126,9 +126,9 @@ lockd(struct svc_rqst *rqstp) { long timeout = MAX_SCHEDULE_TIMEOUT; if (signalled()) { - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); flush_signals(current); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); if (nlmsvc_ops) { nlmsvc_invalidate_all(); grace_period_expire = set_grace_period(); @@ -297,9 +297,9 @@ lockd_down(void) "lockd_down: lockd failed to exit, clearing pid\n"); nlmsvc_pid = 0; } - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); out: up(&nlmsvc_sema); } diff --git a/fs/mpage.c b/fs/mpage.c index a44993cd7927..3460144c1894 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -116,6 +116,49 @@ mpage_alloc(struct block_device *bdev, return bio; } +/* + * support function for mpage_readpages. The fs-supplied get_block might + * return an up-to-date buffer. This is used to map that buffer into + * the page, which allows readpage to avoid triggering a duplicate call + * to get_block. + * + * The idea is to avoid adding buffers to pages that don't already have + * them.
So when the buffer is up to date and the page size == block size, + * this marks the page up to date instead of adding new buffers. + */ +static void +map_buffer_to_page(struct page *page, struct buffer_head *bh, int page_block) +{ + struct inode *inode = page->mapping->host; + struct buffer_head *page_bh, *head; + int block = 0; + + if (!page_has_buffers(page)) { + /* + * don't make any buffers if there is only one buffer on + * the page and the page just needs to be set up to date + */ + if (inode->i_blkbits == PAGE_CACHE_SHIFT && + buffer_uptodate(bh)) { + SetPageUptodate(page); + return; + } + create_empty_buffers(page, 1 << inode->i_blkbits, 0); + } + head = page_buffers(page); + page_bh = head; + do { + if (block == page_block) { + page_bh->b_state = bh->b_state; + page_bh->b_bdev = bh->b_bdev; + page_bh->b_blocknr = bh->b_blocknr; + break; + } + page_bh = page_bh->b_this_page; + block++; + } while (page_bh != head); +} + /** * mpage_readpages - populate an address space with some pages, and * start reads against them. @@ -186,6 +229,7 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, block_in_file = page->index << (PAGE_CACHE_SHIFT - blkbits); last_block = (inode->i_size + blocksize - 1) >> blkbits; + bh.b_page = page; for (page_block = 0; page_block < blocks_per_page; page_block++, block_in_file++) { bh.b_state = 0; @@ -200,6 +244,17 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, first_hole = page_block; continue; } + + /* some filesystems will copy data into the page during + * the get_block call, in which case we don't want to + * read it again. map_buffer_to_page copies the data + * we just collected from get_block into the page's buffers + * so readpage doesn't have to repeat the get_block call + */ + if (buffer_uptodate(&bh)) { + map_buffer_to_page(page, &bh, page_block); + goto confused; + } if (first_hole != blocks_per_page) goto confused; /* hole -> non-hole */ @@ -256,7 +311,10 @@ out: confused: if (bio) bio = mpage_bio_submit(READ, bio); - block_read_full_page(page, get_block); + if (!PageUptodate(page)) + block_read_full_page(page, get_block); + else + unlock_page(page); goto out; } @@ -344,6 +402,7 @@ mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block, sector_t boundary_block = 0; struct block_device *boundary_bdev = NULL; int length; + struct buffer_head map_bh; if (page_has_buffers(page)) { struct buffer_head *head = page_buffers(page); @@ -401,8 +460,8 @@ mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block, BUG_ON(!PageUptodate(page)); block_in_file = page->index << (PAGE_CACHE_SHIFT - blkbits); last_block = (inode->i_size - 1) >> blkbits; + map_bh.b_page = page; for (page_block = 0; page_block < blocks_per_page; ) { - struct buffer_head map_bh; map_bh.b_state = 0; if (get_block(inode, block_in_file, &map_bh, 1)) @@ -559,7 +618,6 @@ mpage_writepages(struct address_space *mapping, int (*writepage)(struct page *page, struct writeback_control *wbc); if (wbc->nonblocking && bdi_write_congested(bdi)) { - blk_run_queues(); wbc->encountered_congestion = 1; return 0; } @@ -614,7 +672,6 @@ mpage_writepages(struct address_space *mapping, if (ret || (--(wbc->nr_to_write) <= 0)) done = 1; if (wbc->nonblocking && bdi_write_congested(bdi)) { - blk_run_queues(); wbc->encountered_congestion = 1; done = 1; } diff --git a/fs/ncpfs/sock.c b/fs/ncpfs/sock.c index 93ba7610dde0..f01c538eb282 100644 --- a/fs/ncpfs/sock.c +++ b/fs/ncpfs/sock.c @@ -745,7 +745,7 @@ static int 
ncp_do_request(struct ncp_server *server, int size, sigset_t old_set; unsigned long mask, flags; - spin_lock_irqsave(&current->sig->siglock, flags); + spin_lock_irqsave(&current->sighand->siglock, flags); old_set = current->blocked; if (current->flags & PF_EXITING) mask = 0; @@ -764,7 +764,7 @@ static int ncp_do_request(struct ncp_server *server, int size, } siginitsetinv(&current->blocked, mask); recalc_sigpending(); - spin_unlock_irqrestore(&current->sig->siglock, flags); + spin_unlock_irqrestore(&current->sighand->siglock, flags); fs = get_fs(); set_fs(get_ds()); @@ -773,10 +773,10 @@ static int ncp_do_request(struct ncp_server *server, int size, set_fs(fs); - spin_lock_irqsave(&current->sig->siglock, flags); + spin_lock_irqsave(&current->sighand->siglock, flags); current->blocked = old_set; recalc_sigpending(); - spin_unlock_irqrestore(&current->sig->siglock, flags); + spin_unlock_irqrestore(&current->sighand->siglock, flags); } DDPRINTK("do_ncp_rpc_call returned %d\n", result); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7cfd12436c79..1f1ab0213a87 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -743,8 +743,8 @@ nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, goto out; /* - * Now we do a seperate LOOKUP for each component of the mount path. - * The LOOKUPs are done seperately so that we can conveniently + * Now we do a separate LOOKUP for each component of the mount path. + * The LOOKUPs are done separately so that we can conveniently * catch an ERR_WRONGSEC if it occurs along the way... */ p = server->mnt_path; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 94f48ae35e95..3919e77036e3 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -189,10 +189,10 @@ nfsd(struct svc_rqst *rqstp) */ for (;;) { /* Block all but the shutdown signals */ - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); siginitsetinv(&current->blocked, SHUTDOWN_SIGS); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); /* * Find a socket with data available and call its @@ -210,10 +210,10 @@ nfsd(struct svc_rqst *rqstp) exp_readlock(); /* Process request with signals blocked. */ - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); siginitsetinv(&current->blocked, ALLOWED_SIGS); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); svc_process(serv, rqstp); diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c index 9a33c941cd5c..e0d448b3a6c9 100644 --- a/fs/partitions/ldm.c +++ b/fs/partitions/ldm.c @@ -510,7 +510,7 @@ static BOOL ldm_validate_vmdb (struct block_device *bdev, unsigned long base, /* Are there uncommitted transactions? */ if (BE16(data + 0x10) != 0x01) { - ldm_crit ("Database is not in a consistant state. Aborting."); + ldm_crit ("Database is not in a consistent state.
Aborting."); goto out; } diff --git a/fs/proc/array.c b/fs/proc/array.c index e135ac5a1080..df1501a0f332 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -190,16 +190,16 @@ static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign, sigemptyset(catch); read_lock(&tasklist_lock); - if (p->sig) { - spin_lock_irq(&p->sig->siglock); - k = p->sig->action; + if (p->sighand) { + spin_lock_irq(&p->sighand->siglock); + k = p->sighand->action; for (i = 1; i <= _NSIG; ++i, ++k) { if (k->sa.sa_handler == SIG_IGN) sigaddset(ign, i); else if (k->sa.sa_handler != SIG_DFL) sigaddset(catch, i); } - spin_unlock_irq(&p->sig->siglock); + spin_unlock_irq(&p->sighand->siglock); } read_unlock(&tasklist_lock); } diff --git a/fs/quota_v2.c b/fs/quota_v2.c index 64811521d0ce..c051de09c559 100644 --- a/fs/quota_v2.c +++ b/fs/quota_v2.c @@ -306,6 +306,7 @@ static uint find_free_dqentry(struct dquot *dquot, int *err) blk = get_free_dqblk(filp, info); if ((int)blk < 0) { *err = blk; + freedqbuf(buf); return 0; } memset(buf, 0, V2_DQBLKSIZE); diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 817c5c465d19..a9e5003b3589 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -11,6 +11,8 @@ #include <asm/uaccess.h> #include <asm/unaligned.h> #include <linux/buffer_head.h> +#include <linux/mpage.h> +#include <linux/writeback.h> /* args for the create parameter of reiserfs_get_block */ #define GET_BLOCK_NO_CREATE 0 /* don't create new blocks or convert tails */ @@ -262,7 +264,10 @@ research: blocknr = get_block_num(ind_item, path.pos_in_item) ; ret = 0 ; if (blocknr) { - map_bh(bh_result, inode->i_sb, blocknr); + map_bh(bh_result, inode->i_sb, blocknr); + if (path.pos_in_item == ((ih_item_len(ih) / UNFM_P_SIZE) - 1)) { + set_buffer_boundary(bh_result); + } } else // We do not return -ENOENT if there is a hole but page is uptodate, because it means // That there is some MMAPED data associated with it that is yet to be written to disk. @@ -286,7 +291,7 @@ research: return -ENOENT; } - /* if we've got a direct item, and the buffer was uptodate, + /* if we've got a direct item, and the buffer or page was uptodate, ** we don't want to pull data off disk again. skip to the ** end, where we map the buffer and return */ @@ -367,7 +372,9 @@ research: finished: pathrelse (&path); - /* I _really_ doubt that you want it. Chris? */ + /* this buffer has valid data, but isn't valid for io. 
mapping it to + * block #0 tells the rest of reiserfs it just has a tail in it + */ map_bh(bh_result, inode->i_sb, 0); set_buffer_uptodate (bh_result); return 0; @@ -842,6 +849,12 @@ int reiserfs_get_block (struct inode * inode, sector_t block, return retval; } +static int +reiserfs_readpages(struct file *file, struct address_space *mapping, + struct list_head *pages, unsigned nr_pages) +{ + return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_block); +} // // BAD: new directories have stat data of new type and all other items @@ -1809,13 +1822,19 @@ static int map_block_for_writepage(struct inode *inode, int use_get_block = 0 ; int bytes_copied = 0 ; int copy_size ; + int trans_running = 0; + + /* catch places below that try to log something without starting a trans */ + th.t_trans_id = 0; + + if (!buffer_uptodate(bh_result)) { + buffer_error(); + return -EIO; + } kmap(bh_result->b_page) ; start_over: reiserfs_write_lock(inode->i_sb); - journal_begin(&th, inode->i_sb, jbegin_count) ; - reiserfs_update_inode_transaction(inode) ; - make_cpu_key(&key, inode, byte_offset, TYPE_ANY, 3) ; research: @@ -1841,7 +1860,6 @@ research: goto out ; } set_block_dev_mapped(bh_result, get_block_num(item,pos_in_item),inode); - set_buffer_uptodate(bh_result); } else if (is_direct_le_ih(ih)) { char *p ; p = page_address(bh_result->b_page) ; @@ -1850,7 +1868,20 @@ research: fs_gen = get_generation(inode->i_sb) ; copy_item_head(&tmp_ih, ih) ; + + if (!trans_running) { + /* vs-3050 is gone, no need to drop the path */ + journal_begin(&th, inode->i_sb, jbegin_count) ; + reiserfs_update_inode_transaction(inode) ; + trans_running = 1; + if (fs_changed(fs_gen, inode->i_sb) && item_moved(&tmp_ih, &path)) { + reiserfs_restore_prepared_buffer(inode->i_sb, bh) ; + goto research; + } + } + reiserfs_prepare_for_journal(inode->i_sb, bh, 1) ; + if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) { reiserfs_restore_prepared_buffer(inode->i_sb, bh) ; goto research; @@ -1861,7 +1892,6 @@ research: journal_mark_dirty(&th, inode->i_sb, bh) ; bytes_copied += copy_size ; set_block_dev_mapped(bh_result, 0, inode); - set_buffer_uptodate(bh_result); /* are there still bytes left? */ if (bytes_copied < bh_result->b_size && @@ -1878,7 +1908,10 @@ research: out: pathrelse(&path) ; - journal_end(&th, inode->i_sb, jbegin_count) ; + if (trans_running) { + journal_end(&th, inode->i_sb, jbegin_count) ; + trans_running = 0; + } reiserfs_write_unlock(inode->i_sb); /* this is where we fill in holes in the file. */ @@ -1894,49 +1927,77 @@ out: } } kunmap(bh_result->b_page) ; + + if (!retval && buffer_mapped(bh_result) && bh_result->b_blocknr == 0) { + /* we've copied data from the page into the direct item, so the + * buffer in the page is now clean, mark it to reflect that. + */ + lock_buffer(bh_result); + clear_buffer_dirty(bh_result); + unlock_buffer(bh_result); + } return retval ; } -/* helper func to get a buffer head ready for writepage to send to -** ll_rw_block -*/ -static inline void submit_bh_for_writepage(struct buffer_head **bhp, int nr) { - struct buffer_head *bh ; - int i; - for(i = 0 ; i < nr ; i++) { - bh = bhp[i] ; - lock_buffer(bh) ; - mark_buffer_async_write(bh) ; - /* submit_bh doesn't care if the buffer is dirty, but nobody - ** later on in the call chain will be cleaning it. So, we - ** clean the buffer here, it still gets written either way. 
- */ - clear_buffer_dirty(bh) ; - set_buffer_uptodate(bh) ; - submit_bh(WRITE, bh) ; +/* + * does the right thing for deciding when to lock a buffer and + * mark it for io during a writepage. make sure the buffer is + * dirty before sending it here though. + */ +static void lock_buffer_for_writepage(struct page *page, + struct writeback_control *wbc, + struct buffer_head *bh) +{ + if (wbc->sync_mode != WB_SYNC_NONE) { + lock_buffer(bh); + } else { + if (test_set_buffer_locked(bh)) { + __set_page_dirty_nobuffers(page); + return; + } + } + if (test_clear_buffer_dirty(bh)) { + if (!buffer_uptodate(bh)) + buffer_error(); + mark_buffer_async_write(bh); + } else { + unlock_buffer(bh); } } +/* + * mason@suse.com: updated in 2.5.54 to follow the same general io + * start/recovery path as __block_write_full_page, along with special + * code to handle reiserfs tails. + */ static int reiserfs_write_full_page(struct page *page, struct writeback_control *wbc) { struct inode *inode = page->mapping->host ; unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT ; - unsigned last_offset = PAGE_CACHE_SIZE; int error = 0; unsigned long block ; - unsigned cur_offset = 0 ; - struct buffer_head *head, *bh ; + struct buffer_head *head, *bh; int partial = 0 ; - struct buffer_head *arr[PAGE_CACHE_SIZE/512] ; - int nr = 0 ; + int nr = 0; - if (!page_has_buffers(page)) - block_prepare_write(page, 0, 0, NULL) ; + /* The page dirty bit is cleared before writepage is called, which + * means we have to tell create_empty_buffers to make dirty buffers + * The page really should be up to date at this point, so tossing + * in the BH_Uptodate is just a sanity check. + */ + if (!page_has_buffers(page)) { + if (!PageUptodate(page)) + buffer_error(); + create_empty_buffers(page, inode->i_sb->s_blocksize, + (1 << BH_Dirty) | (1 << BH_Uptodate)); + } + head = page_buffers(page) ; /* last page in the file, zero out any contents past the ** last byte in the file */ if (page->index >= end_index) { char *kaddr; + unsigned last_offset; last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1) ; /* no file contents in this page */ @@ -1949,66 +2010,107 @@ static int reiserfs_write_full_page(struct page *page, struct writeback_control flush_dcache_page(page) ; kunmap_atomic(kaddr, KM_USER0) ; } - head = page_buffers(page) ; bh = head ; block = page->index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits) ; do { - /* if this offset in the page is outside the file */ - if (cur_offset >= last_offset) { - if (!buffer_uptodate(bh)) - partial = 1 ; - } else { - /* fast path, buffer mapped to an unformatted node */ + get_bh(bh); + if (buffer_dirty(bh)) { if (buffer_mapped(bh) && bh->b_blocknr != 0) { - arr[nr++] = bh ; + /* buffer mapped to an unformatted node */ + lock_buffer_for_writepage(page, wbc, bh); } else { - /* buffer not mapped yet, or points to a direct item. - ** search and dirty or log - */ + /* not mapped yet, or it points to a direct item, search + * the btree for the mapping info, and log any direct + * items found + */ if ((error = map_block_for_writepage(inode, bh, block))) { goto fail ; } - /* map_block_for_writepage either found an unformatted node - ** and mapped it for us, or it found a direct item - ** and logged the changes. 
- */ - if (buffer_mapped(bh) && bh->b_blocknr != 0) { - arr[nr++] = bh ; - } + if (buffer_mapped(bh) && bh->b_blocknr != 0) { + /* buffer mapped to an unformatted node */ + lock_buffer_for_writepage(page, wbc, bh); + } } } - bh = bh->b_this_page ; - cur_offset += bh->b_size ; - block++ ; + bh = bh->b_this_page; + block++; } while(bh != head) ; - if (!partial) - SetPageUptodate(page) ; BUG_ON(PageWriteback(page)); SetPageWriteback(page); unlock_page(page); - /* if this page only had a direct item, it is very possible for - ** nr == 0 without there being any kind of error. - */ - if (nr) { - submit_bh_for_writepage(arr, nr) ; - } else { - end_page_writeback(page) ; + /* + * since any buffer might be the only dirty buffer on the page, + * the first submit_bh can bring the page out of writeback. + * be careful with the buffers. + */ + do { + struct buffer_head *next = bh->b_this_page; + if (buffer_async_write(bh)) { + submit_bh(WRITE, bh); + nr++; + } + put_bh(bh); + bh = next; + } while(bh != head); + + error = 0; +done: + if (nr == 0) { + /* + * if this page only had a direct item, it is very possible for + * no io to be required without there being an error. Or, + * someone else could have locked them and sent them down the + * pipe without locking the page + */ + bh = head; + do { + if (!buffer_uptodate(bh)) { + partial = 1; + break; + } + bh = bh->b_this_page; + } while(bh != head); + if (!partial) + SetPageUptodate(page); + end_page_writeback(page); } - - return 0 ; + return error; fail: - if (nr) { - SetPageWriteback(page); - unlock_page(page); - submit_bh_for_writepage(arr, nr) ; - } else { - unlock_page(page) ; - } - ClearPageUptodate(page) ; - return error ; + /* catches various errors; we need to make sure any valid dirty blocks + * get to the media. The page is currently locked and not marked for + * writeback + */ + ClearPageUptodate(page); + bh = head; + do { + get_bh(bh); + if (buffer_mapped(bh) && buffer_dirty(bh) && bh->b_blocknr) { + lock_buffer(bh); + mark_buffer_async_write(bh); + } else { + /* + * clear any dirty bits that might have come from getting + * attached to a dirty page + */ + clear_buffer_dirty(bh); + } + bh = bh->b_this_page; + } while(bh != head); + SetPageError(page); + BUG_ON(PageWriteback(page)); + SetPageWriteback(page); + unlock_page(page); + do { + struct buffer_head *next = bh->b_this_page; + if (buffer_async_write(bh)) { + clear_buffer_dirty(bh); + submit_bh(WRITE, bh); + nr++; + } + put_bh(bh); + bh = next; + } while(bh != head); + goto done; } @@ -2115,6 +2217,7 @@ static int reiserfs_releasepage(struct page *page, int unused_gfp_flags) struct address_space_operations reiserfs_address_space_operations = { .writepage = reiserfs_writepage, .readpage = reiserfs_readpage, + .readpages = reiserfs_readpages, .releasepage = reiserfs_releasepage, .sync_page = block_sync_page, .prepare_write = reiserfs_prepare_write, diff --git a/fs/smbfs/smbiod.c b/fs/smbfs/smbiod.c index 5f7d1d5969fe..41d5bbd8a334 100644 --- a/fs/smbfs/smbiod.c +++ b/fs/smbfs/smbiod.c @@ -285,10 +285,10 @@ static int smbiod(void *unused) MOD_INC_USE_COUNT; daemonize(); - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); siginitsetinv(&current->blocked, sigmask(SIGKILL)); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); strcpy(current->comm, "smbiod"); diff --git a/fs/xfs/linux/xfs_aops.c b/fs/xfs/linux/xfs_aops.c index 9398993ec4d4..5505483ca88c 100644 --- a/fs/xfs/linux/xfs_aops.c +++ b/fs/xfs/linux/xfs_aops.c @@ -50,7 +50,7 @@ map_blocks( if (((flags & (PBF_DIRECT|PBF_SYNC)) == PBF_DIRECT) && (offset >=
inode->i_size)) - count = max(count, XFS_WRITE_IO_LOG); + count = max_t(ssize_t, count, XFS_WRITE_IO_LOG); retry: VOP_BMAP(vp, offset, count, flags, pbmapp, &nmaps, error); if (flags & PBF_WRITE) { diff --git a/fs/xfs/pagebuf/page_buf.c b/fs/xfs/pagebuf/page_buf.c index 4c60a8799fcb..d6b027eb2022 100644 --- a/fs/xfs/pagebuf/page_buf.c +++ b/fs/xfs/pagebuf/page_buf.c @@ -1581,10 +1581,10 @@ pagebuf_daemon( daemonize(); /* Avoid signals */ - spin_lock_irq(&current->sig->siglock); + spin_lock_irq(&current->sighand->siglock); sigfillset(&current->blocked); recalc_sigpending(); - spin_unlock_irq(&current->sig->siglock); + spin_unlock_irq(&current->sighand->siglock); strcpy(current->comm, "pagebufd"); current->flags |= PF_MEMALLOC; diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 27d7013b4d80..8a20f1cfc415 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -5522,7 +5522,7 @@ xfs_getbmap( int prealloced; /* this is a file with * preallocated data space */ int sh_unwritten; /* true, if unwritten */ - /* extents listed seperately */ + /* extents listed separately */ int bmapi_flags; /* flags for xfs_bmapi */ __int32_t oflags; /* getbmapx bmv_oflags field */
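
The kerneldoc blocks added to fs/jbd/transaction.c above document the handle API one call at a time. For orientation, here is a minimal sketch of how a caller typically strings those calls together for a single metadata update. It is not part of the patch; it assumes <linux/jbd.h>, an already-running journal_t, and a buffer_head the caller holds a reference on, and the function name is made up.

/*
 * Sketch only: bracket one metadata modification with a jbd handle,
 * using the calls documented in the hunks above.
 */
static int modify_one_metadata_block(journal_t *journal,
				     struct buffer_head *bh)
{
	handle_t *handle;
	int err;

	/* reserve credit for one metadata buffer in the transaction */
	handle = journal_start(journal, 1);
	if (IS_ERR(handle))
		return PTR_ERR(handle);

	/* declare intent to modify bh before touching bh->b_data */
	err = journal_get_write_access(handle, bh);
	if (!err) {
		/* ... modify bh->b_data here ... */
		err = journal_dirty_metadata(handle, bh);
	}

	/* drop the handle; -EIO here means the journal was aborted */
	if (journal_stop(handle) == -EIO && !err)
		err = -EIO;
	return err;
}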
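
journal_get_undo_access() is the variant for changes that must not become visible before commit; the comment above names block deallocation as the motivating case. A hypothetical sketch of that shape follows. The helper is illustrative, not ext3's actual bitmap code.

/*
 * Sketch only: clear a bit in an allocation bitmap while jbd keeps the
 * committed copy of the buffer, so the freed space cannot be reused
 * until the deallocation itself has committed.
 */
static int free_bitmap_bit(handle_t *handle, struct buffer_head *bitmap_bh,
			   int bit)
{
	int err = journal_get_undo_access(handle, bitmap_bh);

	if (err)
		return err;
	clear_bit(bit, (unsigned long *)bitmap_bh->b_data);
	return journal_dirty_metadata(handle, bitmap_bh);
}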
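
journal_callback_set() links the caller-allocated struct journal_callback into the handle, so the usual pattern is to embed it in a larger struct and recover that struct in the callback. A hypothetical embedding; the struct name, payload and kfree() policy are all illustrative.

struct my_commit_info {
	struct journal_callback jcb;	/* jbd's linkage and function pointer */
	sector_t blocknr;		/* caller's payload */
};

/* runs once the handle's transaction has committed; error is the
 * journal's error status at that point */
static void my_commit_done(struct journal_callback *jcb, int error)
{
	struct my_commit_info *info = (struct my_commit_info *)jcb;

	if (!error)
		printk(KERN_DEBUG "block %llu is on disk\n",
		       (unsigned long long)info->blocknr);
	kfree(info);
}

While the handle is still open, the caller would register this with journal_callback_set(handle, my_commit_done, &info->jcb).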
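
journal_try_to_free_buffers() and journal_invalidatepage(), documented above, are meant to sit behind the corresponding address_space operations of a journalled filesystem. A rough sketch of that glue for a hypothetical filesystem; myfs_journal() is a stand-in for however the fs locates its journal_t, and the operation signatures are the 2.5-era ones used elsewhere in this diff.

static journal_t *myfs_journal(struct inode *inode);	/* assumed helper */

static int myfs_releasepage(struct page *page, int gfp_mask)
{
	journal_t *journal = myfs_journal(page->mapping->host);

	/* non-zero tells the VM that try_to_free_buffers() may proceed */
	return journal_try_to_free_buffers(journal, page, gfp_mask);
}

static int myfs_invalidatepage(struct page *page, unsigned long offset)
{
	journal_t *journal = myfs_journal(page->mapping->host);

	/* reap buffers containing data after offset in the page */
	return journal_invalidatepage(journal, page, offset);
}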
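
Away from jbd, most of this diff is the mechanical current->sig to current->sighand rename, and nearly every touched site is a variant of one idiom: take the per-process signal lock, adjust current->blocked, recompute the pending flag, unlock. Two sketches of the idiom as stand-alone helpers; the helper names are hypothetical, but the calls are exactly those in the hunks.

/* Sketch: a kernel thread blocks every signal, as the jfs, pagebuf
 * and smbfs daemons above do at startup. */
static void daemon_block_all_signals(void)
{
	spin_lock_irq(&current->sighand->siglock);
	sigfillset(&current->blocked);
	recalc_sigpending();
	spin_unlock_irq(&current->sighand->siglock);
}

/* Sketch: block everything except SIGKILL, as lockd and smbiod do. */
static void daemon_allow_only_sigkill(void)
{
	spin_lock_irq(&current->sighand->siglock);
	siginitsetinv(&current->blocked, sigmask(SIGKILL));
	recalc_sigpending();
	spin_unlock_irq(&current->sighand->siglock);
}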
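
Paths that only need signals masked temporarily (autofs, the lockd client calls, ncpfs) add a save and restore step around the blocking work. A sketch of that shape, with the blocking operation passed in as a hypothetical callback:

/*
 * Sketch only: run fn(arg) with all signals masked, then restore the
 * caller's mask.  The irqsave variants mirror the converted call sites.
 */
static int run_with_signals_masked(int (*fn)(void *), void *arg)
{
	sigset_t oldset;
	unsigned long flags;
	int status;

	spin_lock_irqsave(&current->sighand->siglock, flags);
	oldset = current->blocked;		/* remember the caller's mask */
	sigfillset(&current->blocked);
	recalc_sigpending();
	spin_unlock_irqrestore(&current->sighand->siglock, flags);

	status = fn(arg);

	spin_lock_irqsave(&current->sighand->siglock, flags);
	current->blocked = oldset;		/* put the mask back */
	recalc_sigpending();
	spin_unlock_irqrestore(&current->sighand->siglock, flags);
	return status;
}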
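
The new map_buffer_to_page() in fs/mpage.c covers get_block callbacks that hand back an already up-to-date buffer, which happens when the filesystem copies packed or tail data into the buffer during the lookup (reiserfs tails being the case this patch cares about). A hypothetical get_block showing how such a buffer would be produced; myfs_read_packed_tail() and myfs_lookup_block() are assumed helpers, not real interfaces.

static int myfs_read_packed_tail(struct inode *inode, sector_t iblock,
				 char *buf, int size);		/* assumed */
static sector_t myfs_lookup_block(struct inode *inode, sector_t iblock);

static int myfs_get_block(struct inode *inode, sector_t iblock,
			  struct buffer_head *bh_result, int create)
{
	int len = myfs_read_packed_tail(inode, iblock, bh_result->b_data,
					bh_result->b_size);

	if (len > 0) {
		/* data was copied during the lookup: zero the remainder
		 * and mark the buffer up to date so mpage skips the read */
		memset(bh_result->b_data + len, 0, bh_result->b_size - len);
		set_buffer_uptodate(bh_result);
		return 0;
	}
	map_bh(bh_result, inode->i_sb, myfs_lookup_block(inode, iblock));
	return 0;
}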
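
Finally, the reiserfs hunk wires the new ->readpages operation straight through to mpage_readpages(), and any get_block-based filesystem can use the same two-liner. A sketch reusing the hypothetical myfs_get_block above:

/* Sketch only: let fs/mpage.c batch contiguous blocks from the whole
 * page list into large BIOs; do_mpage_readpage falls back to
 * block_read_full_page() for pages it cannot handle simply. */
static int myfs_readpages(struct file *file, struct address_space *mapping,
			  struct list_head *pages, unsigned nr_pages)
{
	return mpage_readpages(mapping, pages, nr_pages, myfs_get_block);
}

It would then be hooked up next to .readpage in the filesystem's address_space_operations table, exactly as the reiserfs hunk does.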
