| author | Nathan Scott <nathans@sgi.com> | 2004-04-29 10:19:56 +1000 |
|---|---|---|
| committer | Nathan Scott <nathans@sgi.com> | 2004-04-29 10:19:56 +1000 |
| commit | adda10937c78955b4e51c837f7f00a1bb6fa445f | |
| tree | 41787767176c1b7384e6ec80ceafad87833daa6e /fs | |
| parent | 16bd713551a4857a002e8e28e3d6e7c6421d63bf | |
| parent | d61a4de11ca55278c8874cb3f91d03518c2052c9 | |
Merge sgi.com:/source2/linux-2.6 into sgi.com:/source2/xfs-linux-2.6
Diffstat (limited to 'fs')
| -rw-r--r-- | fs/xfs/linux/xfs_aops.c | 39 |
|---|---|---|
| -rw-r--r-- | fs/xfs/linux/xfs_buf.c | 689 |
| -rw-r--r-- | fs/xfs/linux/xfs_buf.h | 158 |
| -rw-r--r-- | fs/xfs/linux/xfs_globals.c | 26 |
| -rw-r--r-- | fs/xfs/linux/xfs_ioctl.c | 2 |
| -rw-r--r-- | fs/xfs/linux/xfs_linux.h | 15 |
| -rw-r--r-- | fs/xfs/linux/xfs_lrw.c | 4 |
| -rw-r--r-- | fs/xfs/linux/xfs_lrw.h | 6 |
| -rw-r--r-- | fs/xfs/linux/xfs_super.c | 36 |
| -rw-r--r-- | fs/xfs/linux/xfs_super.h | 16 |
| -rw-r--r-- | fs/xfs/xfs_acl.c | 2 |
| -rw-r--r-- | fs/xfs/xfs_dir2_node.c | 25 |
| -rw-r--r-- | fs/xfs/xfs_iomap.c | 7 |
| -rw-r--r-- | fs/xfs/xfs_vfsops.c | 20 |
| -rw-r--r-- | fs/xfs/xfs_vnodeops.c | 16 |
15 files changed, 491 insertions(+), 570 deletions(-)
diff --git a/fs/xfs/linux/xfs_aops.c b/fs/xfs/linux/xfs_aops.c index a25da32753fe..3afc61d10b1b 100644 --- a/fs/xfs/linux/xfs_aops.c +++ b/fs/xfs/linux/xfs_aops.c @@ -108,7 +108,7 @@ linvfs_unwritten_done( struct buffer_head *bh, int uptodate) { - page_buf_t *pb = (page_buf_t *)bh->b_private; + xfs_buf_t *pb = (xfs_buf_t *)bh->b_private; ASSERT(buffer_unwritten(bh)); bh->b_end_io = NULL; @@ -265,9 +265,9 @@ xfs_map_at_offset( STATIC struct page * xfs_probe_unwritten_page( struct address_space *mapping, - unsigned long index, + pgoff_t index, xfs_iomap_t *iomapp, - page_buf_t *pb, + xfs_buf_t *pb, unsigned long max_offset, unsigned long *fsbs, unsigned int bbits) @@ -316,7 +316,7 @@ out: STATIC unsigned int xfs_probe_unmapped_page( struct address_space *mapping, - unsigned long index, + pgoff_t index, unsigned int pg_offset) { struct page *page; @@ -356,8 +356,8 @@ xfs_probe_unmapped_cluster( struct buffer_head *bh, struct buffer_head *head) { - unsigned long tindex, tlast, tloff; - unsigned int len, total = 0; + pgoff_t tindex, tlast, tloff; + unsigned int pg_offset, len, total = 0; struct address_space *mapping = inode->i_mapping; /* First sum forwards in this page */ @@ -382,9 +382,9 @@ xfs_probe_unmapped_cluster( total += len; } if (tindex == tlast && - (tloff = i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) { + (pg_offset = i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) { total += xfs_probe_unmapped_page(mapping, - tindex, tloff); + tindex, pg_offset); } } return total; @@ -398,7 +398,7 @@ xfs_probe_unmapped_cluster( STATIC struct page * xfs_probe_delalloc_page( struct inode *inode, - unsigned long index) + pgoff_t index) { struct page *page; @@ -445,7 +445,7 @@ xfs_map_unwritten( { struct buffer_head *bh = curr; xfs_iomap_t *tmp; - page_buf_t *pb; + xfs_buf_t *pb; loff_t offset, size; unsigned long nblocks = 0; @@ -497,8 +497,9 @@ xfs_map_unwritten( */ if (bh == head) { struct address_space *mapping = inode->i_mapping; - unsigned long tindex, tloff, tlast, bs; - unsigned int bbits = inode->i_blkbits; + pgoff_t tindex, tloff, tlast; + unsigned long bs; + unsigned int pg_offset, bbits = inode->i_blkbits; struct page *page; tlast = i_size_read(inode) >> PAGE_CACHE_SHIFT; @@ -522,10 +523,10 @@ xfs_map_unwritten( } if (tindex == tlast && - (tloff = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)))) { + (pg_offset = (i_size_read(inode) & (PAGE_CACHE_SIZE - 1)))) { page = xfs_probe_unwritten_page(mapping, tindex, iomapp, pb, - tloff, &bs, bbits); + pg_offset, &bs, bbits); if (page) { nblocks += bs; atomic_add(bs, &pb->pb_io_remaining); @@ -603,7 +604,8 @@ xfs_convert_page( { struct buffer_head *bh_arr[MAX_BUF_PER_PAGE], *bh, *head; xfs_iomap_t *mp = iomapp, *tmp; - unsigned long end, offset, end_index; + unsigned long end, offset; + pgoff_t end_index; int i = 0, index = 0; int bbits = inode->i_blkbits; @@ -671,12 +673,12 @@ xfs_convert_page( STATIC void xfs_cluster_write( struct inode *inode, - unsigned long tindex, + pgoff_t tindex, xfs_iomap_t *iomapp, int startio, int all_bh) { - unsigned long tlast; + pgoff_t tlast; struct page *page; tlast = (iomapp->iomap_offset + iomapp->iomap_bsize) >> PAGE_CACHE_SHIFT; @@ -716,7 +718,8 @@ xfs_page_state_convert( { struct buffer_head *bh_arr[MAX_BUF_PER_PAGE], *bh, *head; xfs_iomap_t *iomp, iomap; - unsigned long p_offset = 0, end_index; + unsigned long p_offset = 0; + pgoff_t end_index; loff_t offset; unsigned long long end_offset; int len, err, i, cnt = 0, uptodate = 1; diff --git a/fs/xfs/linux/xfs_buf.c b/fs/xfs/linux/xfs_buf.c index 
d34846671e06..ce514381e53b 100644 --- a/fs/xfs/linux/xfs_buf.c +++ b/fs/xfs/linux/xfs_buf.c @@ -31,14 +31,10 @@ */ /* - * page_buf.c - * - * The page_buf module provides an abstract buffer cache model on top of - * the Linux page cache. Cached metadata blocks for a file system are - * hashed to the inode for the block device. The page_buf module - * assembles buffer (page_buf_t) objects on demand to aggregate such - * cached pages for I/O. - * + * The xfs_buf.c code provides an abstract buffer cache model on top + * of the Linux page cache. Cached metadata blocks for a file system + * are hashed to the inode for the block device. xfs_buf.c assembles + * buffers (xfs_buf_t) on demand to aggregate such cached pages for I/O. * * Written by Steve Lord, Jim Mostek, Russell Cattelan * and Rajagopal Ananthanarayanan ("ananth") at SGI. @@ -71,7 +67,7 @@ STATIC kmem_cache_t *pagebuf_cache; STATIC void pagebuf_daemon_wakeup(void); -STATIC void pagebuf_delwri_queue(page_buf_t *, int); +STATIC void pagebuf_delwri_queue(xfs_buf_t *, int); STATIC struct workqueue_struct *pagebuf_logio_workqueue; STATIC struct workqueue_struct *pagebuf_dataio_workqueue; @@ -82,7 +78,7 @@ STATIC struct workqueue_struct *pagebuf_dataio_workqueue; #ifdef PAGEBUF_TRACE void pagebuf_trace( - page_buf_t *pb, + xfs_buf_t *pb, char *id, void *data, void *ra) @@ -169,8 +165,6 @@ _bhash( * Mapping of multi-page buffers into contiguous virtual space */ -STATIC void *pagebuf_mapout_locked(page_buf_t *); - typedef struct a_list { void *vm_addr; struct a_list *next; @@ -229,8 +223,8 @@ purge_addresses(void) STATIC void _pagebuf_initialize( - page_buf_t *pb, - pb_target_t *target, + xfs_buf_t *pb, + xfs_buftarg_t *target, loff_t range_base, size_t range_length, page_buf_flags_t flags) @@ -240,7 +234,7 @@ _pagebuf_initialize( */ flags &= ~(PBF_LOCK|PBF_MAPPED|PBF_DONT_BLOCK|PBF_READ_AHEAD); - memset(pb, 0, sizeof(page_buf_t)); + memset(pb, 0, sizeof(xfs_buf_t)); atomic_set(&pb->pb_hold, 1); init_MUTEX_LOCKED(&pb->pb_iodonesema); INIT_LIST_HEAD(&pb->pb_list); @@ -251,12 +245,12 @@ _pagebuf_initialize( pb->pb_file_offset = range_base; /* * Set buffer_length and count_desired to the same value initially. - * IO routines should use count_desired, which will be the same in + * I/O routines should use count_desired, which will be the same in * most cases but may be reset (e.g. XFS recovery). */ pb->pb_buffer_length = pb->pb_count_desired = range_length; pb->pb_flags = flags | PBF_NONE; - pb->pb_bn = PAGE_BUF_DADDR_NULL; + pb->pb_bn = XFS_BUF_DADDR_NULL; atomic_set(&pb->pb_pin_count, 0); init_waitqueue_head(&pb->pb_waiters); @@ -270,7 +264,7 @@ _pagebuf_initialize( */ STATIC int _pagebuf_get_pages( - page_buf_t *pb, + xfs_buf_t *pb, int page_count, page_buf_flags_t flags) { @@ -292,164 +286,124 @@ _pagebuf_get_pages( } /* - * Walk a pagebuf releasing all the pages contained within it. + * Frees pb_pages if it was malloced. */ -STATIC inline void -_pagebuf_freepages( - page_buf_t *pb) +STATIC void +_pagebuf_free_pages( + xfs_buf_t *bp) { - int buf_index; - - for (buf_index = 0; buf_index < pb->pb_page_count; buf_index++) { - struct page *page = pb->pb_pages[buf_index]; - - if (page) { - pb->pb_pages[buf_index] = NULL; - page_cache_release(page); - } + if (bp->pb_pages != bp->pb_page_array) { + kmem_free(bp->pb_pages, + bp->pb_page_count * sizeof(struct page *)); } } /* - * pagebuf_free + * Releases the specified buffer. * - * pagebuf_free releases the specified buffer. The modification - * state of any associated pages is left unchanged. 
+ * The modification state of any associated pages is left unchanged. + * The buffer most not be on any hash - use pagebuf_rele instead for + * hashed and refcounted buffers */ void pagebuf_free( - page_buf_t *pb) + xfs_buf_t *bp) { - PB_TRACE(pb, "free", 0); - - ASSERT(list_empty(&pb->pb_hash_list)); - - /* release any virtual mapping */ ; - if (pb->pb_flags & _PBF_ADDR_ALLOCATED) { - void *vaddr = pagebuf_mapout_locked(pb); - if (vaddr) { - free_address(vaddr); - } + PB_TRACE(bp, "free", 0); + + ASSERT(list_empty(&bp->pb_hash_list)); + + if (bp->pb_flags & _PBF_PAGE_CACHE) { + uint i; + + if ((bp->pb_flags & PBF_MAPPED) && (bp->pb_page_count > 1)) + free_address(bp->pb_addr - bp->pb_offset); + + for (i = 0; i < bp->pb_page_count; i++) + page_cache_release(bp->pb_pages[i]); + _pagebuf_free_pages(bp); + } else if (bp->pb_flags & _PBF_KMEM_ALLOC) { + /* + * XXX(hch): bp->pb_count_desired might be incorrect (see + * pagebuf_associate_memory for details), but fortunately + * the Linux version of kmem_free ignores the len argument.. + */ + kmem_free(bp->pb_addr, bp->pb_count_desired); + _pagebuf_free_pages(bp); } - if (pb->pb_flags & _PBF_MEM_ALLOCATED) { - if (pb->pb_pages) { - /* release the pages in the address list */ - if ((pb->pb_pages[0]) && - (pb->pb_flags & _PBF_MEM_SLAB)) { - kfree(pb->pb_addr); - } else { - _pagebuf_freepages(pb); - } - if (pb->pb_pages != pb->pb_page_array) - kfree(pb->pb_pages); - pb->pb_pages = NULL; - } - pb->pb_flags &= ~(_PBF_MEM_ALLOCATED|_PBF_MEM_SLAB); - } - - pagebuf_deallocate(pb); + pagebuf_deallocate(bp); } /* - * _pagebuf_lookup_pages - * - * _pagebuf_lookup_pages finds all pages which match the buffer - * in question and the range of file offsets supplied, - * and builds the page list for the buffer, if the - * page list is not already formed or if not all of the pages are - * already in the list. Invalid pages (pages which have not yet been - * read in from disk) are assigned for any pages which are not found. + * Finds all pages for buffer in question and builds it's page list. 
*/ STATIC int _pagebuf_lookup_pages( - page_buf_t *pb, - struct address_space *aspace, - page_buf_flags_t flags) + xfs_buf_t *bp, + uint flags) { - loff_t next_buffer_offset; - unsigned long page_count, pi, index; - struct page *page; + struct address_space *mapping = bp->pb_target->pbr_mapping; + unsigned int sectorshift = bp->pb_target->pbr_sshift; + size_t blocksize = bp->pb_target->pbr_bsize; + size_t size = bp->pb_count_desired; + size_t nbytes, offset; int gfp_mask = pb_to_gfp(flags); - int all_mapped, good_pages, nbytes, rval, retries; - unsigned int blocksize, sectorshift; - size_t size, offset; - - next_buffer_offset = pb->pb_file_offset + pb->pb_buffer_length; - good_pages = page_count = (page_buf_btoc(next_buffer_offset) - - page_buf_btoct(pb->pb_file_offset)); - - if (pb->pb_flags & _PBF_ALL_PAGES_MAPPED) { - /* Bring pages forward in cache */ - for (pi = 0; pi < page_count; pi++) { - mark_page_accessed(pb->pb_pages[pi]); - } - if ((flags & PBF_MAPPED) && !(pb->pb_flags & PBF_MAPPED)) { - all_mapped = 1; - rval = 0; - goto mapit; - } - return 0; - } + unsigned short page_count, i; + pgoff_t first; + loff_t end; + int error; - /* Ensure pb_pages field has been initialised */ - rval = _pagebuf_get_pages(pb, page_count, flags); - if (rval) - return rval; + end = bp->pb_file_offset + bp->pb_buffer_length; + page_count = page_buf_btoc(end) - page_buf_btoct(bp->pb_file_offset); - all_mapped = 1; - blocksize = pb->pb_target->pbr_bsize; - sectorshift = pb->pb_target->pbr_sshift; - size = pb->pb_count_desired; - offset = pb->pb_offset; - - /* Enter the pages in the page list */ - index = (pb->pb_file_offset - pb->pb_offset) >> PAGE_CACHE_SHIFT; - for (pi = 0; pi < page_count; pi++, index++) { - if (pb->pb_pages[pi] == 0) { - retries = 0; - retry: - page = find_or_create_page(aspace, index, gfp_mask); - if (!page) { - if (flags & PBF_READ_AHEAD) - return -ENOMEM; - /* - * This could deadlock. But until all the - * XFS lowlevel code is revamped to handle - * buffer allocation failures we can't do - * much. - */ - if (!(++retries % 100)) { - printk(KERN_ERR - "possibly deadlocking in %s\n", - __FUNCTION__); - } - XFS_STATS_INC(pb_page_retries); - pagebuf_daemon_wakeup(); - current->state = TASK_UNINTERRUPTIBLE; - schedule_timeout(10); - goto retry; + error = _pagebuf_get_pages(bp, page_count, flags); + if (unlikely(error)) + return error; + + offset = bp->pb_offset; + first = bp->pb_file_offset >> PAGE_CACHE_SHIFT; + + for (i = 0; i < bp->pb_page_count; i++) { + struct page *page; + uint retries = 0; + + retry: + page = find_or_create_page(mapping, first + i, gfp_mask); + if (unlikely(page == NULL)) { + if (flags & PBF_READ_AHEAD) + return -ENOMEM; + + /* + * This could deadlock. + * + * But until all the XFS lowlevel code is revamped to + * handle buffer allocation failures we can't do much. 
+ */ + if (!(++retries % 100)) { + printk(KERN_ERR "possibly deadlocking in %s\n", + __FUNCTION__); } - XFS_STATS_INC(pb_page_found); - mark_page_accessed(page); - pb->pb_pages[pi] = page; - } else { - page = pb->pb_pages[pi]; - lock_page(page); + + XFS_STATS_INC(pb_page_retries); + pagebuf_daemon_wakeup(); + current->state = TASK_UNINTERRUPTIBLE; + schedule_timeout(10); + goto retry; } - nbytes = PAGE_CACHE_SIZE - offset; - if (nbytes > size) - nbytes = size; + XFS_STATS_INC(pb_page_found); + + nbytes = min_t(size_t, size, PAGE_CACHE_SIZE - offset); size -= nbytes; if (!PageUptodate(page)) { + page_count--; if (blocksize == PAGE_CACHE_SIZE) { if (flags & PBF_READ) - pb->pb_locked = 1; - good_pages--; + bp->pb_locked = 1; } else if (!PagePrivate(page)) { - unsigned long i, range; + unsigned long j, range; /* * In this case page->private holds a bitmap @@ -457,60 +411,62 @@ _pagebuf_lookup_pages( */ ASSERT(blocksize < PAGE_CACHE_SIZE); range = (offset + nbytes) >> sectorshift; - for (i = offset >> sectorshift; i < range; i++) - if (!test_bit(i, &page->private)) + for (j = offset >> sectorshift; j < range; j++) + if (!test_bit(j, &page->private)) break; - if (i != range) - good_pages--; - } else { - good_pages--; + if (j == range) + page_count++; } } + + bp->pb_pages[i] = page; offset = 0; } - if (!pb->pb_locked) { - for (pi = 0; pi < page_count; pi++) { - if (pb->pb_pages[pi]) - unlock_page(pb->pb_pages[pi]); - } + if (!bp->pb_locked) { + for (i = 0; i < bp->pb_page_count; i++) + unlock_page(bp->pb_pages[i]); } - pb->pb_flags |= _PBF_PAGECACHE; -mapit: - pb->pb_flags |= _PBF_MEM_ALLOCATED; - if (all_mapped) { - pb->pb_flags |= _PBF_ALL_PAGES_MAPPED; - - /* A single page buffer is always mappable */ - if (page_count == 1) { - pb->pb_addr = (caddr_t) - page_address(pb->pb_pages[0]) + pb->pb_offset; - pb->pb_flags |= PBF_MAPPED; - } else if (flags & PBF_MAPPED) { - if (as_list_len > 64) - purge_addresses(); - pb->pb_addr = vmap(pb->pb_pages, page_count, - VM_MAP, PAGE_KERNEL); - if (pb->pb_addr == NULL) - return -ENOMEM; - pb->pb_addr += pb->pb_offset; - pb->pb_flags |= PBF_MAPPED | _PBF_ADDR_ALLOCATED; - } - } - /* If some pages were found with data in them - * we are not in PBF_NONE state. - */ - if (good_pages != 0) { - pb->pb_flags &= ~(PBF_NONE); - if (good_pages != page_count) { - pb->pb_flags |= PBF_PARTIAL; - } + bp->pb_flags |= _PBF_PAGE_CACHE; + + if (page_count) { + /* if we have any uptodate pages, mark that in the buffer */ + bp->pb_flags &= ~PBF_NONE; + + /* if some pages aren't uptodate, mark that in the buffer */ + if (page_count != bp->pb_page_count) + bp->pb_flags |= PBF_PARTIAL; } - PB_TRACE(pb, "lookup_pages", (long)good_pages); + PB_TRACE(bp, "lookup_pages", (long)page_count); + return error; +} - return rval; +/* + * Map buffer into kernel address-space if nessecary. + */ +STATIC int +_pagebuf_map_pages( + xfs_buf_t *bp, + uint flags) +{ + /* A single page buffer is always mappable */ + if (bp->pb_page_count == 1) { + bp->pb_addr = page_address(bp->pb_pages[0]) + bp->pb_offset; + bp->pb_flags |= PBF_MAPPED; + } else if (flags & PBF_MAPPED) { + if (as_list_len > 64) + purge_addresses(); + bp->pb_addr = vmap(bp->pb_pages, bp->pb_page_count, + VM_MAP, PAGE_KERNEL); + if (unlikely(bp->pb_addr == NULL)) + return -ENOMEM; + bp->pb_addr += bp->pb_offset; + bp->pb_flags |= PBF_MAPPED; + } + + return 0; } /* @@ -527,20 +483,19 @@ mapit: * which may imply that this call will block until those buffers * are unlocked. No I/O is implied by this call. 
*/ -STATIC page_buf_t * +STATIC xfs_buf_t * _pagebuf_find( /* find buffer for block */ - pb_target_t *target,/* target for block */ + xfs_buftarg_t *target,/* target for block */ loff_t ioff, /* starting offset of range */ size_t isize, /* length of range */ page_buf_flags_t flags, /* PBF_TRYLOCK */ - page_buf_t *new_pb)/* newly allocated buffer */ + xfs_buf_t *new_pb)/* newly allocated buffer */ { loff_t range_base; size_t range_length; int hval; pb_hash_t *h; - struct list_head *p; - page_buf_t *pb; + xfs_buf_t *pb, *n; int not_locked; range_base = (ioff << BBSHIFT); @@ -556,9 +511,7 @@ _pagebuf_find( /* find buffer for block */ h = &pbhash[hval]; spin_lock(&h->pb_hash_lock); - list_for_each(p, &h->pb_hash) { - pb = list_entry(p, page_buf_t, pb_hash_list); - + list_for_each_entry_safe(pb, n, &h->pb_hash, pb_hash_list) { if (pb->pb_target == target && pb->pb_file_offset == range_base && pb->pb_buffer_length == range_length) { @@ -616,11 +569,7 @@ found: } if (pb->pb_flags & PBF_STALE) - pb->pb_flags &= PBF_MAPPED | \ - _PBF_ALL_PAGES_MAPPED | \ - _PBF_ADDR_ALLOCATED | \ - _PBF_MEM_ALLOCATED | \ - _PBF_MEM_SLAB; + pb->pb_flags &= PBF_MAPPED; PB_TRACE(pb, "got_lock", 0); XFS_STATS_INC(pb_get_locked); return (pb); @@ -637,10 +586,10 @@ found: * pages are present in the buffer, not all of every page may be * valid. */ -page_buf_t * +xfs_buf_t * pagebuf_find( /* find buffer for block */ /* if the block is in memory */ - pb_target_t *target,/* target for block */ + xfs_buftarg_t *target,/* target for block */ loff_t ioff, /* starting offset of range */ size_t isize, /* length of range */ page_buf_flags_t flags) /* PBF_TRYLOCK */ @@ -657,37 +606,48 @@ pagebuf_find( /* find buffer for block */ * although backing storage may not be. If PBF_READ is set in * flags, pagebuf_iostart is called also. */ -page_buf_t * +xfs_buf_t * pagebuf_get( /* allocate a buffer */ - pb_target_t *target,/* target for buffer */ + xfs_buftarg_t *target,/* target for buffer */ loff_t ioff, /* starting offset of range */ size_t isize, /* length of range */ page_buf_flags_t flags) /* PBF_TRYLOCK */ { - page_buf_t *pb, *new_pb; - int error; + xfs_buf_t *pb, *new_pb; + int error = 0, i; new_pb = pagebuf_allocate(flags); if (unlikely(!new_pb)) - return (NULL); + return NULL; pb = _pagebuf_find(target, ioff, isize, flags, new_pb); - if (pb != new_pb) { + if (pb == new_pb) { + error = _pagebuf_lookup_pages(pb, flags); + if (unlikely(error)) { + printk(KERN_WARNING + "pagebuf_get: failed to lookup pages\n"); + goto no_buffer; + } + } else { pagebuf_deallocate(new_pb); - if (unlikely(!pb)) - return (NULL); + if (unlikely(pb == NULL)) + return NULL; } - XFS_STATS_INC(pb_get); + for (i = 0; i < pb->pb_page_count; i++) + mark_page_accessed(pb->pb_pages[i]); - /* fill in any missing pages */ - error = _pagebuf_lookup_pages(pb, pb->pb_target->pbr_mapping, flags); - if (unlikely(error)) { - printk(KERN_WARNING - "pagebuf_get: warning, failed to lookup pages\n"); - goto no_buffer; + if (!(pb->pb_flags & PBF_MAPPED)) { + error = _pagebuf_map_pages(pb, flags); + if (unlikely(error)) { + printk(KERN_WARNING + "pagebuf_get: failed to map pages\n"); + goto no_buffer; + } } + XFS_STATS_INC(pb_get); + /* * Always fill in the block number now, the mapped cases can do * their own overlay of this later. @@ -728,14 +688,14 @@ no_buffer: /* * Create a skeletal pagebuf (no pages associated with it). 
*/ -page_buf_t * +xfs_buf_t * pagebuf_lookup( - struct pb_target *target, + xfs_buftarg_t *target, loff_t ioff, size_t isize, page_buf_flags_t flags) { - page_buf_t *pb; + xfs_buf_t *pb; pb = pagebuf_allocate(flags); if (pb) { @@ -750,7 +710,7 @@ pagebuf_lookup( */ void pagebuf_readahead( - pb_target_t *target, + xfs_buftarg_t *target, loff_t ioff, size_t isize, page_buf_flags_t flags) @@ -767,12 +727,12 @@ pagebuf_readahead( pagebuf_get(target, ioff, isize, flags); } -page_buf_t * +xfs_buf_t * pagebuf_get_empty( size_t len, - pb_target_t *target) + xfs_buftarg_t *target) { - page_buf_t *pb; + xfs_buf_t *pb; pb = pagebuf_allocate(0); if (pb) @@ -794,7 +754,7 @@ mem_to_page( int pagebuf_associate_memory( - page_buf_t *pb, + xfs_buf_t *pb, void *mem, size_t len) { @@ -811,9 +771,9 @@ pagebuf_associate_memory( page_count++; /* Free any previous set of page pointers */ - if (pb->pb_pages && (pb->pb_pages != pb->pb_page_array)) { - kfree(pb->pb_pages); - } + if (pb->pb_pages) + _pagebuf_free_pages(pb); + pb->pb_pages = NULL; pb->pb_addr = mem; @@ -843,54 +803,54 @@ pagebuf_associate_memory( return 0; } -page_buf_t * +xfs_buf_t * pagebuf_get_no_daddr( size_t len, - pb_target_t *target) + xfs_buftarg_t *target) { - int rval; - void *rmem = NULL; - page_buf_flags_t flags = PBF_FORCEIO; - page_buf_t *pb; - size_t tlen = 0; + size_t malloc_len = len; + xfs_buf_t *bp; + void *data; + int error; if (unlikely(len > 0x20000)) - return NULL; - - pb = pagebuf_allocate(flags); - if (!pb) - return NULL; - - _pagebuf_initialize(pb, target, 0, len, flags); - - do { - if (tlen == 0) { - tlen = len; /* first time */ - } else { - kfree(rmem); /* free the mem from the previous try */ - tlen <<= 1; /* double the size and try again */ - } - if ((rmem = kmalloc(tlen, GFP_KERNEL)) == 0) { - pagebuf_free(pb); - return NULL; - } - } while ((size_t)rmem != ((size_t)rmem & ~target->pbr_smask)); - - if ((rval = pagebuf_associate_memory(pb, rmem, len)) != 0) { - kfree(rmem); - pagebuf_free(pb); - return NULL; + goto fail; + + bp = pagebuf_allocate(0); + if (unlikely(bp == NULL)) + goto fail; + _pagebuf_initialize(bp, target, 0, len, PBF_FORCEIO); + + try_again: + data = kmem_alloc(malloc_len, KM_SLEEP); + if (unlikely(data == NULL)) + goto fail_free_buf; + + /* check whether alignment matches.. */ + if ((__psunsigned_t)data != + ((__psunsigned_t)data & ~target->pbr_smask)) { + /* .. 
else double the size and try again */ + kmem_free(data, malloc_len); + malloc_len <<= 1; + goto try_again; } - /* otherwise pagebuf_free just ignores it */ - pb->pb_flags |= (_PBF_MEM_ALLOCATED | _PBF_MEM_SLAB); - PB_CLEAR_OWNER(pb); - up(&pb->pb_sema); /* Return unlocked pagebuf */ - PB_TRACE(pb, "no_daddr", rmem); + error = pagebuf_associate_memory(bp, data, len); + if (error) + goto fail_free_mem; + bp->pb_flags |= _PBF_KMEM_ALLOC; - return pb; -} + pagebuf_unlock(bp); + PB_TRACE(bp, "no_daddr", data); + return bp; + fail_free_mem: + kmem_free(data, malloc_len); + fail_free_buf: + pagebuf_free(bp); + fail: + return NULL; +} /* * pagebuf_hold @@ -902,7 +862,7 @@ pagebuf_get_no_daddr( */ void pagebuf_hold( - page_buf_t *pb) + xfs_buf_t *pb) { atomic_inc(&pb->pb_hold); PB_TRACE(pb, "hold", 0); @@ -916,7 +876,7 @@ pagebuf_hold( */ void pagebuf_rele( - page_buf_t *pb) + xfs_buf_t *pb) { pb_hash_t *hash = pb_hash(pb); @@ -975,7 +935,7 @@ pagebuf_rele( int pagebuf_cond_lock( /* lock buffer, if not locked */ /* returns -EBUSY if locked) */ - page_buf_t *pb) + xfs_buf_t *pb) { int locked; @@ -994,7 +954,7 @@ pagebuf_cond_lock( /* lock buffer, if not locked */ */ int pagebuf_lock_value( - page_buf_t *pb) + xfs_buf_t *pb) { return(atomic_read(&pb->pb_sema.count)); } @@ -1009,7 +969,7 @@ pagebuf_lock_value( */ int pagebuf_lock( - page_buf_t *pb) + xfs_buf_t *pb) { PB_TRACE(pb, "lock", 0); if (atomic_read(&pb->pb_io_remaining)) @@ -1029,7 +989,7 @@ pagebuf_lock( */ void pagebuf_unlock( /* unlock buffer */ - page_buf_t *pb) /* buffer to unlock */ + xfs_buf_t *pb) /* buffer to unlock */ { PB_CLEAR_OWNER(pb); up(&pb->pb_sema); @@ -1057,7 +1017,7 @@ pagebuf_unlock( /* unlock buffer */ */ void pagebuf_pin( - page_buf_t *pb) + xfs_buf_t *pb) { atomic_inc(&pb->pb_pin_count); PB_TRACE(pb, "pin", (long)pb->pb_pin_count.counter); @@ -1072,7 +1032,7 @@ pagebuf_pin( */ void pagebuf_unpin( - page_buf_t *pb) + xfs_buf_t *pb) { if (atomic_dec_and_test(&pb->pb_pin_count)) { wake_up_all(&pb->pb_waiters); @@ -1082,7 +1042,7 @@ pagebuf_unpin( int pagebuf_ispin( - page_buf_t *pb) + xfs_buf_t *pb) { return atomic_read(&pb->pb_pin_count); } @@ -1096,7 +1056,7 @@ pagebuf_ispin( */ static inline void _pagebuf_wait_unpin( - page_buf_t *pb) + xfs_buf_t *pb) { DECLARE_WAITQUEUE (wait, current); @@ -1131,23 +1091,17 @@ void pagebuf_iodone_work( void *v) { - page_buf_t *pb = (page_buf_t *)v; + xfs_buf_t *bp = (xfs_buf_t *)v; - if (pb->pb_iodone) { - (*(pb->pb_iodone)) (pb); - return; - } - - if (pb->pb_flags & PBF_ASYNC) { - if (!pb->pb_relse) - pagebuf_unlock(pb); - pagebuf_rele(pb); - } + if (bp->pb_iodone) + (*(bp->pb_iodone))(bp); + else if (bp->pb_flags & PBF_ASYNC) + xfs_buf_relse(bp); } void pagebuf_iodone( - page_buf_t *pb, + xfs_buf_t *pb, int dataio, int schedule) { @@ -1178,10 +1132,11 @@ pagebuf_iodone( */ void pagebuf_ioerror( /* mark/clear buffer error flag */ - page_buf_t *pb, /* buffer to mark */ - unsigned int error) /* error to store (0 if none) */ + xfs_buf_t *pb, /* buffer to mark */ + int error) /* error to store (0 if none) */ { - pb->pb_error = error; + ASSERT(error >= 0 && error <= 0xffff); + pb->pb_error = (unsigned short)error; PB_TRACE(pb, "ioerror", (unsigned long)error); } @@ -1199,7 +1154,7 @@ pagebuf_ioerror( /* mark/clear buffer error flag */ */ int pagebuf_iostart( /* start I/O on a buffer */ - page_buf_t *pb, /* buffer to start */ + xfs_buf_t *pb, /* buffer to start */ page_buf_flags_t flags) /* PBF_LOCK, PBF_ASYNC, PBF_READ, */ /* PBF_WRITE, PBF_DELWRI, */ /* PBF_DONT_BLOCK */ @@ -1216,11 
+1171,11 @@ pagebuf_iostart( /* start I/O on a buffer */ } pb->pb_flags &= ~(PBF_READ | PBF_WRITE | PBF_ASYNC | PBF_DELWRI | \ - PBF_READ_AHEAD | PBF_RUN_QUEUES); + PBF_READ_AHEAD | _PBF_RUN_QUEUES); pb->pb_flags |= flags & (PBF_READ | PBF_WRITE | PBF_ASYNC | \ - PBF_READ_AHEAD | PBF_RUN_QUEUES); + PBF_READ_AHEAD | _PBF_RUN_QUEUES); - BUG_ON(pb->pb_bn == PAGE_BUF_DADDR_NULL); + BUG_ON(pb->pb_bn == XFS_BUF_DADDR_NULL); /* For writes allow an alternate strategy routine to precede * the actual I/O request (which may not be issued at all in @@ -1246,7 +1201,7 @@ pagebuf_iostart( /* start I/O on a buffer */ STATIC __inline__ int _pagebuf_iolocked( - page_buf_t *pb) + xfs_buf_t *pb) { ASSERT(pb->pb_flags & (PBF_READ|PBF_WRITE)); if (pb->pb_flags & PBF_READ) @@ -1256,7 +1211,7 @@ _pagebuf_iolocked( STATIC __inline__ void _pagebuf_iodone( - page_buf_t *pb, + xfs_buf_t *pb, int schedule) { if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) { @@ -1271,7 +1226,7 @@ bio_end_io_pagebuf( unsigned int bytes_done, int error) { - page_buf_t *pb = (page_buf_t *)bio->bi_private; + xfs_buf_t *pb = (xfs_buf_t *)bio->bi_private; unsigned int i, blocksize = pb->pb_target->pbr_bsize; unsigned int sectorshift = pb->pb_target->pbr_sshift; struct bio_vec *bvec = bio->bi_io_vec; @@ -1289,8 +1244,9 @@ bio_end_io_pagebuf( SetPageError(page); } else if (blocksize == PAGE_CACHE_SIZE) { SetPageUptodate(page); - } else if (!PagePrivate(page)) { - unsigned int j, range; + } else if (!PagePrivate(page) && + (pb->pb_flags & _PBF_PAGE_CACHE)) { + unsigned long j, range; ASSERT(blocksize < PAGE_CACHE_SIZE); range = (bvec->bv_offset + bvec->bv_len) >> sectorshift; @@ -1312,7 +1268,7 @@ bio_end_io_pagebuf( void _pagebuf_ioapply( - page_buf_t *pb) + xfs_buf_t *pb) { int i, map_i, total_nr_pages, nr_pages; struct bio *bio; @@ -1404,34 +1360,19 @@ submit_io: pagebuf_ioerror(pb, EIO); } - if (pb->pb_flags & PBF_RUN_QUEUES) { - pb->pb_flags &= ~PBF_RUN_QUEUES; + if (pb->pb_flags & _PBF_RUN_QUEUES) { + pb->pb_flags &= ~_PBF_RUN_QUEUES; if (atomic_read(&pb->pb_io_remaining) > 1) blk_run_address_space(pb->pb_target->pbr_mapping); } } /* - * pagebuf_iorequest - * - * pagebuf_iorequest is the core I/O request routine. - * It assumes that the buffer is well-formed and - * mapped and ready for physical I/O, unlike - * pagebuf_iostart() and pagebuf_iophysio(). Those - * routines call the pagebuf_ioinitiate routine to start I/O, - * if it is present, or else call pagebuf_iorequest() - * directly if the pagebuf_ioinitiate routine is not present. - * - * This function will be responsible for ensuring access to the - * pages is restricted whilst I/O is in progress - for locking - * pagebufs the pagebuf lock is the mediator, for non-locking - * pagebufs the pages will be locked. In the locking case we - * need to use the pagebuf lock as multiple meta-data buffers - * will reference the same page. + * pagebuf_iorequest -- the core I/O request routine. 
*/ int pagebuf_iorequest( /* start real I/O */ - page_buf_t *pb) /* buffer to convey to device */ + xfs_buf_t *pb) /* buffer to convey to device */ { PB_TRACE(pb, "iorequest", 0); @@ -1467,7 +1408,7 @@ pagebuf_iorequest( /* start real I/O */ */ int pagebuf_iowait( - page_buf_t *pb) + xfs_buf_t *pb) { PB_TRACE(pb, "iowait", 0); if (atomic_read(&pb->pb_io_remaining)) @@ -1477,28 +1418,9 @@ pagebuf_iowait( return pb->pb_error; } -STATIC void * -pagebuf_mapout_locked( - page_buf_t *pb) -{ - void *old_addr = NULL; - - if (pb->pb_flags & PBF_MAPPED) { - if (pb->pb_flags & _PBF_ADDR_ALLOCATED) - old_addr = pb->pb_addr - pb->pb_offset; - pb->pb_addr = NULL; - pb->pb_flags &= ~(PBF_MAPPED | _PBF_ADDR_ALLOCATED); - } - - return old_addr; /* Caller must free the address space, - * we are under a spin lock, probably - * not safe to do vfree here - */ -} - caddr_t pagebuf_offset( - page_buf_t *pb, + xfs_buf_t *pb, size_t offset) { struct page *page; @@ -1516,7 +1438,7 @@ pagebuf_offset( */ void pagebuf_iomove( - page_buf_t *pb, /* buffer to process */ + xfs_buf_t *pb, /* buffer to process */ size_t boff, /* starting buffer offset */ size_t bsize, /* length to copy */ caddr_t data, /* data address */ @@ -1560,10 +1482,12 @@ STATIC spinlock_t pbd_delwrite_lock = SPIN_LOCK_UNLOCKED; STATIC void pagebuf_delwri_queue( - page_buf_t *pb, + xfs_buf_t *pb, int unlock) { PB_TRACE(pb, "delwri_q", (long)unlock); + ASSERT(pb->pb_flags & PBF_DELWRI); + spin_lock(&pbd_delwrite_lock); /* If already in the queue, dequeue and place at tail */ if (!list_empty(&pb->pb_list)) { @@ -1574,7 +1498,7 @@ pagebuf_delwri_queue( } list_add_tail(&pb->pb_list, &pbd_delwrite_queue); - pb->pb_flushtime = jiffies + xfs_age_buffer; + pb->pb_queuetime = jiffies; spin_unlock(&pbd_delwrite_lock); if (unlock) @@ -1583,7 +1507,7 @@ pagebuf_delwri_queue( void pagebuf_delwri_dequeue( - page_buf_t *pb) + xfs_buf_t *pb) { PB_TRACE(pb, "delwri_uq", 0); spin_lock(&pbd_delwrite_lock); @@ -1617,8 +1541,8 @@ STATIC int pagebuf_daemon( void *data) { - page_buf_t *pb; - struct list_head *curr, *next, tmp; + struct list_head tmp; + xfs_buf_t *pb, *n; /* Set up the thread */ daemonize("xfsbufd"); @@ -1638,16 +1562,15 @@ pagebuf_daemon( schedule_timeout(xfs_flush_interval); spin_lock(&pbd_delwrite_lock); - - list_for_each_safe(curr, next, &pbd_delwrite_queue) { - pb = list_entry(curr, page_buf_t, pb_list); - + list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) { PB_TRACE(pb, "walkq1", (long)pagebuf_ispin(pb)); + ASSERT(pb->pb_flags & PBF_DELWRI); - if ((pb->pb_flags & PBF_DELWRI) && - !pagebuf_ispin(pb) && !pagebuf_cond_lock(pb)) { + if (!pagebuf_ispin(pb) && !pagebuf_cond_lock(pb)) { if (!force_flush && - time_before(jiffies, pb->pb_flushtime)) { + time_before(jiffies, + pb->pb_queuetime + + xfs_age_buffer)) { pagebuf_unlock(pb); break; } @@ -1657,12 +1580,11 @@ pagebuf_daemon( list_move(&pb->pb_list, &tmp); } } - spin_unlock(&pbd_delwrite_lock); + while (!list_empty(&tmp)) { - pb = list_entry(tmp.next, page_buf_t, pb_list); + pb = list_entry(tmp.next, xfs_buf_t, pb_list); list_del_init(&pb->pb_list); - pagebuf_iostrategy(pb); blk_run_address_space(pb->pb_target->pbr_mapping); } @@ -1678,32 +1600,25 @@ pagebuf_daemon( void pagebuf_delwri_flush( - pb_target_t *target, - u_long flags, + xfs_buftarg_t *target, + int wait, int *pinptr) { - page_buf_t *pb; - struct list_head *curr, *next, tmp; + struct list_head tmp; + xfs_buf_t *pb, *n; int pincount = 0; pagebuf_runall_queues(pagebuf_dataio_workqueue); 
pagebuf_runall_queues(pagebuf_logio_workqueue); - spin_lock(&pbd_delwrite_lock); INIT_LIST_HEAD(&tmp); + spin_lock(&pbd_delwrite_lock); + list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) { - list_for_each_safe(curr, next, &pbd_delwrite_queue) { - pb = list_entry(curr, page_buf_t, pb_list); - - /* - * Skip other targets, markers and in progress buffers - */ - - if ((pb->pb_flags == 0) || (pb->pb_target != target) || - !(pb->pb_flags & PBF_DELWRI)) { + if (pb->pb_target != target) continue; - } + ASSERT(pb->pb_flags & PBF_DELWRI); PB_TRACE(pb, "walkq2", (long)pagebuf_ispin(pb)); if (pagebuf_ispin(pb)) { pincount++; @@ -1714,33 +1629,33 @@ pagebuf_delwri_flush( pb->pb_flags |= PBF_WRITE; list_move(&pb->pb_list, &tmp); } - /* ok found all the items that can be worked on - * drop the lock and process the private list */ spin_unlock(&pbd_delwrite_lock); - list_for_each_safe(curr, next, &tmp) { - pb = list_entry(curr, page_buf_t, pb_list); - - if (flags & PBDF_WAIT) + /* + * Dropped the delayed write list lock, now walk the temporary list + */ + list_for_each_entry_safe(pb, n, &tmp, pb_list) { + if (wait) pb->pb_flags &= ~PBF_ASYNC; else - list_del_init(curr); + list_del_init(&pb->pb_list); pagebuf_lock(pb); pagebuf_iostrategy(pb); } + /* + * Remaining list items must be flushed before returning + */ while (!list_empty(&tmp)) { - pb = list_entry(tmp.next, page_buf_t, pb_list); + pb = list_entry(tmp.next, xfs_buf_t, pb_list); list_del_init(&pb->pb_list); - pagebuf_iowait(pb); - if (!pb->pb_relse) - pagebuf_unlock(pb); - pagebuf_rele(pb); + xfs_iowait(pb); + xfs_buf_relse(pb); } - if (flags & PBDF_WAIT) + if (wait) blk_run_address_space(target->pbr_mapping); if (pinptr) @@ -1796,7 +1711,7 @@ pagebuf_init(void) { int i; - pagebuf_cache = kmem_cache_create("page_buf_t", sizeof(page_buf_t), 0, + pagebuf_cache = kmem_cache_create("xfs_buf_t", sizeof(xfs_buf_t), 0, SLAB_HWCACHE_ALIGN, NULL, NULL); if (pagebuf_cache == NULL) { printk("pagebuf: couldn't init pagebuf cache\n"); diff --git a/fs/xfs/linux/xfs_buf.h b/fs/xfs/linux/xfs_buf.h index d58384ebebf9..16becc722551 100644 --- a/fs/xfs/linux/xfs_buf.h +++ b/fs/xfs/linux/xfs_buf.h @@ -51,10 +51,7 @@ * Base types */ -/* daddr must be signed since -1 is used for bmaps that are not yet allocated */ -typedef loff_t page_buf_daddr_t; - -#define PAGE_BUF_DADDR_NULL ((page_buf_daddr_t) (-1LL)) +#define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL)) #define page_buf_ctob(pp) ((pp) * PAGE_CACHE_SIZE) #define page_buf_btoc(dd) (((dd) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) @@ -76,44 +73,39 @@ typedef enum page_buf_flags_e { /* pb_flags values */ PBF_ASYNC = (1 << 4), /* initiator will not wait for completion */ PBF_NONE = (1 << 5), /* buffer not read at all */ PBF_DELWRI = (1 << 6), /* buffer has dirty pages */ - PBF_STALE = (1 << 10), /* buffer has been staled, do not find it */ - PBF_FS_MANAGED = (1 << 11), /* filesystem controls freeing memory */ - PBF_FS_DATAIOD = (1 << 12), /* schedule IO completion on fs datad */ + PBF_STALE = (1 << 7), /* buffer has been staled, do not find it */ + PBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */ + PBF_FS_DATAIOD = (1 << 9), /* schedule IO completion on fs datad */ + PBF_FORCEIO = (1 << 10), /* ignore any cache state */ + PBF_FLUSH = (1 << 11), /* flush disk write cache */ + PBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */ /* flags used only as arguments to access routines */ - PBF_LOCK = (1 << 13), /* lock requested */ - PBF_TRYLOCK = (1 << 14), /* lock requested, but do not wait */ 
- PBF_DONT_BLOCK = (1 << 15), /* do not block in current thread */ + PBF_LOCK = (1 << 14), /* lock requested */ + PBF_TRYLOCK = (1 << 15), /* lock requested, but do not wait */ + PBF_DONT_BLOCK = (1 << 16), /* do not block in current thread */ /* flags used only internally */ - _PBF_PAGECACHE = (1 << 16), /* backed by pagecache */ - _PBF_ALL_PAGES_MAPPED = (1 << 18), /* all pages in range mapped */ - _PBF_ADDR_ALLOCATED = (1 << 19), /* pb_addr space was allocated */ - _PBF_MEM_ALLOCATED = (1 << 20), /* underlying pages are allocated */ - _PBF_MEM_SLAB = (1 << 21), /* underlying pages are slab allocated */ - - PBF_FORCEIO = (1 << 22), /* ignore any cache state */ - PBF_FLUSH = (1 << 23), /* flush disk write cache */ - PBF_READ_AHEAD = (1 << 24), /* asynchronous read-ahead */ - PBF_RUN_QUEUES = (1 << 25), /* run block device task queue */ - + _PBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */ + _PBF_KMEM_ALLOC = (1 << 18),/* backed by kmem_alloc() */ + _PBF_RUN_QUEUES = (1 << 19),/* run block device task queue */ } page_buf_flags_t; #define PBF_UPDATE (PBF_READ | PBF_WRITE) #define PBF_NOT_DONE(pb) (((pb)->pb_flags & (PBF_PARTIAL|PBF_NONE)) != 0) #define PBF_DONE(pb) (((pb)->pb_flags & (PBF_PARTIAL|PBF_NONE)) == 0) -typedef struct pb_target { +typedef struct xfs_buftarg { dev_t pbr_dev; struct block_device *pbr_bdev; struct address_space *pbr_mapping; unsigned int pbr_bsize; unsigned int pbr_sshift; size_t pbr_smask; -} pb_target_t; +} xfs_buftarg_t; /* - * page_buf_t: Buffer structure for page cache-based buffers + * xfs_buf_t: Buffer structure for page cache-based buffers * * This buffer structure is used by the page cache buffer management routines * to refer to an assembly of pages forming a logical buffer. The actual @@ -128,26 +120,26 @@ typedef struct pb_target { * to indicate which disk blocks in the page are not valid. 
*/ -struct page_buf_s; -typedef void (*page_buf_iodone_t)(struct page_buf_s *); +struct xfs_buf; +typedef void (*page_buf_iodone_t)(struct xfs_buf *); /* call-back function on I/O completion */ -typedef void (*page_buf_relse_t)(struct page_buf_s *); +typedef void (*page_buf_relse_t)(struct xfs_buf *); /* call-back function on I/O completion */ -typedef int (*page_buf_bdstrat_t)(struct page_buf_s *); +typedef int (*page_buf_bdstrat_t)(struct xfs_buf *); #define PB_PAGES 4 -typedef struct page_buf_s { +typedef struct xfs_buf { struct semaphore pb_sema; /* semaphore for lockables */ - unsigned long pb_flushtime; /* time to flush pagebuf */ + unsigned long pb_queuetime; /* time buffer was queued */ atomic_t pb_pin_count; /* pin count */ wait_queue_head_t pb_waiters; /* unpin waiters */ struct list_head pb_list; page_buf_flags_t pb_flags; /* status flags */ struct list_head pb_hash_list; - struct pb_target *pb_target; /* logical object */ + xfs_buftarg_t *pb_target; /* logical object */ atomic_t pb_hold; /* reference count */ - page_buf_daddr_t pb_bn; /* block number for I/O */ + xfs_daddr_t pb_bn; /* block number for I/O */ loff_t pb_file_offset; /* offset in file */ size_t pb_buffer_length; /* size of buffer in bytes */ size_t pb_count_desired; /* desired transfer size */ @@ -171,52 +163,52 @@ typedef struct page_buf_s { #ifdef PAGEBUF_LOCK_TRACKING int pb_last_holder; #endif -} page_buf_t; +} xfs_buf_t; /* Finding and Reading Buffers */ -extern page_buf_t *pagebuf_find( /* find buffer for block if */ +extern xfs_buf_t *pagebuf_find( /* find buffer for block if */ /* the block is in memory */ - struct pb_target *, /* inode for block */ + xfs_buftarg_t *, /* inode for block */ loff_t, /* starting offset of range */ size_t, /* length of range */ page_buf_flags_t); /* PBF_LOCK */ -extern page_buf_t *pagebuf_get( /* allocate a buffer */ - struct pb_target *, /* inode for buffer */ +extern xfs_buf_t *pagebuf_get( /* allocate a buffer */ + xfs_buftarg_t *, /* inode for buffer */ loff_t, /* starting offset of range */ size_t, /* length of range */ page_buf_flags_t); /* PBF_LOCK, PBF_READ, */ /* PBF_ASYNC */ -extern page_buf_t *pagebuf_lookup( - struct pb_target *, +extern xfs_buf_t *pagebuf_lookup( + xfs_buftarg_t *, loff_t, /* starting offset of range */ size_t, /* length of range */ page_buf_flags_t); /* PBF_READ, PBF_WRITE, */ /* PBF_FORCEIO, */ -extern page_buf_t *pagebuf_get_empty( /* allocate pagebuf struct with */ +extern xfs_buf_t *pagebuf_get_empty( /* allocate pagebuf struct with */ /* no memory or disk address */ size_t len, - struct pb_target *); /* mount point "fake" inode */ + xfs_buftarg_t *); /* mount point "fake" inode */ -extern page_buf_t *pagebuf_get_no_daddr(/* allocate pagebuf struct */ +extern xfs_buf_t *pagebuf_get_no_daddr(/* allocate pagebuf struct */ /* without disk address */ size_t len, - struct pb_target *); /* mount point "fake" inode */ + xfs_buftarg_t *); /* mount point "fake" inode */ extern int pagebuf_associate_memory( - page_buf_t *, + xfs_buf_t *, void *, size_t); extern void pagebuf_hold( /* increment reference count */ - page_buf_t *); /* buffer to hold */ + xfs_buf_t *); /* buffer to hold */ extern void pagebuf_readahead( /* read ahead into cache */ - struct pb_target *, /* target for buffer (or NULL) */ + xfs_buftarg_t *, /* target for buffer (or NULL) */ loff_t, /* starting offset of range */ size_t, /* length of range */ page_buf_flags_t); /* additional read flags */ @@ -224,63 +216,63 @@ extern void pagebuf_readahead( /* read ahead into cache */ /* 
Releasing Buffers */ extern void pagebuf_free( /* deallocate a buffer */ - page_buf_t *); /* buffer to deallocate */ + xfs_buf_t *); /* buffer to deallocate */ extern void pagebuf_rele( /* release hold on a buffer */ - page_buf_t *); /* buffer to release */ + xfs_buf_t *); /* buffer to release */ /* Locking and Unlocking Buffers */ extern int pagebuf_cond_lock( /* lock buffer, if not locked */ /* (returns -EBUSY if locked) */ - page_buf_t *); /* buffer to lock */ + xfs_buf_t *); /* buffer to lock */ extern int pagebuf_lock_value( /* return count on lock */ - page_buf_t *); /* buffer to check */ + xfs_buf_t *); /* buffer to check */ extern int pagebuf_lock( /* lock buffer */ - page_buf_t *); /* buffer to lock */ + xfs_buf_t *); /* buffer to lock */ extern void pagebuf_unlock( /* unlock buffer */ - page_buf_t *); /* buffer to unlock */ + xfs_buf_t *); /* buffer to unlock */ /* Buffer Read and Write Routines */ extern void pagebuf_iodone( /* mark buffer I/O complete */ - page_buf_t *, /* buffer to mark */ + xfs_buf_t *, /* buffer to mark */ int, /* use data/log helper thread. */ int); /* run completion locally, or in * a helper thread. */ extern void pagebuf_ioerror( /* mark buffer in error (or not) */ - page_buf_t *, /* buffer to mark */ - unsigned int); /* error to store (0 if none) */ + xfs_buf_t *, /* buffer to mark */ + int); /* error to store (0 if none) */ extern int pagebuf_iostart( /* start I/O on a buffer */ - page_buf_t *, /* buffer to start */ + xfs_buf_t *, /* buffer to start */ page_buf_flags_t); /* PBF_LOCK, PBF_ASYNC, */ /* PBF_READ, PBF_WRITE, */ /* PBF_DELWRI */ extern int pagebuf_iorequest( /* start real I/O */ - page_buf_t *); /* buffer to convey to device */ + xfs_buf_t *); /* buffer to convey to device */ extern int pagebuf_iowait( /* wait for buffer I/O done */ - page_buf_t *); /* buffer to wait on */ + xfs_buf_t *); /* buffer to wait on */ extern void pagebuf_iomove( /* move data in/out of pagebuf */ - page_buf_t *, /* buffer to manipulate */ + xfs_buf_t *, /* buffer to manipulate */ size_t, /* starting buffer offset */ size_t, /* length in buffer */ caddr_t, /* data pointer */ page_buf_rw_t); /* direction */ -static inline int pagebuf_iostrategy(page_buf_t *pb) +static inline int pagebuf_iostrategy(xfs_buf_t *pb) { return pb->pb_strat ? pb->pb_strat(pb) : pagebuf_iorequest(pb); } -static inline int pagebuf_geterror(page_buf_t *pb) +static inline int pagebuf_geterror(xfs_buf_t *pb) { return pb ? 
pb->pb_error : ENOMEM; } @@ -288,30 +280,24 @@ static inline int pagebuf_geterror(page_buf_t *pb) /* Buffer Utility Routines */ extern caddr_t pagebuf_offset( /* pointer at offset in buffer */ - page_buf_t *, /* buffer to offset into */ + xfs_buf_t *, /* buffer to offset into */ size_t); /* offset */ /* Pinning Buffer Storage in Memory */ extern void pagebuf_pin( /* pin buffer in memory */ - page_buf_t *); /* buffer to pin */ + xfs_buf_t *); /* buffer to pin */ extern void pagebuf_unpin( /* unpin buffered data */ - page_buf_t *); /* buffer to unpin */ + xfs_buf_t *); /* buffer to unpin */ extern int pagebuf_ispin( /* check if buffer is pinned */ - page_buf_t *); /* buffer to check */ + xfs_buf_t *); /* buffer to check */ /* Delayed Write Buffer Routines */ -#define PBDF_WAIT 0x01 -extern void pagebuf_delwri_flush( - pb_target_t *, - unsigned long, - int *); - -extern void pagebuf_delwri_dequeue( - page_buf_t *); +extern void pagebuf_delwri_flush(xfs_buftarg_t *, int, int *); +extern void pagebuf_delwri_dequeue(xfs_buf_t *); /* Buffer Daemon Setup Routines */ @@ -322,7 +308,7 @@ extern void pagebuf_terminate(void); #ifdef PAGEBUF_TRACE extern ktrace_t *pagebuf_trace_buf; extern void pagebuf_trace( - page_buf_t *, /* buffer being traced */ + xfs_buf_t *, /* buffer being traced */ char *, /* description of operation */ void *, /* arbitrary diagnostic value */ void *); /* return address */ @@ -369,7 +355,7 @@ extern void pagebuf_trace( #define XFS_BUF_MANAGE PBF_FS_MANAGED #define XFS_BUF_UNMANAGE(x) ((x)->pb_flags &= ~PBF_FS_MANAGED) -static inline void xfs_buf_undelay(page_buf_t *pb) +static inline void xfs_buf_undelay(xfs_buf_t *pb) { if (pb->pb_flags & PBF_DELWRI) { if (pb->pb_list.next != &pb->pb_list) { @@ -423,12 +409,6 @@ static inline void xfs_buf_undelay(page_buf_t *pb) #define XFS_BUF_BP_ISMAPPED(bp) 1 -typedef struct page_buf_s xfs_buf_t; -#define xfs_buf page_buf_s - -typedef struct pb_target xfs_buftarg_t; -#define xfs_buftarg pb_target - #define XFS_BUF_DATAIO(x) ((x)->pb_flags |= PBF_FS_DATAIOD) #define XFS_BUF_UNDATAIO(x) ((x)->pb_flags &= ~PBF_FS_DATAIOD) @@ -461,7 +441,7 @@ typedef struct pb_target xfs_buftarg_t; #define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->pb_addr) -extern inline xfs_caddr_t xfs_buf_offset(page_buf_t *bp, size_t offset) +extern inline xfs_caddr_t xfs_buf_offset(xfs_buf_t *bp, size_t offset) { if (bp->pb_flags & PBF_MAPPED) return XFS_BUF_PTR(bp) + offset; @@ -472,7 +452,7 @@ extern inline xfs_caddr_t xfs_buf_offset(page_buf_t *bp, size_t offset) pagebuf_associate_memory(bp, val, count) #define XFS_BUF_ADDR(bp) ((bp)->pb_bn) #define XFS_BUF_SET_ADDR(bp, blk) \ - ((bp)->pb_bn = (page_buf_daddr_t)(blk)) + ((bp)->pb_bn = (blk)) #define XFS_BUF_OFFSET(bp) ((bp)->pb_file_offset) #define XFS_BUF_SET_OFFSET(bp, off) \ ((bp)->pb_file_offset = (off)) @@ -517,15 +497,15 @@ extern inline xfs_caddr_t xfs_buf_offset(page_buf_t *bp, size_t offset) #define xfs_buf_get_flags(target, blkno, len, flags) \ pagebuf_get((target), (blkno), (len), (flags)) -static inline int xfs_bawrite(void *mp, page_buf_t *bp) +static inline int xfs_bawrite(void *mp, xfs_buf_t *bp) { bp->pb_fspriv3 = mp; bp->pb_strat = xfs_bdstrat_cb; xfs_buf_undelay(bp); - return pagebuf_iostart(bp, PBF_WRITE | PBF_ASYNC | PBF_RUN_QUEUES); + return pagebuf_iostart(bp, PBF_WRITE | PBF_ASYNC | _PBF_RUN_QUEUES); } -static inline void xfs_buf_relse(page_buf_t *bp) +static inline void xfs_buf_relse(xfs_buf_t *bp) { if (!bp->pb_relse) pagebuf_unlock(bp); @@ -553,13 +533,13 @@ static inline void 
xfs_buf_relse(page_buf_t *bp) pagebuf_iomove((pb), (off), (len), NULL, PBRW_ZERO) -static inline int XFS_bwrite(page_buf_t *pb) +static inline int XFS_bwrite(xfs_buf_t *pb) { int iowait = (pb->pb_flags & PBF_ASYNC) == 0; int error = 0; if (!iowait) - pb->pb_flags |= PBF_RUN_QUEUES; + pb->pb_flags |= _PBF_RUN_QUEUES; xfs_buf_undelay(pb); pagebuf_iostrategy(pb); @@ -573,7 +553,7 @@ static inline int XFS_bwrite(page_buf_t *pb) #define XFS_bdwrite(pb) \ pagebuf_iostart(pb, PBF_DELWRI | PBF_ASYNC) -static inline int xfs_bdwrite(void *mp, page_buf_t *bp) +static inline int xfs_bdwrite(void *mp, xfs_buf_t *bp) { bp->pb_strat = xfs_bdstrat_cb; bp->pb_fspriv3 = mp; diff --git a/fs/xfs/linux/xfs_globals.c b/fs/xfs/linux/xfs_globals.c index 73cf327841d2..1144a8b9f76d 100644 --- a/fs/xfs/linux/xfs_globals.c +++ b/fs/xfs/linux/xfs_globals.c @@ -50,19 +50,19 @@ unsigned long xfs_physmem; */ xfs_param_t xfs_params = { - /* MIN DFLT MAX */ - .restrict_chown = { 0, 1, 1 }, - .sgid_inherit = { 0, 0, 1 }, - .symlink_mode = { 0, 0, 1 }, - .panic_mask = { 0, 0, 127 }, - .error_level = { 0, 3, 11 }, - .sync_interval = { HZ, 30*HZ, 60*HZ }, - .stats_clear = { 0, 0, 1 }, - .inherit_sync = { 0, 1, 1 }, - .inherit_nodump = { 0, 1, 1 }, - .inherit_noatim = { 0, 1, 1 }, - .flush_interval = { HZ/2, HZ, 30*HZ }, - .age_buffer = { 1*HZ, 15*HZ, 300*HZ }, + /* MIN DFLT MAX */ + .restrict_chown = { 0, 1, 1 }, + .sgid_inherit = { 0, 0, 1 }, + .symlink_mode = { 0, 0, 1 }, + .panic_mask = { 0, 0, 127 }, + .error_level = { 0, 3, 11 }, + .sync_interval = { USER_HZ, 30*USER_HZ, 7200*USER_HZ }, + .stats_clear = { 0, 0, 1 }, + .inherit_sync = { 0, 1, 1 }, + .inherit_nodump = { 0, 1, 1 }, + .inherit_noatim = { 0, 1, 1 }, + .flush_interval = { USER_HZ/2, USER_HZ, 30*USER_HZ }, + .age_buffer = { 1*USER_HZ, 15*USER_HZ, 7200*USER_HZ }, }; /* diff --git a/fs/xfs/linux/xfs_ioctl.c b/fs/xfs/linux/xfs_ioctl.c index c5059bbe5694..d6402d7465de 100644 --- a/fs/xfs/linux/xfs_ioctl.c +++ b/fs/xfs/linux/xfs_ioctl.c @@ -659,7 +659,7 @@ xfs_ioctl( case XFS_IOC_DIOINFO: { struct dioattr da; - pb_target_t *target = + xfs_buftarg_t *target = (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? 
mp->m_rtdev_targp : mp->m_ddev_targp; diff --git a/fs/xfs/linux/xfs_linux.h b/fs/xfs/linux/xfs_linux.h index e76b25c3b8a7..bfd0604ea7a6 100644 --- a/fs/xfs/linux/xfs_linux.h +++ b/fs/xfs/linux/xfs_linux.h @@ -134,13 +134,13 @@ static inline void set_buffer_unwritten_io(struct buffer_head *bh) #define irix_symlink_mode xfs_params.symlink_mode.val #define xfs_panic_mask xfs_params.panic_mask.val #define xfs_error_level xfs_params.error_level.val -#define xfs_syncd_interval xfs_params.sync_interval.val +#define xfs_syncd_interval (xfs_params.sync_interval.val * HZ / USER_HZ) #define xfs_stats_clear xfs_params.stats_clear.val #define xfs_inherit_sync xfs_params.inherit_sync.val #define xfs_inherit_nodump xfs_params.inherit_nodump.val #define xfs_inherit_noatime xfs_params.inherit_noatim.val -#define xfs_flush_interval xfs_params.flush_interval.val -#define xfs_age_buffer xfs_params.age_buffer.val +#define xfs_flush_interval (xfs_params.flush_interval.val * HZ / USER_HZ) +#define xfs_age_buffer (xfs_params.age_buffer.val * HZ / USER_HZ) #define current_cpu() smp_processor_id() #define current_pid() (current->pid) @@ -247,10 +247,11 @@ static inline void set_buffer_unwritten_io(struct buffer_head *bh) #define howmany(x, y) (((x)+((y)-1))/(y)) #define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) -static inline void xfs_stack_trace(void) -{ - dump_stack(); -} +#define xfs_stack_trace() dump_stack() + +#define xfs_itruncate_data(ip, off) \ + (-vmtruncate(LINVFS_GET_IP(XFS_ITOV(ip)), (off))) + /* Move the kernel do_div definition off to one side */ diff --git a/fs/xfs/linux/xfs_lrw.c b/fs/xfs/linux/xfs_lrw.c index 93bb959fef48..4bacdb76ad25 100644 --- a/fs/xfs/linux/xfs_lrw.c +++ b/fs/xfs/linux/xfs_lrw.c @@ -301,7 +301,7 @@ xfs_read( /* END copy & waste from filemap.c */ if (ioflags & IO_ISDIRECT) { - pb_target_t *target = + xfs_buftarg_t *target = (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? mp->m_rtdev_targp : mp->m_ddev_targp; if ((*offset & target->pbr_smask) || @@ -687,7 +687,7 @@ xfs_write( } if (ioflags & IO_ISDIRECT) { - pb_target_t *target = + xfs_buftarg_t *target = (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? mp->m_rtdev_targp : mp->m_ddev_targp; diff --git a/fs/xfs/linux/xfs_lrw.h b/fs/xfs/linux/xfs_lrw.h index 731c88fa910c..faf0afc70260 100644 --- a/fs/xfs/linux/xfs_lrw.h +++ b/fs/xfs/linux/xfs_lrw.h @@ -38,7 +38,7 @@ struct xfs_mount; struct xfs_iocore; struct xfs_inode; struct xfs_bmbt_irec; -struct page_buf_s; +struct xfs_buf; struct xfs_iomap; #if defined(XFS_RW_TRACE) @@ -89,8 +89,8 @@ extern void xfs_inval_cached_trace(struct xfs_iocore *, extern int xfs_bmap(struct bhv_desc *, xfs_off_t, ssize_t, int, struct xfs_iomap *, int *); -extern int xfsbdstrat(struct xfs_mount *, struct page_buf_s *); -extern int xfs_bdstrat_cb(struct page_buf_s *); +extern int xfsbdstrat(struct xfs_mount *, struct xfs_buf *); +extern int xfs_bdstrat_cb(struct xfs_buf *); extern int xfs_zero_eof(struct vnode *, struct xfs_iocore *, xfs_off_t, xfs_fsize_t, xfs_fsize_t); diff --git a/fs/xfs/linux/xfs_super.c b/fs/xfs/linux/xfs_super.c index 5b7f46bc34b9..7bca10400c51 100644 --- a/fs/xfs/linux/xfs_super.c +++ b/fs/xfs/linux/xfs_super.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as @@ -153,8 +153,7 @@ xfs_set_inodeops( inode->i_mapping->a_ops = &linvfs_aops; } else { inode->i_op = &linvfs_file_inode_operations; - init_special_inode(inode, inode->i_mode, - inode->i_rdev); + init_special_inode(inode, inode->i_mode, inode->i_rdev); } } @@ -287,7 +286,7 @@ void xfs_flush_buftarg( xfs_buftarg_t *btp) { - pagebuf_delwri_flush(btp, PBDF_WAIT, NULL); + pagebuf_delwri_flush(btp, 1, NULL); } void @@ -448,7 +447,8 @@ linvfs_clear_inode( #define SYNCD_FLAGS (SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR) STATIC int -syncd(void *arg) +xfssyncd( + void *arg) { vfs_t *vfsp = (vfs_t *) arg; int error; @@ -480,20 +480,22 @@ syncd(void *arg) } STATIC int -linvfs_start_syncd(vfs_t *vfsp) +linvfs_start_syncd( + vfs_t *vfsp) { - int pid; + int pid; - pid = kernel_thread(syncd, (void *) vfsp, + pid = kernel_thread(xfssyncd, (void *) vfsp, CLONE_VM | CLONE_FS | CLONE_FILES); if (pid < 0) - return pid; + return -pid; wait_event(vfsp->vfs_wait_sync_task, vfsp->vfs_sync_task); return 0; } STATIC void -linvfs_stop_syncd(vfs_t *vfsp) +linvfs_stop_syncd( + vfs_t *vfsp) { vfsp->vfs_flag |= VFS_UMOUNT; wmb(); @@ -735,7 +737,7 @@ linvfs_fill_super( struct vfs *vfsp = vfs_allocate(); struct xfs_mount_args *args = xfs_args_allocate(sb); struct kstatfs statvfs; - int error; + int error, error2; vfsp->vfs_super = sb; LINVFS_SET_VFS(sb, vfsp); @@ -776,11 +778,15 @@ linvfs_fill_super( goto fail_unmount; sb->s_root = d_alloc_root(LINVFS_GET_IP(rootvp)); - if (!sb->s_root) + if (!sb->s_root) { + error = ENOMEM; goto fail_vnrele; - if (is_bad_inode(sb->s_root->d_inode)) + } + if (is_bad_inode(sb->s_root->d_inode)) { + error = EINVAL; goto fail_vnrele; - if (linvfs_start_syncd(vfsp)) + } + if ((error = linvfs_start_syncd(vfsp))) goto fail_vnrele; vn_trace_exit(rootvp, __FUNCTION__, (inst_t *)__return_address); @@ -796,7 +802,7 @@ fail_vnrele: } fail_unmount: - VFS_UNMOUNT(vfsp, 0, NULL, error); + VFS_UNMOUNT(vfsp, 0, NULL, error2); fail_vfsop: vfs_deallocate(vfsp); diff --git a/fs/xfs/linux/xfs_super.h b/fs/xfs/linux/xfs_super.h index f905528a2ea3..15b238889b68 100644 --- a/fs/xfs/linux/xfs_super.h +++ b/fs/xfs/linux/xfs_super.h @@ -112,7 +112,7 @@ extern void xfs_qm_exit(void); struct xfs_inode; struct xfs_mount; -struct pb_target; +struct xfs_buftarg; struct block_device; extern __uint64_t xfs_max_file_offset(unsigned int); @@ -126,12 +126,12 @@ extern int xfs_blkdev_get(struct xfs_mount *, const char *, struct block_device **); extern void xfs_blkdev_put(struct block_device *); -extern struct pb_target *xfs_alloc_buftarg(struct block_device *); -extern void xfs_relse_buftarg(struct pb_target *); -extern void xfs_free_buftarg(struct pb_target *); -extern void xfs_flush_buftarg(struct pb_target *); -extern int xfs_readonly_buftarg(struct pb_target *); -extern void xfs_setsize_buftarg(struct pb_target *, unsigned int, unsigned int); -extern unsigned int xfs_getsize_buftarg(struct pb_target *); +extern struct xfs_buftarg *xfs_alloc_buftarg(struct block_device *); +extern void xfs_relse_buftarg(struct xfs_buftarg *); +extern void xfs_free_buftarg(struct xfs_buftarg *); +extern void xfs_flush_buftarg(struct xfs_buftarg *); +extern int xfs_readonly_buftarg(struct xfs_buftarg *); +extern void xfs_setsize_buftarg(struct xfs_buftarg *, unsigned int, unsigned int); +extern unsigned int xfs_getsize_buftarg(struct xfs_buftarg *); #endif /* __XFS_SUPER_H__ */ diff --git a/fs/xfs/xfs_acl.c 
b/fs/xfs/xfs_acl.c index 596d9400b9e1..30850e6d6ccb 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -111,7 +111,7 @@ posix_acl_xattr_to_xfs( return EINVAL; if (src->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION)) - return EINVAL; + return EOPNOTSUPP; memset(dest, 0, sizeof(xfs_acl_t)); dest->acl_cnt = posix_acl_xattr_count(size); diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c index c8243145e9ea..ec42931d0498 100644 --- a/fs/xfs/xfs_dir2_node.c +++ b/fs/xfs/xfs_dir2_node.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2002 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License as @@ -224,12 +224,21 @@ xfs_dir2_leafn_add( mp = dp->i_mount; tp = args->trans; leaf = bp->data; + + /* + * Quick check just to make sure we are not going to index + * into other peoples memory + */ + if (index < 0) + return XFS_ERROR(EFSCORRUPTED); + /* * If there are already the maximum number of leaf entries in * the block, if there are no stale entries it won't fit. * Caller will do a split. If there are stale entries we'll do * a compact. */ + if (INT_GET(leaf->hdr.count, ARCH_CONVERT) == XFS_DIR2_MAX_LEAF_ENTS(mp)) { if (INT_ISZERO(leaf->hdr.stale, ARCH_CONVERT)) return XFS_ERROR(ENOSPC); @@ -828,12 +837,24 @@ xfs_dir2_leafn_rebalance( state->inleaf = !swap; else state->inleaf = - swap ^ (args->hashval < INT_GET(leaf2->ents[0].hashval, ARCH_CONVERT)); + swap ^ (blk1->index <= INT_GET(leaf1->hdr.count, ARCH_CONVERT)); /* * Adjust the expected index for insertion. */ if (!state->inleaf) blk2->index = blk1->index - INT_GET(leaf1->hdr.count, ARCH_CONVERT); + + /* + * Finally sanity check just to make sure we are not returning a negative index + */ + if(blk2->index < 0) { + state->inleaf = 1; + blk2->index = 0; + cmn_err(CE_ALERT, + "xfs_dir2_leafn_rebalance: picked the wrong leaf? reverting orignal leaf: " + "blk1->index %d\n", + blk1->index); + } } /* diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 0835763a83ea..c6083d81a667 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -591,10 +591,11 @@ retry: firstblock = NULLFSBLOCK; /* - * roundup the allocation request to m_dalign boundary if file size - * is greater that 512K and we are allocating past the allocation eof + * Roundup the allocation request to a stripe unit (m_dalign) boundary + * if the file size is >= stripe unit size, and we are allocating past + * the allocation eof. 
*/ - if (mp->m_dalign && (isize >= mp->m_dalign) && aeof) { + if (mp->m_dalign && (isize >= XFS_FSB_TO_B(mp, mp->m_dalign)) && aeof) { int eof; xfs_fileoff_t new_last_fsb; new_last_fsb = roundup_64(last_fsb, mp->m_dalign); diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index 3d6822de23eb..5ebaa2fd92f2 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -213,9 +213,9 @@ xfs_cleanup(void) */ STATIC int xfs_start_flags( + struct vfs *vfs, struct xfs_mount_args *ap, - struct xfs_mount *mp, - int ronly) + struct xfs_mount *mp) { /* Values are in BBs */ if ((ap->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) { @@ -305,7 +305,7 @@ xfs_start_flags( * no recovery flag requires a read-only mount */ if (ap->flags & XFSMNT_NORECOVERY) { - if (!ronly) { + if (!(vfs->vfs_flag & VFS_RDONLY)) { cmn_err(CE_WARN, "XFS: tried to mount a FS read-write without recovery!"); return XFS_ERROR(EINVAL); @@ -327,10 +327,12 @@ xfs_start_flags( */ STATIC int xfs_finish_flags( + struct vfs *vfs, struct xfs_mount_args *ap, - struct xfs_mount *mp, - int ronly) + struct xfs_mount *mp) { + int ronly = (vfs->vfs_flag & VFS_RDONLY); + /* Fail a mount where the logbuf is smaller then the log stripe */ if (XFS_SB_VERSION_HASLOGV2(&mp->m_sb)) { if ((ap->logbufsize == -1) && @@ -420,7 +422,6 @@ xfs_mount( struct bhv_desc *p; struct xfs_mount *mp = XFS_BHVTOM(bhvp); struct block_device *ddev, *logdev, *rtdev; - int ronly = (vfsp->vfs_flag & VFS_RDONLY); int flags = 0, error; ddev = vfsp->vfs_super->s_bdev; @@ -472,13 +473,13 @@ xfs_mount( /* * Setup flags based on mount(2) options and then the superblock */ - error = xfs_start_flags(args, mp, ronly); + error = xfs_start_flags(vfsp, args, mp); if (error) goto error; error = xfs_readsb(mp); if (error) goto error; - error = xfs_finish_flags(args, mp, ronly); + error = xfs_finish_flags(vfsp, args, mp); if (error) { xfs_freesb(mp); goto error; @@ -636,8 +637,7 @@ xfs_mntupdate( */ do { VFS_SYNC(vfsp, REMOUNT_READONLY_FLAGS, NULL, error); - pagebuf_delwri_flush(mp->m_ddev_targp, PBDF_WAIT, - &pincount); + pagebuf_delwri_flush(mp->m_ddev_targp, 1, &pincount); if(0 == pincount) { delay(50); count++; } } while (count < 2); diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index c1b6c6517f84..dd20a0a26736 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -680,18 +680,12 @@ xfs_setattr( * once it is a part of the transaction. */ if (mask & XFS_AT_SIZE) { - if (vap->va_size > ip->i_d.di_size) { + code = 0; + if (vap->va_size > ip->i_d.di_size) code = xfs_igrow_start(ip, vap->va_size, credp); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - } else if (vap->va_size <= ip->i_d.di_size) { - xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, - (xfs_fsize_t)vap->va_size); - code = 0; - } else { - xfs_iunlock(ip, XFS_ILOCK_EXCL); - code = 0; - } + xfs_iunlock(ip, XFS_ILOCK_EXCL); + if (!code) + code = xfs_itruncate_data(ip, vap->va_size); if (code) { ASSERT(tp == NULL); lock_flags &= ~XFS_ILOCK_EXCL; |
