summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Nathan Scott <nathans@sgi.com> 2005-01-20 11:41:18 +1100
committer: Nathan Scott <nathans@sgi.com> 2005-01-20 11:41:18 +1100
commit: 364b1dccc8b290bc69b440b69dede741c356256d (patch)
tree: c8bab6bf2ed7d51628b1a2f3c81ab4ab7b29ae9a
parent: 8aa11989523554e67b3e19a1f4180b11cdf03a3c (diff)
parent: 2008db7cf8b4d2229658af3ceeae7885c6e2ba3f (diff)
Merge nathans@oss.sgi.com:/oss/bitkeeper/xfs-linux-2.6
into sgi.com:/source2/xfs-linux-2.6
-rw-r--r-- fs/Kconfig 77
-rw-r--r-- fs/nfsd/export.c 5
-rw-r--r-- fs/xfs/Kconfig 80
-rw-r--r-- fs/xfs/Makefile 1
-rw-r--r-- fs/xfs/linux-2.6/xfs_aops.c 2
-rw-r--r-- fs/xfs/linux-2.6/xfs_buf.c 290
-rw-r--r-- fs/xfs/linux-2.6/xfs_buf.h 50
-rw-r--r-- fs/xfs/linux-2.6/xfs_export.c 130
-rw-r--r-- fs/xfs/linux-2.6/xfs_file.c 24
-rw-r--r-- fs/xfs/linux-2.6/xfs_ioctl.c 7
-rw-r--r-- fs/xfs/linux-2.6/xfs_iops.c 29
-rw-r--r-- fs/xfs/linux-2.6/xfs_lrw.c 4
-rw-r--r-- fs/xfs/linux-2.6/xfs_super.c 66
-rw-r--r-- fs/xfs/linux-2.6/xfs_super.h 2
-rw-r--r-- fs/xfs/linux-2.6/xfs_vnode.h 1
-rw-r--r-- fs/xfs/xfs_clnt.h 1
-rw-r--r-- fs/xfs/xfs_iget.c 46
-rw-r--r-- fs/xfs/xfs_inode.h 13
-rw-r--r-- fs/xfs/xfs_mount.h 4
-rw-r--r-- fs/xfs/xfs_vfsops.c 19
-rw-r--r-- fs/xfs/xfs_vnodeops.c 6
-rw-r--r-- include/linux/fs.h 4
22 files changed, 505 insertions, 356 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 9c5d4cff42af..431bb01a824e 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -304,82 +304,7 @@ config FS_POSIX_ACL
depends on EXT2_FS_POSIX_ACL || EXT3_FS_POSIX_ACL || JFS_POSIX_ACL || REISERFS_FS_POSIX_ACL || NFSD_V4
default y
-config XFS_FS
- tristate "XFS filesystem support"
- help
- XFS is a high performance journaling filesystem which originated
- on the SGI IRIX platform. It is completely multi-threaded, can
- support large files and large filesystems, extended attributes,
- variable block sizes, is extent based, and makes extensive use of
- Btrees (directories, extents, free space) to aid both performance
- and scalability.
-
- Refer to the documentation at <http://oss.sgi.com/projects/xfs/>
- for complete details. This implementation is on-disk compatible
- with the IRIX version of XFS.
-
- To compile this file system support as a module, choose M here: the
- module will be called xfs. Be aware, however, that if the file
- system of your root partition is compiled as a module, you'll need
- to use an initial ramdisk (initrd) to boot.
-
-config XFS_RT
- bool "Realtime support (EXPERIMENTAL)"
- depends on XFS_FS && EXPERIMENTAL
- help
- If you say Y here you will be able to mount and use XFS filesystems
- which contain a realtime subvolume. The realtime subvolume is a
- separate area of disk space where only file data is stored. The
- realtime subvolume is designed to provide very deterministic
- data rates suitable for media streaming applications.
-
- See the xfs man page in section 5 for a bit more information.
-
- This feature is unsupported at this time, is not yet fully
- functional, and may cause serious problems.
-
- If unsure, say N.
-
-config XFS_QUOTA
- bool "Quota support"
- depends on XFS_FS
- help
- If you say Y here, you will be able to set limits for disk usage on
- a per user and/or a per group basis under XFS. XFS considers quota
- information as filesystem metadata and uses journaling to provide a
- higher level guarantee of consistency. The on-disk data format for
- quota is also compatible with the IRIX version of XFS, allowing a
- filesystem to be migrated between Linux and IRIX without any need
- for conversion.
-
- If unsure, say N. More comprehensive documentation can be found in
- README.quota in the xfsprogs package. XFS quota can be used either
- with or without the generic quota support enabled (CONFIG_QUOTA) -
- they are completely independent subsystems.
-
-config XFS_SECURITY
- bool "Security Label support"
- depends on XFS_FS
- help
- Security labels support alternative access control models
- implemented by security modules like SELinux. This option
- enables an extended attribute namespace for inode security
- labels in the XFS filesystem.
-
- If you are not using a security module that requires using
- extended attributes for inode security labels, say N.
-
-config XFS_POSIX_ACL
- bool "POSIX ACL support"
- depends on XFS_FS
- help
- POSIX Access Control Lists (ACLs) support permissions for users and
- groups beyond the owner/group/world scheme.
-
- To learn more about Access Control Lists, visit the POSIX ACLs for
- Linux website <http://acl.bestbits.at/>.
-
- If you don't know what Access Control Lists are, say N.
+source "fs/xfs/Kconfig"
config MINIX_FS
tristate "Minix fs support"
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 1a53ff65dc3f..96c0ea8f60ce 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -305,11 +305,6 @@ void svc_export_request(struct cache_detail *cd,
static struct svc_export *svc_export_lookup(struct svc_export *, int);
-extern struct dentry *
-find_exported_dentry(struct super_block *sb, void *obj, void *parent,
- int (*acceptable)(void *context, struct dentry *de),
- void *context);
-
static int check_export(struct inode *inode, int flags)
{
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
new file mode 100644
index 000000000000..ab515dc81ec3
--- /dev/null
+++ b/fs/xfs/Kconfig
@@ -0,0 +1,80 @@
+menu "XFS support"
+
+config XFS_FS
+ tristate "XFS filesystem support"
+ help
+ XFS is a high performance journaling filesystem which originated
+ on the SGI IRIX platform. It is completely multi-threaded, can
+ support large files and large filesystems, extended attributes,
+ variable block sizes, is extent based, and makes extensive use of
+ Btrees (directories, extents, free space) to aid both performance
+ and scalability.
+
+ Refer to the documentation at <http://oss.sgi.com/projects/xfs/>
+ for complete details. This implementation is on-disk compatible
+ with the IRIX version of XFS.
+
+ To compile this file system support as a module, choose M here: the
+ module will be called xfs. Be aware, however, that if the file
+ system of your root partition is compiled as a module, you'll need
+ to use an initial ramdisk (initrd) to boot.
+
+config XFS_RT
+ bool "Realtime support (EXPERIMENTAL)"
+ depends on XFS_FS && EXPERIMENTAL
+ help
+ If you say Y here you will be able to mount and use XFS filesystems
+ which contain a realtime subvolume. The realtime subvolume is a
+ separate area of disk space where only file data is stored. The
+ realtime subvolume is designed to provide very deterministic
+ data rates suitable for media streaming applications.
+
+ See the xfs man page in section 5 for a bit more information.
+
+ This feature is unsupported at this time, is not yet fully
+ functional, and may cause serious problems.
+
+ If unsure, say N.
+
+config XFS_QUOTA
+ bool "Quota support"
+ depends on XFS_FS
+ help
+ If you say Y here, you will be able to set limits for disk usage on
+ a per user and/or a per group basis under XFS. XFS considers quota
+ information as filesystem metadata and uses journaling to provide a
+ higher level guarantee of consistency. The on-disk data format for
+ quota is also compatible with the IRIX version of XFS, allowing a
+ filesystem to be migrated between Linux and IRIX without any need
+ for conversion.
+
+ If unsure, say N. More comprehensive documentation can be found in
+ README.quota in the xfsprogs package. XFS quota can be used either
+ with or without the generic quota support enabled (CONFIG_QUOTA) -
+ they are completely independent subsystems.
+
+config XFS_SECURITY
+ bool "Security Label support"
+ depends on XFS_FS
+ help
+ Security labels support alternative access control models
+ implemented by security modules like SELinux. This option
+ enables an extended attribute namespace for inode security
+ labels in the XFS filesystem.
+
+ If you are not using a security module that requires using
+ extended attributes for inode security labels, say N.
+
+config XFS_POSIX_ACL
+ bool "POSIX ACL support"
+ depends on XFS_FS
+ help
+ POSIX Access Control Lists (ACLs) support permissions for users and
+ groups beyond the owner/group/world scheme.
+
+ To learn more about Access Control Lists, visit the POSIX ACLs for
+ Linux website <http://acl.bestbits.at/>.
+
+ If you don't know what Access Control Lists are, say N.
+
+endmenu
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index bb714c1dca18..5dac0f724fc6 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -70,6 +70,7 @@ xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o
xfs-$(CONFIG_PROC_FS) += linux-2.6/xfs_stats.o
xfs-$(CONFIG_SYSCTL) += linux-2.6/xfs_sysctl.o
xfs-$(CONFIG_COMPAT) += linux-2.6/xfs_ioctl32.o
+xfs-$(CONFIG_EXPORTFS) += linux-2.6/xfs_export.o
xfs-y += xfs_alloc.o \
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 29fc5b35c268..d09fa326a3bc 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -71,7 +71,7 @@ xfs_page_trace(
bhv_desc_t *bdp;
vnode_t *vp = LINVFS_GET_VP(inode);
loff_t isize = i_size_read(inode);
- loff_t offset = page->index << PAGE_CACHE_SHIFT;
+ loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
int delalloc = -1, unmapped = -1, unwritten = -1;
if (page_has_buffers(page))
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index d5e75e606d1a..a7cdd5e8d42b 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -53,6 +53,7 @@
#include <linux/workqueue.h>
#include <linux/percpu.h>
#include <linux/blkdev.h>
+#include <linux/hash.h>
#include "xfs_linux.h"
@@ -127,34 +128,71 @@ ktrace_t *pagebuf_trace_buf;
kmem_zone_free(pagebuf_cache, (pb));
/*
- * Pagebuf hashing
+ * Page Region interfaces.
+ *
+ * For pages in filesystems where the blocksize is smaller than the
+ * pagesize, we use the page->private field (long) to hold a bitmap
+ * of uptodate regions within the page.
+ *
+ * Each such region is "bytes per page / bits per long" bytes long.
+ *
+ * NBPPR == number-of-bytes-per-page-region
+ * BTOPR == bytes-to-page-region (rounded up)
+ * BTOPRT == bytes-to-page-region-truncated (rounded down)
*/
+#if (BITS_PER_LONG == 32)
+#define PRSHIFT (PAGE_CACHE_SHIFT - 5) /* (32 == 1<<5) */
+#elif (BITS_PER_LONG == 64)
+#define PRSHIFT (PAGE_CACHE_SHIFT - 6) /* (64 == 1<<6) */
+#else
+#error BITS_PER_LONG must be 32 or 64
+#endif
+#define NBPPR (PAGE_CACHE_SIZE/BITS_PER_LONG)
+#define BTOPR(b) (((unsigned int)(b) + (NBPPR - 1)) >> PRSHIFT)
+#define BTOPRT(b) (((unsigned int)(b) >> PRSHIFT))
+
+STATIC unsigned long
+page_region_mask(
+ size_t offset,
+ size_t length)
+{
+ unsigned long mask;
+ int first, final;
-#define NBITS 8
-#define NHASH (1<<NBITS)
+ first = BTOPR(offset);
+ final = BTOPRT(offset + length - 1);
+ first = min(first, final);
-typedef struct {
- struct list_head pb_hash;
- spinlock_t pb_hash_lock;
-} pb_hash_t;
+ mask = ~0UL;
+ mask <<= BITS_PER_LONG - (final - first);
+ mask >>= BITS_PER_LONG - (final);
-STATIC pb_hash_t pbhash[NHASH];
-#define pb_hash(pb) &pbhash[pb->pb_hash_index]
+ ASSERT(offset + length <= PAGE_CACHE_SIZE);
+ ASSERT((final - first) < BITS_PER_LONG && (final - first) >= 0);
-STATIC int
-_bhash(
- struct block_device *bdev,
- loff_t base)
+ return mask;
+}
+
+STATIC inline void
+set_page_region(
+ struct page *page,
+ size_t offset,
+ size_t length)
{
- int bit, hval;
+ page->private |= page_region_mask(offset, length);
+ if (page->private == ~0UL)
+ SetPageUptodate(page);
+}
- base >>= 9;
- base ^= (unsigned long)bdev / L1_CACHE_BYTES;
- for (bit = hval = 0; base && bit < sizeof(base) * 8; bit += NBITS) {
- hval ^= (int)base & (NHASH-1);
- base >>= NBITS;
- }
- return hval;
+STATIC inline int
+test_page_region(
+ struct page *page,
+ size_t offset,
+ size_t length)
+{
+ unsigned long mask = page_region_mask(offset, length);
+
+ return (mask && (page->private & mask) == mask);
}
/*
@@ -340,7 +378,6 @@ _pagebuf_lookup_pages(
uint flags)
{
struct address_space *mapping = bp->pb_target->pbr_mapping;
- unsigned int sectorshift = bp->pb_target->pbr_sshift;
size_t blocksize = bp->pb_target->pbr_bsize;
size_t size = bp->pb_count_desired;
size_t nbytes, offset;
@@ -400,22 +437,11 @@ _pagebuf_lookup_pages(
if (!PageUptodate(page)) {
page_count--;
- if (blocksize == PAGE_CACHE_SIZE) {
+ if (blocksize >= PAGE_CACHE_SIZE) {
if (flags & PBF_READ)
bp->pb_locked = 1;
} else if (!PagePrivate(page)) {
- unsigned long j, range;
-
- /*
- * In this case page->private holds a bitmap
- * of uptodate sectors within the page
- */
- ASSERT(blocksize < PAGE_CACHE_SIZE);
- range = (offset + nbytes) >> sectorshift;
- for (j = offset >> sectorshift; j < range; j++)
- if (!test_bit(j, &page->private))
- break;
- if (j == range)
+ if (test_page_region(page, offset, nbytes))
page_count++;
}
}
@@ -483,8 +509,8 @@ _pagebuf_map_pages(
* are unlocked. No I/O is implied by this call.
*/
xfs_buf_t *
-_pagebuf_find( /* find buffer for block */
- xfs_buftarg_t *target,/* target for block */
+_pagebuf_find(
+ xfs_buftarg_t *btp, /* block device target */
loff_t ioff, /* starting offset of range */
size_t isize, /* length of range */
page_buf_flags_t flags, /* PBF_TRYLOCK */
@@ -492,59 +518,55 @@ _pagebuf_find( /* find buffer for block */
{
loff_t range_base;
size_t range_length;
- int hval;
- pb_hash_t *h;
+ xfs_bufhash_t *hash;
xfs_buf_t *pb, *n;
- int not_locked;
range_base = (ioff << BBSHIFT);
range_length = (isize << BBSHIFT);
- /* Ensure we never do IOs smaller than the sector size */
- BUG_ON(range_length < (1 << target->pbr_sshift));
+ /* Check for IOs smaller than the sector size / not sector aligned */
+ ASSERT(!(range_length < (1 << btp->pbr_sshift)));
+ ASSERT(!(range_base & (loff_t)btp->pbr_smask));
- /* Ensure we never do IOs that are not sector aligned */
- BUG_ON(range_base & (loff_t)target->pbr_smask);
+ hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)];
- hval = _bhash(target->pbr_bdev, range_base);
- h = &pbhash[hval];
+ spin_lock(&hash->bh_lock);
- spin_lock(&h->pb_hash_lock);
- list_for_each_entry_safe(pb, n, &h->pb_hash, pb_hash_list) {
- if (pb->pb_target == target &&
- pb->pb_file_offset == range_base &&
+ list_for_each_entry_safe(pb, n, &hash->bh_list, pb_hash_list) {
+ ASSERT(btp == pb->pb_target);
+ if (pb->pb_file_offset == range_base &&
pb->pb_buffer_length == range_length) {
- /* If we look at something bring it to the
- * front of the list for next time
+ /*
+ * If we look at something bring it to the
+ * front of the list for next time.
*/
atomic_inc(&pb->pb_hold);
- list_move(&pb->pb_hash_list, &h->pb_hash);
+ list_move(&pb->pb_hash_list, &hash->bh_list);
goto found;
}
}
/* No match found */
if (new_pb) {
- _pagebuf_initialize(new_pb, target, range_base,
+ _pagebuf_initialize(new_pb, btp, range_base,
range_length, flags);
- new_pb->pb_hash_index = hval;
- list_add(&new_pb->pb_hash_list, &h->pb_hash);
+ new_pb->pb_hash = hash;
+ list_add(&new_pb->pb_hash_list, &hash->bh_list);
} else {
XFS_STATS_INC(pb_miss_locked);
}
- spin_unlock(&h->pb_hash_lock);
- return (new_pb);
+ spin_unlock(&hash->bh_lock);
+ return new_pb;
found:
- spin_unlock(&h->pb_hash_lock);
+ spin_unlock(&hash->bh_lock);
/* Attempt to get the semaphore without sleeping,
* if this does not work then we need to drop the
* spinlock and do a hard attempt on the semaphore.
*/
- not_locked = down_trylock(&pb->pb_sema);
- if (not_locked) {
+ if (down_trylock(&pb->pb_sema)) {
if (!(flags & PBF_TRYLOCK)) {
/* wait for buffer ownership */
PB_TRACE(pb, "get_lock", 0);
@@ -711,8 +733,6 @@ pagebuf_readahead(
bdi = target->pbr_mapping->backing_dev_info;
if (bdi_read_congested(bdi))
return;
- if (bdi_write_congested(bdi))
- return;
flags |= (PBF_TRYLOCK|PBF_ASYNC|PBF_READ_AHEAD);
xfs_buf_read_flags(target, ioff, isize, flags);
@@ -866,18 +886,29 @@ void
pagebuf_rele(
xfs_buf_t *pb)
{
- pb_hash_t *hash = pb_hash(pb);
+ xfs_bufhash_t *hash = pb->pb_hash;
PB_TRACE(pb, "rele", pb->pb_relse);
- if (atomic_dec_and_lock(&pb->pb_hold, &hash->pb_hash_lock)) {
+ /*
+ * pagebuf_lookup buffers are not hashed, not delayed write,
+ * and don't have their own release routines. Special case.
+ */
+ if (unlikely(!hash)) {
+ ASSERT(!pb->pb_relse);
+ if (atomic_dec_and_test(&pb->pb_hold))
+ xfs_buf_free(pb);
+ return;
+ }
+
+ if (atomic_dec_and_lock(&pb->pb_hold, &hash->bh_lock)) {
int do_free = 1;
if (pb->pb_relse) {
atomic_inc(&pb->pb_hold);
- spin_unlock(&hash->pb_hash_lock);
+ spin_unlock(&hash->bh_lock);
(*(pb->pb_relse)) (pb);
- spin_lock(&hash->pb_hash_lock);
+ spin_lock(&hash->bh_lock);
do_free = 0;
}
@@ -892,10 +923,10 @@ pagebuf_rele(
if (do_free) {
list_del_init(&pb->pb_hash_list);
- spin_unlock(&hash->pb_hash_lock);
+ spin_unlock(&hash->bh_lock);
pagebuf_free(pb);
} else {
- spin_unlock(&hash->pb_hash_lock);
+ spin_unlock(&hash->bh_lock);
}
}
}
@@ -935,6 +966,7 @@ pagebuf_cond_lock( /* lock buffer, if not locked */
return(locked ? 0 : -EBUSY);
}
+#ifdef DEBUG
/*
* pagebuf_lock_value
*
@@ -946,6 +978,7 @@ pagebuf_lock_value(
{
return(atomic_read(&pb->pb_sema.count));
}
+#endif
/*
* pagebuf_lock
@@ -1216,7 +1249,6 @@ bio_end_io_pagebuf(
{
xfs_buf_t *pb = (xfs_buf_t *)bio->bi_private;
unsigned int i, blocksize = pb->pb_target->pbr_bsize;
- unsigned int sectorshift = pb->pb_target->pbr_sshift;
struct bio_vec *bvec = bio->bi_io_vec;
if (bio->bi_size)
@@ -1234,14 +1266,7 @@ bio_end_io_pagebuf(
SetPageUptodate(page);
} else if (!PagePrivate(page) &&
(pb->pb_flags & _PBF_PAGE_CACHE)) {
- unsigned long j, range;
-
- ASSERT(blocksize < PAGE_CACHE_SIZE);
- range = (bvec->bv_offset + bvec->bv_len) >> sectorshift;
- for (j = bvec->bv_offset >> sectorshift; j < range; j++)
- set_bit(j, &page->private);
- if (page->private == (unsigned long)(PAGE_CACHE_SIZE-1))
- SetPageUptodate(page);
+ set_page_region(page, bvec->bv_offset, bvec->bv_len);
}
if (_pagebuf_iolocked(pb)) {
@@ -1470,28 +1495,59 @@ pagebuf_iomove(
*/
void
xfs_wait_buftarg(
- xfs_buftarg_t *target)
+ xfs_buftarg_t *btp)
{
- xfs_buf_t *pb, *n;
- pb_hash_t *h;
- int i;
+ xfs_buf_t *bp, *n;
+ xfs_bufhash_t *hash;
+ uint i;
- for (i = 0; i < NHASH; i++) {
- h = &pbhash[i];
+ for (i = 0; i < (1 << btp->bt_hashshift); i++) {
+ hash = &btp->bt_hash[i];
again:
- spin_lock(&h->pb_hash_lock);
- list_for_each_entry_safe(pb, n, &h->pb_hash, pb_hash_list) {
- if (pb->pb_target == target &&
- !(pb->pb_flags & PBF_FS_MANAGED)) {
- spin_unlock(&h->pb_hash_lock);
+ spin_lock(&hash->bh_lock);
+ list_for_each_entry_safe(bp, n, &hash->bh_list, pb_hash_list) {
+ ASSERT(btp == bp->pb_target);
+ if (!(bp->pb_flags & PBF_FS_MANAGED)) {
+ spin_unlock(&hash->bh_lock);
delay(100);
goto again;
}
}
- spin_unlock(&h->pb_hash_lock);
+ spin_unlock(&hash->bh_lock);
}
}
+/*
+ * Allocate buffer hash table for a given target.
+ * For devices containing metadata (i.e. not the log/realtime devices)
+ * we need to allocate a much larger hash table.
+ */
+STATIC void
+xfs_alloc_bufhash(
+ xfs_buftarg_t *btp,
+ int external)
+{
+ unsigned int i;
+
+ btp->bt_hashshift = external ? 3 : 8; /* 8 or 256 buckets */
+ btp->bt_hashmask = (1 << btp->bt_hashshift) - 1;
+ btp->bt_hash = kmem_zalloc((1 << btp->bt_hashshift) *
+ sizeof(xfs_bufhash_t), KM_SLEEP);
+ for (i = 0; i < (1 << btp->bt_hashshift); i++) {
+ spin_lock_init(&btp->bt_hash[i].bh_lock);
+ INIT_LIST_HEAD(&btp->bt_hash[i].bh_list);
+ }
+}
+
+STATIC void
+xfs_free_bufhash(
+ xfs_buftarg_t *btp)
+{
+ kmem_free(btp->bt_hash,
+ (1 << btp->bt_hashshift) * sizeof(xfs_bufhash_t));
+ btp->bt_hash = NULL;
+}
+
void
xfs_free_buftarg(
xfs_buftarg_t *btp,
@@ -1500,6 +1556,7 @@ xfs_free_buftarg(
xfs_flush_buftarg(btp, 1);
if (external)
xfs_blkdev_put(btp->pbr_bdev);
+ xfs_free_bufhash(btp);
iput(btp->pbr_mapping->host);
kmem_free(btp, sizeof(*btp));
}
@@ -1514,11 +1571,12 @@ xfs_incore_relse(
truncate_inode_pages(btp->pbr_mapping, 0LL);
}
-int
-xfs_setsize_buftarg(
+STATIC int
+xfs_setsize_buftarg_flags(
xfs_buftarg_t *btp,
unsigned int blocksize,
- unsigned int sectorsize)
+ unsigned int sectorsize,
+ int verbose)
{
btp->pbr_bsize = blocksize;
btp->pbr_sshift = ffs(sectorsize) - 1;
@@ -1530,9 +1588,42 @@ xfs_setsize_buftarg(
sectorsize, XFS_BUFTARG_NAME(btp));
return EINVAL;
}
+
+ if (verbose &&
+ (PAGE_CACHE_SIZE / BITS_PER_LONG) > sectorsize) {
+ printk(KERN_WARNING
+ "XFS: %u byte sectors in use on device %s. "
+ "This is suboptimal; %u or greater is ideal.\n",
+ sectorsize, XFS_BUFTARG_NAME(btp),
+ (unsigned int)PAGE_CACHE_SIZE / BITS_PER_LONG);
+ }
+
return 0;
}
+/*
+* When allocating the initial buffer target we have not yet
+* read in the superblock, so we don't know what sized sectors
+* are being used at this early stage. Play safe.
+*/
+STATIC int
+xfs_setsize_buftarg_early(
+ xfs_buftarg_t *btp,
+ struct block_device *bdev)
+{
+ return xfs_setsize_buftarg_flags(btp,
+ PAGE_CACHE_SIZE, bdev_hardsect_size(bdev), 0);
+}
+
+int
+xfs_setsize_buftarg(
+ xfs_buftarg_t *btp,
+ unsigned int blocksize,
+ unsigned int sectorsize)
+{
+ return xfs_setsize_buftarg_flags(btp, blocksize, sectorsize, 1);
+}
+
STATIC int
xfs_mapping_buftarg(
xfs_buftarg_t *btp,
@@ -1568,7 +1659,8 @@ xfs_mapping_buftarg(
xfs_buftarg_t *
xfs_alloc_buftarg(
- struct block_device *bdev)
+ struct block_device *bdev,
+ int external)
{
xfs_buftarg_t *btp;
@@ -1576,10 +1668,11 @@ xfs_alloc_buftarg(
btp->pbr_dev = bdev->bd_dev;
btp->pbr_bdev = bdev;
- if (xfs_setsize_buftarg(btp, PAGE_CACHE_SIZE, bdev_hardsect_size(bdev)))
+ if (xfs_setsize_buftarg_early(btp, bdev))
goto error;
if (xfs_mapping_buftarg(btp, bdev))
goto error;
+ xfs_alloc_bufhash(btp, external);
return btp;
error:
@@ -1843,8 +1936,6 @@ pagebuf_daemon_stop(void)
int __init
pagebuf_init(void)
{
- int i;
-
pagebuf_cache = kmem_cache_create("xfs_buf_t", sizeof(xfs_buf_t), 0,
SLAB_HWCACHE_ALIGN, NULL, NULL);
if (pagebuf_cache == NULL) {
@@ -1865,11 +1956,6 @@ pagebuf_init(void)
return -ENOMEM;
}
- for (i = 0; i < NHASH; i++) {
- spin_lock_init(&pbhash[i].pb_hash_lock);
- INIT_LIST_HEAD(&pbhash[i].pb_hash);
- }
-
return 0;
}
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index dc43517d8e31..74deed8e6d90 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -95,6 +95,11 @@ typedef enum page_buf_flags_e { /* pb_flags values */
#define PBF_NOT_DONE(pb) (((pb)->pb_flags & (PBF_PARTIAL|PBF_NONE)) != 0)
#define PBF_DONE(pb) (((pb)->pb_flags & (PBF_PARTIAL|PBF_NONE)) == 0)
+typedef struct xfs_bufhash {
+ struct list_head bh_list;
+ spinlock_t bh_lock;
+} xfs_bufhash_t;
+
typedef struct xfs_buftarg {
dev_t pbr_dev;
struct block_device *pbr_bdev;
@@ -102,32 +107,35 @@ typedef struct xfs_buftarg {
unsigned int pbr_bsize;
unsigned int pbr_sshift;
size_t pbr_smask;
+
+ /* per-device buffer hash table */
+ uint bt_hashmask;
+ uint bt_hashshift;
+ xfs_bufhash_t *bt_hash;
} xfs_buftarg_t;
/*
* xfs_buf_t: Buffer structure for page cache-based buffers
*
* This buffer structure is used by the page cache buffer management routines
- * to refer to an assembly of pages forming a logical buffer. The actual
- * I/O is performed with buffer_head or bio structures, as required by drivers,
- * for drivers which do not understand this structure. The buffer structure is
- * used on temporary basis only, and discarded when released.
- *
- * The real data storage is recorded in the page cache. Metadata is
- * hashed to the inode for the block device on which the file system resides.
- * File data is hashed to the inode for the file. Pages which are only
- * partially filled with data have bits set in their block_map entry
- * to indicate which disk blocks in the page are not valid.
+ * to refer to an assembly of pages forming a logical buffer. The actual I/O
+ * is performed with buffer_head structures, as required by drivers.
+ *
+ * The buffer structure is used on a temporary basis only, and discarded when
+ * released. The real data storage is recorded in the page cache. Metadata is
+ * hashed to the block device on which the file system resides.
*/
struct xfs_buf;
+
+/* call-back function on I/O completion */
typedef void (*page_buf_iodone_t)(struct xfs_buf *);
- /* call-back function on I/O completion */
+/* call-back function on buffer release */
typedef void (*page_buf_relse_t)(struct xfs_buf *);
- /* call-back function on I/O completion */
+/* pre-write function */
typedef int (*page_buf_bdstrat_t)(struct xfs_buf *);
-#define PB_PAGES 4
+#define PB_PAGES 2
typedef struct xfs_buf {
struct semaphore pb_sema; /* semaphore for lockables */
@@ -136,8 +144,9 @@ typedef struct xfs_buf {
wait_queue_head_t pb_waiters; /* unpin waiters */
struct list_head pb_list;
page_buf_flags_t pb_flags; /* status flags */
- struct list_head pb_hash_list;
- xfs_buftarg_t *pb_target; /* logical object */
+ struct list_head pb_hash_list; /* hash table list */
+ xfs_bufhash_t *pb_hash; /* hash table list start */
+ xfs_buftarg_t *pb_target; /* buffer target (device) */
atomic_t pb_hold; /* reference count */
xfs_daddr_t pb_bn; /* block number for I/O */
loff_t pb_file_offset; /* offset in file */
@@ -154,10 +163,9 @@ typedef struct xfs_buf {
void *pb_fspriv2;
void *pb_fspriv3;
unsigned short pb_error; /* error code on I/O */
- unsigned short pb_page_count; /* size of page array */
- unsigned short pb_offset; /* page offset in first page */
- unsigned char pb_locked; /* page array is locked */
- unsigned char pb_hash_index; /* hash table index */
+ unsigned short pb_locked; /* page array is locked */
+ unsigned int pb_page_count; /* size of page array */
+ unsigned int pb_offset; /* page offset in first page */
struct page **pb_pages; /* array of page pointers */
struct page *pb_page_array[PB_PAGES]; /* inline pages */
#ifdef PAGEBUF_LOCK_TRACKING
@@ -455,7 +463,7 @@ extern inline xfs_caddr_t xfs_buf_offset(xfs_buf_t *bp, size_t offset)
pagebuf_associate_memory(bp, val, count)
#define XFS_BUF_ADDR(bp) ((bp)->pb_bn)
#define XFS_BUF_SET_ADDR(bp, blk) \
- ((bp)->pb_bn = (blk))
+ ((bp)->pb_bn = (xfs_daddr_t)(blk))
#define XFS_BUF_OFFSET(bp) ((bp)->pb_file_offset)
#define XFS_BUF_SET_OFFSET(bp, off) \
((bp)->pb_file_offset = (off))
@@ -564,7 +572,7 @@ static inline int xfs_bdwrite(void *mp, xfs_buf_t *bp)
* Handling of buftargs.
*/
-extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *);
+extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int);
extern void xfs_free_buftarg(xfs_buftarg_t *, int);
extern void xfs_wait_buftarg(xfs_buftarg_t *);
extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
new file mode 100644
index 000000000000..772d216d8146
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ * Further, this software is distributed without any warranty that it is
+ * free of the rightful claim of any third person regarding infringement
+ * or the like. Any license provided herein, whether implied or
+ * otherwise, applies only to this software file. Patent licenses, if
+ * any, provided herein do not apply to combinations of this program with
+ * other software, or any other product whatsoever.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
+ * Mountain View, CA 94043, or:
+ *
+ * http://www.sgi.com
+ *
+ * For further information regarding this notice, see:
+ *
+ * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
+ */
+
+#include "xfs.h"
+
+
+STATIC struct dentry *
+linvfs_decode_fh(
+ struct super_block *sb,
+ __u32 *fh,
+ int fh_len,
+ int fileid_type,
+ int (*acceptable)(
+ void *context,
+ struct dentry *de),
+ void *context)
+{
+ __u32 parent[2];
+ parent[0] = parent[1] = 0;
+
+ if (fh_len < 2 || fileid_type > 2)
+ return NULL;
+
+ if (fileid_type == 2 && fh_len > 2) {
+ if (fh_len == 3) {
+ printk(KERN_WARNING
+ "XFS: detected filehandle without "
+ "parent inode generation information.");
+ return ERR_PTR(-ESTALE);
+ }
+
+ parent[0] = fh[2];
+ parent[1] = fh[3];
+ }
+
+ return find_exported_dentry(sb, fh, parent, acceptable, context);
+
+}
+
+STATIC struct dentry *
+linvfs_get_dentry(
+ struct super_block *sb,
+ void *data)
+{
+ vnode_t *vp;
+ struct inode *inode;
+ struct dentry *result;
+ xfs_fid2_t xfid;
+ vfs_t *vfsp = LINVFS_GET_VFS(sb);
+ int error;
+
+ xfid.fid_len = sizeof(xfs_fid2_t) - sizeof(xfid.fid_len);
+ xfid.fid_pad = 0;
+ xfid.fid_gen = ((__u32 *)data)[1];
+ xfid.fid_ino = ((__u32 *)data)[0];
+
+ VFS_VGET(vfsp, &vp, (fid_t *)&xfid, error);
+ if (error || vp == NULL)
+ return ERR_PTR(-ESTALE) ;
+
+ inode = LINVFS_GET_IP(vp);
+ result = d_alloc_anon(inode);
+ if (!result) {
+ iput(inode);
+ return ERR_PTR(-ENOMEM);
+ }
+ return result;
+}
+
+STATIC struct dentry *
+linvfs_get_parent(
+ struct dentry *child)
+{
+ int error;
+ vnode_t *vp, *cvp;
+ struct dentry *parent;
+ struct dentry dotdot;
+
+ dotdot.d_name.name = "..";
+ dotdot.d_name.len = 2;
+ dotdot.d_inode = NULL;
+
+ cvp = NULL;
+ vp = LINVFS_GET_VP(child->d_inode);
+ VOP_LOOKUP(vp, &dotdot, &cvp, 0, NULL, NULL, error);
+ if (unlikely(error))
+ return ERR_PTR(-error);
+
+ parent = d_alloc_anon(LINVFS_GET_IP(cvp));
+ if (unlikely(!parent)) {
+ VN_RELE(cvp);
+ return ERR_PTR(-ENOMEM);
+ }
+ return parent;
+}
+
+struct export_operations linvfs_export_ops = {
+ .decode_fh = linvfs_decode_fh,
+ .get_parent = linvfs_get_parent,
+ .get_dentry = linvfs_get_dentry,
+};
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 66462e12597a..0dd97bd7b146 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -81,23 +81,23 @@ __linvfs_read(
STATIC ssize_t
-linvfs_read(
+linvfs_aio_read(
struct kiocb *iocb,
char __user *buf,
size_t count,
loff_t pos)
{
- return __linvfs_read(iocb, buf, 0, count, pos);
+ return __linvfs_read(iocb, buf, IO_ISAIO, count, pos);
}
STATIC ssize_t
-linvfs_read_invis(
+linvfs_aio_read_invis(
struct kiocb *iocb,
char __user *buf,
size_t count,
loff_t pos)
{
- return __linvfs_read(iocb, buf, IO_INVIS, count, pos);
+ return __linvfs_read(iocb, buf, IO_ISAIO|IO_INVIS, count, pos);
}
@@ -125,23 +125,23 @@ __linvfs_write(
STATIC ssize_t
-linvfs_write(
+linvfs_aio_write(
struct kiocb *iocb,
const char __user *buf,
size_t count,
loff_t pos)
{
- return __linvfs_write(iocb, buf, 0, count, pos);
+ return __linvfs_write(iocb, buf, IO_ISAIO, count, pos);
}
STATIC ssize_t
-linvfs_write_invis(
+linvfs_aio_write_invis(
struct kiocb *iocb,
const char __user *buf,
size_t count,
loff_t pos)
{
- return __linvfs_write(iocb, buf, IO_INVIS, count, pos);
+ return __linvfs_write(iocb, buf, IO_ISAIO|IO_INVIS, count, pos);
}
@@ -492,8 +492,8 @@ struct file_operations linvfs_file_operations = {
.write = do_sync_write,
.readv = linvfs_readv,
.writev = linvfs_writev,
- .aio_read = linvfs_read,
- .aio_write = linvfs_write,
+ .aio_read = linvfs_aio_read,
+ .aio_write = linvfs_aio_write,
.sendfile = linvfs_sendfile,
.ioctl = linvfs_ioctl,
.mmap = linvfs_file_mmap,
@@ -508,8 +508,8 @@ struct file_operations linvfs_invis_file_operations = {
.write = do_sync_write,
.readv = linvfs_readv_invis,
.writev = linvfs_writev_invis,
- .aio_read = linvfs_read_invis,
- .aio_write = linvfs_write_invis,
+ .aio_read = linvfs_aio_read_invis,
+ .aio_write = linvfs_aio_write_invis,
.sendfile = linvfs_sendfile,
.ioctl = linvfs_ioctl_invis,
.mmap = linvfs_file_mmap,
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 284f2ad68e3a..77c393654537 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -499,7 +499,7 @@ xfs_attrmulti_by_handle(
xfs_fsop_attrmulti_handlereq_t am_hreq;
struct inode *inode;
vnode_t *vp;
- int i, size;
+ unsigned int i, size;
error = xfs_vget_fsop_handlereq(mp, parinode, CAP_SYS_ADMIN, arg,
sizeof(xfs_fsop_attrmulti_handlereq_t),
@@ -509,6 +509,11 @@ xfs_attrmulti_by_handle(
return -error;
size = am_hreq.opcount * sizeof(attr_multiop_t);
+ if (!size || size > 16 * PAGE_SIZE) {
+ VN_RELE(vp);
+ return -XFS_ERROR(E2BIG);
+ }
+
ops = (xfs_attr_multiop_t *)kmalloc(size, GFP_KERNEL);
if (!ops) {
VN_RELE(vp);
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 0cd5674cb3c5..407e99359391 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -369,33 +369,6 @@ linvfs_rename(
return 0;
}
-STATIC int
-linvfs_readlink(
- struct dentry *dentry,
- char __user *buf,
- int size)
-{
- vnode_t *vp = LINVFS_GET_VP(dentry->d_inode);
- uio_t uio;
- iovec_t iov;
- int error;
-
- iov.iov_base = buf;
- iov.iov_len = size;
-
- uio.uio_iov = &iov;
- uio.uio_offset = 0;
- uio.uio_segflg = UIO_USERSPACE;
- uio.uio_resid = size;
- uio.uio_iovcnt = 1;
-
- VOP_READLINK(vp, &uio, 0, NULL, error);
- if (error)
- return -error;
-
- return (size - uio.uio_resid);
-}
-
/*
* careful here - this function can get called recursively, so
* we need to be very careful about how much stack we use.
@@ -694,7 +667,7 @@ struct inode_operations linvfs_dir_inode_operations = {
};
struct inode_operations linvfs_symlink_inode_operations = {
- .readlink = linvfs_readlink,
+ .readlink = generic_readlink,
.follow_link = linvfs_follow_link,
.put_link = linvfs_put_link,
.permission = linvfs_permission,
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 88e2b7c7b73c..0dee823e5314 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -317,7 +317,7 @@ xfs_read(
xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore,
(void *)iovp, segs, *offset, ioflags);
ret = __generic_file_aio_read(iocb, iovp, segs, offset);
- if (ret == -EIOCBQUEUED)
+ if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO))
ret = wait_on_sync_kiocb(iocb);
if (ret > 0)
XFS_STATS_ADD(xs_read_bytes, ret);
@@ -854,7 +854,7 @@ retry:
current->backing_dev_info = NULL;
- if (ret == -EIOCBQUEUED)
+ if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO))
ret = wait_on_sync_kiocb(iocb);
if ((ret == -ENOSPC) &&
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index b96f6e2db285..8bfba484e40b 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -75,7 +75,6 @@
STATIC struct quotactl_ops linvfs_qops;
STATIC struct super_operations linvfs_sops;
-STATIC struct export_operations linvfs_export_ops;
STATIC kmem_zone_t *linvfs_inode_zone;
STATIC kmem_shaker_t xfs_inode_shaker;
@@ -659,63 +658,6 @@ linvfs_freeze_fs(
VFS_FREEZE(LINVFS_GET_VFS(sb));
}
-STATIC struct dentry *
-linvfs_get_parent(
- struct dentry *child)
-{
- int error;
- vnode_t *vp, *cvp;
- struct dentry *parent;
- struct dentry dotdot;
-
- dotdot.d_name.name = "..";
- dotdot.d_name.len = 2;
- dotdot.d_inode = NULL;
-
- cvp = NULL;
- vp = LINVFS_GET_VP(child->d_inode);
- VOP_LOOKUP(vp, &dotdot, &cvp, 0, NULL, NULL, error);
- if (unlikely(error))
- return ERR_PTR(-error);
-
- parent = d_alloc_anon(LINVFS_GET_IP(cvp));
- if (unlikely(!parent)) {
- VN_RELE(cvp);
- return ERR_PTR(-ENOMEM);
- }
- return parent;
-}
-
-STATIC struct dentry *
-linvfs_get_dentry(
- struct super_block *sb,
- void *data)
-{
- vnode_t *vp;
- struct inode *inode;
- struct dentry *result;
- xfs_fid2_t xfid;
- vfs_t *vfsp = LINVFS_GET_VFS(sb);
- int error;
-
- xfid.fid_len = sizeof(xfs_fid2_t) - sizeof(xfid.fid_len);
- xfid.fid_pad = 0;
- xfid.fid_gen = ((__u32 *)data)[1];
- xfid.fid_ino = ((__u32 *)data)[0];
-
- VFS_VGET(vfsp, &vp, (fid_t *)&xfid, error);
- if (error || vp == NULL)
- return ERR_PTR(-ESTALE) ;
-
- inode = LINVFS_GET_IP(vp);
- result = d_alloc_anon(inode);
- if (!result) {
- iput(inode);
- return ERR_PTR(-ENOMEM);
- }
- return result;
-}
-
STATIC int
linvfs_show_options(
struct seq_file *m,
@@ -808,7 +750,9 @@ linvfs_fill_super(
}
sb_min_blocksize(sb, BBSIZE);
+#ifdef CONFIG_EXPORTFS
sb->s_export_op = &linvfs_export_ops;
+#endif
sb->s_qcop = &linvfs_qops;
sb->s_op = &linvfs_sops;
@@ -877,12 +821,6 @@ linvfs_get_sb(
return get_sb_bdev(fs_type, flags, dev_name, data, linvfs_fill_super);
}
-
-STATIC struct export_operations linvfs_export_ops = {
- .get_parent = linvfs_get_parent,
- .get_dentry = linvfs_get_dentry,
-};
-
STATIC struct super_operations linvfs_sops = {
.alloc_inode = linvfs_alloc_inode,
.destroy_inode = linvfs_destroy_inode,
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
index 866c7ad75f92..ec7e0035c731 100644
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -133,4 +133,6 @@ extern int xfs_blkdev_get(struct xfs_mount *, const char *,
struct block_device **);
extern void xfs_blkdev_put(struct block_device *);
+extern struct export_operations linvfs_export_ops;
+
#endif /* __XFS_SUPER_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 3466465fa8d5..da76c1f1e11c 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -379,6 +379,7 @@ typedef struct vnodeops {
/*
* Flags for read/write calls - same values as IRIX
*/
+#define IO_ISAIO 0x00001 /* don't wait for completion */
#define IO_ISDIRECT 0x00004 /* bypass page cache */
#define IO_INVIS 0x00020 /* don't update inode timestamps */
diff --git a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h
index 4a49cde9b857..795a11e2dbe9 100644
--- a/fs/xfs/xfs_clnt.h
+++ b/fs/xfs/xfs_clnt.h
@@ -64,6 +64,7 @@ struct xfs_mount_args {
int sunit; /* stripe unit (BBs) */
int swidth; /* stripe width (BBs), multiple of sunit */
uchar_t iosizelog; /* log2 of the preferred I/O size */
+ int ihashsize; /* inode hash table size (buckets) */
};
/*
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index bef91642de88..1bf1cc15bdcd 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -55,22 +55,32 @@
#include "xfs_inode.h"
#include "xfs_quota.h"
#include "xfs_utils.h"
+#include "xfs_bit.h"
/*
* Initialize the inode hash table for the newly mounted file system.
- *
- * mp -- this is the mount point structure for the file system being
- * initialized
+ * Choose an initial table size based on user specified value, else
+ * use a simple algorithm using the maximum number of inodes as an
+ * indicator for table size, and cap it at 16 pages (gettin' big).
*/
void
xfs_ihash_init(xfs_mount_t *mp)
{
- int i;
+ __uint64_t icount;
+ uint i, flags = KM_SLEEP | KM_MAYFAIL;
+
+ if (!mp->m_ihsize) {
+ icount = mp->m_maxicount ? mp->m_maxicount :
+ (mp->m_sb.sb_dblocks << mp->m_sb.sb_inopblog);
+ mp->m_ihsize = 1 << max_t(uint, xfs_highbit64(icount) / 3, 8);
+ mp->m_ihsize = min_t(uint, mp->m_ihsize, 16 * PAGE_SIZE);
+ }
- mp->m_ihsize = XFS_BUCKETS(mp);
- mp->m_ihash = (xfs_ihash_t *)kmem_zalloc(mp->m_ihsize
- * sizeof(xfs_ihash_t), KM_SLEEP);
- ASSERT(mp->m_ihash != NULL);
+ while (!(mp->m_ihash = (xfs_ihash_t *)kmem_zalloc(mp->m_ihsize *
+ sizeof(xfs_ihash_t), flags))) {
+ if ((mp->m_ihsize >>= 1) <= NBPP)
+ flags = KM_SLEEP;
+ }
for (i = 0; i < mp->m_ihsize; i++) {
rwlock_init(&(mp->m_ihash[i].ih_lock));
}
@@ -88,29 +98,19 @@ xfs_ihash_free(xfs_mount_t *mp)
/*
* Initialize the inode cluster hash table for the newly mounted file system.
- *
- * mp -- this is the mount point structure for the file system being
- * initialized
+ * Its size is derived from the ihash table size.
*/
void
xfs_chash_init(xfs_mount_t *mp)
{
- int i;
+ uint i;
- /*
- * m_chash size is based on m_ihash
- * with a minimum of 37 entries
- */
- mp->m_chsize = (XFS_BUCKETS(mp)) /
- (XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog);
- if (mp->m_chsize < 37) {
- mp->m_chsize = 37;
- }
+ mp->m_chsize = max_t(uint, 1, mp->m_ihsize /
+ (XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog));
+ mp->m_chsize = min_t(uint, mp->m_chsize, mp->m_ihsize);
mp->m_chash = (xfs_chash_t *)kmem_zalloc(mp->m_chsize
* sizeof(xfs_chash_t),
KM_SLEEP);
- ASSERT(mp->m_chash != NULL);
-
for (i = 0; i < mp->m_chsize; i++) {
spinlock_init(&mp->m_chash[i].ch_lock,"xfshash");
}
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 97bd24ba4198..3d427c2b065c 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -182,10 +182,6 @@ typedef struct xfs_ihash {
uint ih_version;
} xfs_ihash_t;
-/*
- * Inode hashing and hash bucket locking.
- */
-#define XFS_BUCKETS(mp) (37*(mp)->m_sb.sb_agcount-1)
#define XFS_IHASH(mp,ino) ((mp)->m_ihash + (((uint)(ino)) % (mp)->m_ihsize))
/*
@@ -193,7 +189,6 @@ typedef struct xfs_ihash {
* find inodes that share a cluster and can be flushed to disk at the same
* time.
*/
-
typedef struct xfs_chashlist {
struct xfs_chashlist *chl_next;
struct xfs_inode *chl_ip;
@@ -207,6 +202,8 @@ typedef struct xfs_chash {
lock_t ch_lock;
} xfs_chash_t;
+#define XFS_CHASH(mp,blk) ((mp)->m_chash + (((uint)blk) % (mp)->m_chsize))
+
/*
* This is the xfs in-core inode structure.
@@ -450,12 +447,6 @@ xfs_inode_t *xfs_bhvtoi(struct bhv_desc *bhvp);
#define BHV_IS_XFS(bdp) (BHV_OPS(bdp) == &xfs_vnodeops)
/*
- * Pick the inode cluster hash bucket
- * (m_chash is the same size as m_ihash)
- */
-#define XFS_CHASH(mp,blk) ((mp)->m_chash + (((uint)blk) % (mp)->m_chsize))
-
-/*
* For multiple groups support: if S_ISGID bit is set in the parent
* directory, group of new file is set to that of the parent, and
* new subdirectory gets S_ISGID bit from parent.
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index a37420eb5fc6..ef727508e3a2 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -296,7 +296,7 @@ typedef struct xfs_mount {
xfs_agnumber_t m_agirotor; /* last ag dir inode alloced */
lock_t m_agirotor_lock;/* .. and lock protecting it */
xfs_agnumber_t m_maxagi; /* highest inode alloc group */
- int m_ihsize; /* size of next field */
+ uint m_ihsize; /* size of next field */
struct xfs_ihash *m_ihash; /* fs private inode hash table*/
struct xfs_inode *m_inodes; /* active inode list */
struct list_head m_del_inodes; /* inodes to reclaim */
@@ -376,7 +376,7 @@ typedef struct xfs_mount {
xfs_dablk_t m_dirdatablk; /* blockno of dir data v2 */
xfs_dablk_t m_dirleafblk; /* blockno of dir non-data v2 */
xfs_dablk_t m_dirfreeblk; /* blockno of dirfreeindex v2 */
- int m_chsize; /* size of next field */
+ uint m_chsize; /* size of next field */
struct xfs_chash *m_chash; /* fs private inode per-cluster
* hash table */
struct xfs_dmops m_dm_ops; /* vector of DMI ops */
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 863a67c586d9..7745d23eae7f 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -252,6 +252,7 @@ xfs_start_flags(
ap->logbufsize);
return XFS_ERROR(EINVAL);
}
+ mp->m_ihsize = ap->ihashsize;
mp->m_logbsize = ap->logbufsize;
mp->m_fsname_len = strlen(ap->fsname) + 1;
mp->m_fsname = kmem_alloc(mp->m_fsname_len, KM_SLEEP);
@@ -468,19 +469,19 @@ xfs_mount(
* Setup xfs_mount buffer target pointers
*/
error = ENOMEM;
- mp->m_ddev_targp = xfs_alloc_buftarg(ddev);
+ mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0);
if (!mp->m_ddev_targp) {
xfs_blkdev_put(logdev);
xfs_blkdev_put(rtdev);
return error;
}
if (rtdev) {
- mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev);
+ mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1);
if (!mp->m_rtdev_targp)
goto error0;
}
mp->m_logdev_targp = (logdev && logdev != ddev) ?
- xfs_alloc_buftarg(logdev) : mp->m_ddev_targp;
+ xfs_alloc_buftarg(logdev, 1) : mp->m_ddev_targp;
if (!mp->m_logdev_targp)
goto error0;
@@ -1579,7 +1580,7 @@ xfs_syncsub(
}
/*
- * xfs_vget - called by DMAPI to get vnode from file handle
+ * xfs_vget - called by DMAPI and NFSD to get vnode from file handle
*/
STATIC int
xfs_vget(
@@ -1621,7 +1622,7 @@ xfs_vget(
return XFS_ERROR(EIO);
}
- if (ip->i_d.di_mode == 0 || (igen && (ip->i_d.di_gen != igen))) {
+ if (ip->i_d.di_mode == 0 || ip->i_d.di_gen != igen) {
xfs_iput_new(ip, XFS_ILOCK_SHARED);
*vpp = NULL;
return XFS_ERROR(ENOENT);
@@ -1646,6 +1647,7 @@ xfs_vget(
#define MNTOPT_SWIDTH "swidth" /* data volume stripe width */
#define MNTOPT_NOUUID "nouuid" /* ignore filesystem UUID */
#define MNTOPT_MTPT "mtpt" /* filesystem mount point */
+#define MNTOPT_IHASHSIZE "ihashsize" /* size of inode hash table */
#define MNTOPT_NORECOVERY "norecovery" /* don't run XFS recovery */
#define MNTOPT_NOLOGFLUSH "nologflush" /* don't hard flush on log writes */
#define MNTOPT_OSYNCISOSYNC "osyncisosync" /* o_sync is REALLY o_sync */
@@ -1734,6 +1736,13 @@ xfs_parseargs(
iosize = simple_strtoul(value, &eov, 10);
args->flags |= XFSMNT_IOSIZE;
args->iosizelog = (uint8_t) iosize;
+ } else if (!strcmp(this_char, MNTOPT_IHASHSIZE)) {
+ if (!value || !*value) {
+ printk("XFS: %s option requires an argument\n",
+ this_char);
+ return EINVAL;
+ }
+ args->ihashsize = simple_strtoul(value, &eov, 10);
} else if (!strcmp(this_char, MNTOPT_WSYNC)) {
args->flags |= XFSMNT_WSYNC;
} else if (!strcmp(this_char, MNTOPT_OSYNCISOSYNC)) {
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index da847a19d4a5..c6f6c4ee63ae 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -3900,7 +3900,7 @@ xfs_finish_reclaim(
int error;
if (vp && VN_BAD(vp))
- return 0;
+ goto reclaim;
/* The hash lock here protects a thread in xfs_iget_core from
* racing with us on linking the inode back with a vnode.
@@ -3948,8 +3948,7 @@ xfs_finish_reclaim(
*/
if (error) {
xfs_iunlock(ip, XFS_ILOCK_EXCL);
- xfs_ireclaim(ip);
- return (0);
+ goto reclaim;
}
xfs_iflock(ip); /* synchronize with xfs_iflush_done */
}
@@ -3968,6 +3967,7 @@ xfs_finish_reclaim(
xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
+ reclaim:
xfs_ireclaim(ip);
return 0;
}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9ca3c328b217..1f69baf46f6e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1149,6 +1149,10 @@ struct export_operations {
};
+extern struct dentry *
+find_exported_dentry(struct super_block *sb, void *obj, void *parent,
+ int (*acceptable)(void *context, struct dentry *de),
+ void *context);
struct file_system_type {
const char *name;