diff options
| author | Andrew Morton <akpm@zip.com.au> | 2002-05-19 02:22:01 -0700 |
|---|---|---|
| committer | Arnaldo Carvalho de Melo <acme@conectiva.com.br> | 2002-05-19 02:22:01 -0700 |
| commit | 1f6acea0de867d7f5e5a43ba43cf3be744da412c (patch) | |
| tree | 895e785d534e0b4965559493e80b361e9c4d0f80 | |
| parent | 610c5ab86ed7e1647ba3cedd20ab0f946b264c9d (diff) | |
[PATCH] pdflush exclusion infrastructure
Collision avoidance for pdflush threads.
Turns the request_queue-based `unsigned long ra_pages` into a structure
which contains ra_pages as well as a longword.
That longword is used to record the fact that a pdflush thread is
currently writing something back against this request_queue.
Avoids the situation where several pdflush threads are sleeping on the
same request_queue.
This patch provides only the infrastructure for the pdflush exclusion.
This infrastructure gets used in pdflush-single.patch.
| -rw-r--r-- | drivers/block/blkpg.c | 15 | ||||
| -rw-r--r-- | drivers/block/ll_rw_blk.c | 14 | ||||
| -rw-r--r-- | fs/block_dev.c | 17 | ||||
| -rw-r--r-- | fs/fs-writeback.c | 38 | ||||
| -rw-r--r-- | fs/inode.c | 20 | ||||
| -rw-r--r-- | fs/ntfs/super.c | 5 | ||||
| -rw-r--r-- | fs/open.c | 3 | ||||
| -rw-r--r-- | include/linux/backing-dev.h | 30 | ||||
| -rw-r--r-- | include/linux/blkdev.h | 9 | ||||
| -rw-r--r-- | include/linux/fs.h | 3 | ||||
| -rw-r--r-- | include/linux/mm.h | 1 | ||||
| -rw-r--r-- | mm/page-writeback.c | 8 | ||||
| -rw-r--r-- | mm/pdflush.c | 4 | ||||
| -rw-r--r-- | mm/readahead.c | 6 |
14 files changed, 125 insertions, 48 deletions
diff --git a/drivers/block/blkpg.c b/drivers/block/blkpg.c index e8059084b8f0..595fa49af3ef 100644 --- a/drivers/block/blkpg.c +++ b/drivers/block/blkpg.c @@ -35,6 +35,7 @@ #include <linux/blkpg.h> #include <linux/genhd.h> #include <linux/module.h> /* for EXPORT_SYMBOL */ +#include <linux/backing-dev.h> #include <asm/uaccess.h> @@ -219,7 +220,7 @@ int blk_ioctl(struct block_device *bdev, unsigned int cmd, unsigned long arg) unsigned short usval; kdev_t dev = to_kdev_t(bdev->bd_dev); int holder; - unsigned long *ra_pages; + struct backing_dev_info *bdi; intval = block_ioctl(bdev, cmd, arg); if (intval != -ENOTTY) @@ -241,20 +242,20 @@ int blk_ioctl(struct block_device *bdev, unsigned int cmd, unsigned long arg) case BLKFRASET: if(!capable(CAP_SYS_ADMIN)) return -EACCES; - ra_pages = blk_get_ra_pages(bdev); - if (ra_pages == NULL) + bdi = blk_get_backing_dev_info(bdev); + if (bdi == NULL) return -ENOTTY; - *ra_pages = (arg * 512) / PAGE_CACHE_SIZE; + bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE; return 0; case BLKRAGET: case BLKFRAGET: if (!arg) return -EINVAL; - ra_pages = blk_get_ra_pages(bdev); - if (ra_pages == NULL) + bdi = blk_get_backing_dev_info(bdev); + if (bdi == NULL) return -ENOTTY; - return put_user((*ra_pages * PAGE_CACHE_SIZE) / 512, + return put_user((bdi->ra_pages * PAGE_CACHE_SIZE) / 512, (long *)arg); case BLKSECTGET: diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index 5430dea71325..51fd5be00995 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -27,6 +27,7 @@ #include <linux/completion.h> #include <linux/compiler.h> #include <scsi/scsi.h> +#include <linux/backing-dev.h> #include <asm/system.h> #include <asm/io.h> @@ -100,21 +101,21 @@ inline request_queue_t *blk_get_queue(kdev_t dev) } /** - * blk_get_ra_pages - get the address of a queue's readahead tunable + * blk_get_backing_dev_info - get the address of a queue's backing_dev_info * @dev: device * * Locates the passed device's request queue and 
returns the address of its - * readahead setting. + * backing_dev_info * * Will return NULL if the request queue cannot be located. */ -unsigned long *blk_get_ra_pages(struct block_device *bdev) +struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) { - unsigned long *ret = NULL; + struct backing_dev_info *ret = NULL; request_queue_t *q = blk_get_queue(to_kdev_t(bdev->bd_dev)); if (q) - ret = &q->ra_pages; + ret = &q->backing_dev_info; return ret; } @@ -153,7 +154,8 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn) q->max_phys_segments = MAX_PHYS_SEGMENTS; q->max_hw_segments = MAX_HW_SEGMENTS; q->make_request_fn = mfn; - q->ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; + q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; + q->backing_dev_info.state = 0; blk_queue_max_sectors(q, MAX_SECTORS); blk_queue_hardsect_size(q, 512); diff --git a/fs/block_dev.c b/fs/block_dev.c index f9326d65a756..76c5e5cf0555 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -331,7 +331,7 @@ struct block_device *bdget(dev_t dev) inode->i_bdev = new_bdev; inode->i_data.a_ops = &def_blk_aops; inode->i_data.gfp_mask = GFP_USER; - inode->i_data.ra_pages = &default_ra_pages; + inode->i_data.backing_dev_info = &default_backing_dev_info; spin_lock(&bdev_lock); bdev = bdfind(dev, head); if (!bdev) { @@ -594,11 +594,12 @@ static int do_open(struct block_device *bdev, struct inode *inode, struct file * } } } - if (bdev->bd_inode->i_data.ra_pages == &default_ra_pages) { - unsigned long *ra_pages = blk_get_ra_pages(bdev); - if (ra_pages == NULL) - ra_pages = &default_ra_pages; - inode->i_data.ra_pages = ra_pages; + if (bdev->bd_inode->i_data.backing_dev_info == + &default_backing_dev_info) { + struct backing_dev_info *bdi = blk_get_backing_dev_info(bdev); + if (bdi == NULL) + bdi = &default_backing_dev_info; + inode->i_data.backing_dev_info = bdi; } if (bdev->bd_op->open) { ret = bdev->bd_op->open(inode, file); @@ 
-624,7 +625,7 @@ static int do_open(struct block_device *bdev, struct inode *inode, struct file * out2: if (!bdev->bd_openers) { bdev->bd_op = NULL; - bdev->bd_inode->i_data.ra_pages = &default_ra_pages; + bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; if (bdev != bdev->bd_contains) { blkdev_put(bdev->bd_contains, BDEV_RAW); bdev->bd_contains = NULL; @@ -698,7 +699,7 @@ int blkdev_put(struct block_device *bdev, int kind) __MOD_DEC_USE_COUNT(bdev->bd_op->owner); if (!bdev->bd_openers) { bdev->bd_op = NULL; - bdev->bd_inode->i_data.ra_pages = &default_ra_pages; + bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; if (bdev != bdev->bd_contains) { blkdev_put(bdev->bd_contains, BDEV_RAW); bdev->bd_contains = NULL; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index d1880be27437..139283a310a6 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -19,6 +19,7 @@ #include <linux/fs.h> #include <linux/mm.h> #include <linux/writeback.h> +#include <linux/backing-dev.h> /** * __mark_inode_dirty - internal function @@ -508,3 +509,40 @@ int generic_osync_inode(struct inode *inode, int what) return err; } + +/** + * writeback_acquire: attempt to get exclusive writeback access to a device + * @bdi: the device's backing_dev_info structure + * + * It is a waste of resources to have more than one pdflush thread blocked on + * a single request queue. Exclusion at the request_queue level is obtained + * via a flag in the request_queue's backing_dev_info.state. + * + * Non-request_queue-backed address_spaces will share default_backing_dev_info, + * unless they implement their own. Which is somewhat inefficient, as this + * may prevent concurrent writeback against multiple devices. + */ +int writeback_acquire(struct backing_dev_info *bdi) +{ + return !test_and_set_bit(BDI_pdflush, &bdi->state); +} + +/** + * writeback_in_progress: determine whether there is writeback in progress + * against a backing device. 
+ * @bdi: the device's backing_dev_info structure. + */ +int writeback_in_progress(struct backing_dev_info *bdi) +{ + return test_bit(BDI_pdflush, &bdi->state); +} + +/** + * writeback_release: relinquish exclusive writeback access against a device. + * @bdi: the device's backing_dev_info structure + */ +void writeback_release(struct backing_dev_info *bdi) +{ + BUG_ON(!writeback_in_progress(bdi)); + clear_bit(BDI_pdflush, &bdi->state); +} diff --git a/fs/inode.c b/fs/inode.c index fc748da51c0c..1c1256a5f799 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -12,6 +12,7 @@ #include <linux/quotaops.h> #include <linux/slab.h> #include <linux/writeback.h> +#include <linux/backing-dev.h> /* * New inode.c implementation. @@ -83,6 +84,8 @@ static struct inode *alloc_inode(struct super_block *sb) inode = (struct inode *) kmem_cache_alloc(inode_cachep, SLAB_KERNEL); if (inode) { + struct address_space * const mapping = &inode->i_data; + inode->i_sb = sb; inode->i_dev = sb->s_dev; inode->i_blkbits = sb->s_blocksize_bits; @@ -100,16 +103,17 @@ static struct inode *alloc_inode(struct super_block *sb) inode->i_pipe = NULL; inode->i_bdev = NULL; inode->i_cdev = NULL; - inode->i_data.a_ops = &empty_aops; - inode->i_data.host = inode; - inode->i_data.gfp_mask = GFP_HIGHUSER; - inode->i_data.dirtied_when = 0; - inode->i_mapping = &inode->i_data; - inode->i_data.ra_pages = &default_ra_pages; - inode->i_data.assoc_mapping = NULL; + + mapping->a_ops = &empty_aops; + mapping->host = inode; + mapping->gfp_mask = GFP_HIGHUSER; + mapping->dirtied_when = 0; + mapping->assoc_mapping = NULL; + mapping->backing_dev_info = &default_backing_dev_info; if (sb->s_bdev) - inode->i_data.ra_pages = sb->s_bdev->bd_inode->i_mapping->ra_pages; + inode->i_data.backing_dev_info = sb->s_bdev->bd_inode->i_mapping->backing_dev_info; memset(&inode->u, 0, sizeof(inode->u)); + inode->i_mapping = mapping; } return inode; } diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index b51def4b641c..546eb46bb51a 100644 --- 
a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -26,6 +26,7 @@ #include <linux/locks.h> #include <linux/spinlock.h> #include <linux/blkdev.h> /* For bdev_hardsect_size(). */ +#include <linux/backing-dev.h> #include "ntfs.h" #include "sysctl.h" @@ -1519,8 +1520,8 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) vol->mftbmp_mapping.assoc_mapping = NULL; vol->mftbmp_mapping.dirtied_when = 0; vol->mftbmp_mapping.gfp_mask = GFP_HIGHUSER; - vol->mftbmp_mapping.ra_pages = - sb->s_bdev->bd_inode->i_mapping->ra_pages; + vol->mftbmp_mapping.backing_dev_info = + sb->s_bdev->bd_inode->i_mapping->backing_dev_info; /* * Default is group and other don't have any access to files or diff --git a/fs/open.c b/fs/open.c index e0231b191336..2ef917feadb7 100644 --- a/fs/open.c +++ b/fs/open.c @@ -15,6 +15,7 @@ #include <linux/slab.h> #include <linux/tty.h> #include <linux/iobuf.h> +#include <linux/backing-dev.h> #include <asm/uaccess.h> @@ -632,7 +633,7 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) goto cleanup_file; } - f->f_ra.ra_pages = *inode->i_mapping->ra_pages; + f->f_ra.ra_pages = inode->i_mapping->backing_dev_info->ra_pages; f->f_dentry = dentry; f->f_vfsmnt = mnt; f->f_pos = 0; diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h new file mode 100644 index 000000000000..075cacc389e1 --- /dev/null +++ b/include/linux/backing-dev.h @@ -0,0 +1,30 @@ +/* + * include/linux/backing-dev.h + * + * low-level device information and state which is propagated up through + * to high-level code. 
+ */ + +#ifndef _LINUX_BACKING_DEV_H +#define _LINUX_BACKING_DEV_H + +/* + * Bits in backing_dev_info.state + */ +enum bdi_state { + BDI_pdflush, /* A pdflush thread is working this device */ + BDI_unused, /* Available bits start here */ +}; + +struct backing_dev_info { + unsigned long ra_pages; /* max readahead in PAGE_CACHE_SIZE units */ + unsigned long state; /* Always use atomic bitops on this */ +}; + +extern struct backing_dev_info default_backing_dev_info; + +int writeback_acquire(struct backing_dev_info *bdi); +int writeback_in_progress(struct backing_dev_info *bdi); +void writeback_release(struct backing_dev_info *bdi); + +#endif /* _LINUX_BACKING_DEV_H */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d8175ccc104c..ac373e6a2454 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -7,6 +7,7 @@ #include <linux/tqueue.h> #include <linux/list.h> #include <linux/pagemap.h> +#include <linux/backing-dev.h> #include <asm/scatterlist.h> @@ -162,11 +163,7 @@ struct request_queue make_request_fn *make_request_fn; prep_rq_fn *prep_rq_fn; - /* - * The VM-level readahead tunable for this device. In - * units of PAGE_CACHE_SIZE pages. - */ - unsigned long ra_pages; + struct backing_dev_info backing_dev_info; /* * The queue owner gets to use this for whatever they like. 
@@ -328,7 +325,7 @@ extern void blk_queue_hardsect_size(request_queue_t *q, unsigned short); extern void blk_queue_segment_boundary(request_queue_t *q, unsigned long); extern void blk_queue_assign_lock(request_queue_t *q, spinlock_t *); extern void blk_queue_prep_rq(request_queue_t *q, prep_rq_fn *pfn); -extern unsigned long *blk_get_ra_pages(struct block_device *bdev); +extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); extern int blk_rq_map_sg(request_queue_t *, struct request *, struct scatterlist *); extern void blk_dump_rq_flags(struct request *, char *); diff --git a/include/linux/fs.h b/include/linux/fs.h index 25578c7a5e62..374045884cb8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -305,6 +305,7 @@ struct address_space_operations { int (*direct_IO)(int, struct inode *, struct kiobuf *, unsigned long, int); }; +struct backing_dev_info; struct address_space { struct inode *host; /* owner: inode, block_device */ struct radix_tree_root page_tree; /* radix tree of all pages */ @@ -320,7 +321,7 @@ struct address_space { spinlock_t i_shared_lock; /* and spinlock protecting it */ unsigned long dirtied_when; /* jiffies of first page dirtying */ int gfp_mask; /* how to allocate the pages */ - unsigned long *ra_pages; /* device readahead */ + struct backing_dev_info *backing_dev_info; /* device readahead, etc */ spinlock_t private_lock; /* for use by the address_space */ struct list_head private_list; /* ditto */ struct address_space *assoc_mapping; /* ditto */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 2f0b56f0183b..451cdff1ec16 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -454,7 +454,6 @@ void do_page_cache_readahead(struct file *file, void page_cache_readahead(struct file *file, unsigned long offset); void page_cache_readaround(struct file *file, unsigned long offset); void handle_ra_thrashing(struct file *file); -extern unsigned long default_ra_pages; /* vma is the first one with 
address < vma->vm_end, * and even address < vma->vm_start. Have to extend vma. */ diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 661f1860880c..e2c65e1057df 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -166,6 +166,7 @@ int pdflush_flush(unsigned long nr_pages) * to perform their I/O against a large file. */ static int wb_writeback_jifs = 5 * HZ; +static struct timer_list wb_timer; /* * Periodic writeback of "old" data. @@ -206,16 +207,11 @@ static void wb_kupdate(unsigned long arg) yield(); } run_task_queue(&tq_disk); + mod_timer(&wb_timer, jiffies + wb_writeback_jifs); } -/* - * The writeback timer, for kupdate-style functionality - */ -static struct timer_list wb_timer; - static void wb_timer_fn(unsigned long unused) { - mod_timer(&wb_timer, jiffies + wb_writeback_jifs); pdflush_operation(wb_kupdate, 0); } diff --git a/mm/pdflush.c b/mm/pdflush.c index 07ceb439e9ae..5e7d0125c39d 100644 --- a/mm/pdflush.c +++ b/mm/pdflush.c @@ -103,6 +103,7 @@ static int __pdflush(struct pdflush_work *my_work) preempt_disable(); spin_lock_irq(&pdflush_lock); nr_pdflush_threads++; +// printk("pdflush %d [%d] starts\n", nr_pdflush_threads, current->pid); for ( ; ; ) { struct pdflush_work *pdf; @@ -124,7 +125,7 @@ static int __pdflush(struct pdflush_work *my_work) if (jiffies - last_empty_jifs > 1 * HZ) { /* unlocked list_empty() test is OK here */ if (list_empty(&pdflush_list)) { - /* unlocked nr_pdflush_threads test is OK here */ + /* unlocked test is OK here */ if (nr_pdflush_threads < MAX_PDFLUSH_THREADS) start_one_pdflush_thread(); } @@ -147,6 +148,7 @@ static int __pdflush(struct pdflush_work *my_work) } } nr_pdflush_threads--; +// printk("pdflush %d [%d] ends\n", nr_pdflush_threads, current->pid); spin_unlock_irq(&pdflush_lock); preempt_enable(); return 0; diff --git a/mm/readahead.c b/mm/readahead.c index b59f8f4c57bc..03fd19c23bbb 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -11,8 +11,12 @@ #include <linux/fs.h> #include <linux/mm.h> 
#include <linux/blkdev.h> +#include <linux/backing-dev.h> -unsigned long default_ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; +struct backing_dev_info default_backing_dev_info = { + ra_pages: (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE, + state: 0, +}; /* * Return max readahead size for this inode in number-of-pages. |
