| | | |
|---|---|---|
| author | Andrew Morton <akpm@zip.com.au> | 2002-05-27 05:12:50 -0700 |
| committer | Linus Torvalds <torvalds@home.transmeta.com> | 2002-05-27 05:12:50 -0700 |
| commit | ab9e89416c2a7ade1f1ef56b31935aefc82412cc | |
| tree | cee04f356d01a14240bb75816b1550ce0c65d566 | |
| parent | bc67de559fb1ceda7a34ad67f03b643399dfa284 | |
[PATCH] direct-to-BIO writeback
Multipage BIO writeout from the pagecache.
It's pretty much the same as multipage reads. It falls back to buffers
if things get complex.

The write case is a little more complex because it handles pages which
have buffers and pages which do not. If the page doesn't have buffers,
this code does not add them.
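
For a filesystem, opting in is a one-line change to its writeback_mapping
method, as the ext2 hunk below shows. Here is a minimal sketch of the
hook-up; the myfs_* names are hypothetical stand-ins (not part of this
patch), and the 1:1 block mapper is a placeholder for a real filesystem's
metadata lookup:

```c
#include <linux/fs.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>

/*
 * Hypothetical block mapper for a filesystem whose file blocks happen to
 * be laid out 1:1 on disk; a real filesystem would consult its metadata
 * here.  Any get_block_t implementation will do.
 */
static int myfs_get_block(struct inode *inode, sector_t block,
		struct buffer_head *bh_result, int create)
{
	bh_result->b_bdev = inode->i_sb->s_bdev;
	bh_result->b_blocknr = block;
	set_buffer_mapped(bh_result);
	return 0;
}

/*
 * The address_space method: hand the whole dirty list to the mpage code,
 * which packs disk-contiguous pages into large BIOs and falls back to
 * block_write_full_page() for anything complicated.
 */
static int
myfs_writeback_mapping(struct address_space *mapping, int *nr_to_write)
{
	return mpage_writeback_mapping(mapping, nr_to_write, myfs_get_block);
}
```
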
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | fs/buffer.c | 13 |
| -rw-r--r-- | fs/ext2/inode.c | 2 |
| -rw-r--r-- | fs/mpage.c | 275 |
| -rw-r--r-- | include/linux/buffer_head.h | 1 |
| -rw-r--r-- | include/linux/mpage.h | 3 |

5 files changed, 288 insertions(+), 6 deletions(-)
```diff
diff --git a/fs/buffer.c b/fs/buffer.c
index 96d2b68a057c..68349fbe7be5 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1448,11 +1448,11 @@ EXPORT_SYMBOL(create_empty_buffers);
  * wait on that I/O in bforget() - it's more efficient to wait on the I/O
  * only if we really need to.  That happens here.
  */
-static void unmap_underlying_metadata(struct buffer_head *bh)
+void unmap_underlying_metadata(struct block_device *bdev, sector_t block)
 {
 	struct buffer_head *old_bh;
 
-	old_bh = __get_hash_table(bh->b_bdev, bh->b_blocknr, 0);
+	old_bh = __get_hash_table(bdev, block, 0);
 	if (old_bh) {
 #if 0	/* This happens.  Later. */
 		if (buffer_dirty(old_bh))
@@ -1548,7 +1548,8 @@ static int __block_write_full_page(struct inode *inode,
 			if (buffer_new(bh)) {
 				/* blockdev mappings never come here */
 				clear_buffer_new(bh);
-				unmap_underlying_metadata(bh);
+				unmap_underlying_metadata(bh->b_bdev,
+							bh->b_blocknr);
 			}
 		}
 		bh = bh->b_this_page;
@@ -1689,7 +1690,8 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
 				goto out;
 			if (buffer_new(bh)) {
 				clear_buffer_new(bh);
-				unmap_underlying_metadata(bh);
+				unmap_underlying_metadata(bh->b_bdev,
+							bh->b_blocknr);
 				if (PageUptodate(page)) {
 					if (!buffer_mapped(bh))
 						buffer_error();
@@ -2191,7 +2193,8 @@ int generic_direct_IO(int rw, struct inode *inode,
 			}
 		} else {
 			if (buffer_new(&bh))
-				unmap_underlying_metadata(&bh);
+				unmap_underlying_metadata(bh.b_bdev,
+							bh.b_blocknr);
 			if (!buffer_mapped(&bh))
 				BUG();
 		}
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index a2f7b186bdc1..add7a0026e19 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -622,7 +622,7 @@ ext2_writeback_mapping(struct address_space *mapping, int *nr_to_write)
 	int err;
 
 	ret = write_mapping_buffers(mapping);
-	err = generic_writeback_mapping(mapping, nr_to_write);
+	err = mpage_writeback_mapping(mapping, nr_to_write, ext2_get_block);
 	if (!ret)
 		ret = err;
 	return ret;
diff --git a/fs/mpage.c b/fs/mpage.c
index 0eca6f42f4d0..bcdac3145537 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -60,11 +60,31 @@ static void mpage_end_io_read(struct bio *bio)
 	bio_put(bio);
 }
 
+static void mpage_end_io_write(struct bio *bio)
+{
+	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
+	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
+
+	do {
+		struct page *page = bvec->bv_page;
+
+		if (--bvec >= bio->bi_io_vec)
+			prefetchw(&bvec->bv_page->flags);
+
+		if (!uptodate)
+			SetPageError(page);
+		end_page_writeback(page);
+	} while (bvec >= bio->bi_io_vec);
+	bio_put(bio);
+}
+
 struct bio *mpage_bio_submit(int rw, struct bio *bio)
 {
 	bio->bi_vcnt = bio->bi_idx;
 	bio->bi_idx = 0;
 	bio->bi_end_io = mpage_end_io_read;
+	if (rw == WRITE)
+		bio->bi_end_io = mpage_end_io_write;
 	submit_bio(rw, bio);
 	return NULL;
 }
@@ -270,3 +290,258 @@ int mpage_readpage(struct page *page, get_block_t get_block)
 	return 0;
 }
 EXPORT_SYMBOL(mpage_readpage);
+
+/*
+ * Writing is not so simple.
+ *
+ * If the page has buffers then they will be used for obtaining the disk
+ * mapping.  We only support pages which are fully mapped-and-dirty, with a
+ * special case for pages which are unmapped at the end: end-of-file.
+ *
+ * If the page has no buffers (preferred) then the page is mapped here.
+ *
+ * If all blocks are found to be contiguous then the page can go into the
+ * BIO.  Otherwise fall back to block_write_full_page().
+ *
+ * FIXME: This code wants an estimate of how many pages are still to be
+ * written, so it can intelligently allocate a suitably-sized BIO.  For now,
+ * just allocate full-size (16-page) BIOs.
+ */
+static /* inline */ struct bio *
+mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block,
+	sector_t *last_block_in_bio, int *ret)
+{
+	struct inode *inode = page->mapping->host;
+	const unsigned blkbits = inode->i_blkbits;
+	unsigned long end_index;
+	const unsigned blocks_per_page = PAGE_CACHE_SIZE >> blkbits;
+	struct bio_vec *bvec;
+	sector_t last_block;
+	sector_t block_in_file;
+	sector_t blocks[MAX_BUF_PER_PAGE];
+	unsigned page_block;
+	unsigned first_unmapped = blocks_per_page;
+	struct block_device *bdev = NULL;
+	int boundary = 0;
+
+	if (page_has_buffers(page)) {
+		struct buffer_head *head = page_buffers(page);
+		struct buffer_head *bh = head;
+
+		/* If they're all mapped and dirty, do it */
+		page_block = 0;
+		do {
+			BUG_ON(buffer_locked(bh));
+			if (!buffer_mapped(bh)) {
+				/*
+				 * unmapped dirty buffers are created by
+				 * __set_page_dirty_buffers -> mmapped data
+				 */
+				if (buffer_dirty(bh))
+					goto confused;
+				if (first_unmapped == blocks_per_page)
+					first_unmapped = page_block;
+				continue;
+			}
+
+			if (first_unmapped != blocks_per_page)
+				goto confused;	/* hole -> non-hole */
+
+			if (!buffer_dirty(bh) || !buffer_uptodate(bh))
+				goto confused;
+			if (page_block) {
+				if (bh->b_blocknr != blocks[page_block-1] + 1)
+					goto confused;
+			}
+			blocks[page_block++] = bh->b_blocknr;
+			boundary = buffer_boundary(bh);
+			bdev = bh->b_bdev;
+		} while ((bh = bh->b_this_page) != head);
+
+		if (first_unmapped)
+			goto page_is_mapped;
+
+		/*
+		 * Page has buffers, but they are all unmapped. The page was
+		 * created by pagein or read over a hole which was handled by
+		 * block_read_full_page().  If this address_space is also
+		 * using mpage_readpages then this can rarely happen.
+		 */
+		goto confused;
+	}
+
+	/*
+	 * The page has no buffers: map it to disk
+	 */
+	BUG_ON(!PageUptodate(page));
+	block_in_file = page->index << (PAGE_CACHE_SHIFT - blkbits);
+	last_block = (inode->i_size - 1) >> blkbits;
+	for (page_block = 0; page_block < blocks_per_page; ) {
+		struct buffer_head map_bh;
+
+		map_bh.b_state = 0;
+		if (get_block(inode, block_in_file, &map_bh, 1))
+			goto confused;
+		if (buffer_new(&map_bh))
+			unmap_underlying_metadata(map_bh.b_bdev,
+						map_bh.b_blocknr);
+		if (page_block) {
+			if (map_bh.b_blocknr != blocks[page_block-1] + 1)
+				goto confused;
+		}
+		blocks[page_block++] = map_bh.b_blocknr;
+		boundary = buffer_boundary(&map_bh);
+		bdev = map_bh.b_bdev;
+		if (block_in_file == last_block)
+			break;
+		block_in_file++;
+	}
+	if (page_block == 0)
+		buffer_error();
+
+	first_unmapped = page_block;
+
+	end_index = inode->i_size >> PAGE_CACHE_SHIFT;
+	if (page->index >= end_index) {
+		unsigned offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
+
+		if (page->index > end_index || !offset)
+			goto confused;
+		memset(kmap(page) + offset, 0, PAGE_CACHE_SIZE - offset);
+		flush_dcache_page(page);
+		kunmap(page);
+	}
+
+page_is_mapped:
+
+	/*
+	 * This page will go to BIO.  Do we need to send this BIO off first?
+	 */
+	if (bio && (bio->bi_idx == bio->bi_vcnt ||
+			*last_block_in_bio != blocks[0] - 1))
+		bio = mpage_bio_submit(WRITE, bio);
+
+	if (bio == NULL) {
+		unsigned nr_bvecs = MPAGE_BIO_MAX_SIZE / PAGE_CACHE_SIZE;
+
+		bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
+					nr_bvecs, GFP_NOFS);
+		if (bio == NULL)
+			goto confused;
+	}
+
+	/*
+	 * OK, we have our BIO, so we can now mark the buffers clean.  Make
+	 * sure to only clean buffers which we know we'll be writing.
+	 */
+	if (page_has_buffers(page)) {
+		struct buffer_head *head = page_buffers(page);
+		struct buffer_head *bh = head;
+		unsigned buffer_counter = 0;
+
+		do {
+			if (buffer_counter++ == first_unmapped)
+				break;
+			clear_buffer_dirty(bh);
+			bh = bh->b_this_page;
+		} while (bh != head);
+	}
+
+	bvec = &bio->bi_io_vec[bio->bi_idx++];
+	bvec->bv_page = page;
+	bvec->bv_len = (first_unmapped << blkbits);
+	bvec->bv_offset = 0;
+	bio->bi_size += bvec->bv_len;
+	BUG_ON(PageWriteback(page));
+	SetPageWriteback(page);
+	unlock_page(page);
+	if (boundary || (first_unmapped != blocks_per_page))
+		bio = mpage_bio_submit(WRITE, bio);
+	else
+		*last_block_in_bio = blocks[blocks_per_page - 1];
+	goto out;
+
+confused:
+	if (bio)
+		bio = mpage_bio_submit(WRITE, bio);
+	*ret = block_write_full_page(page, get_block);
+out:
+	return bio;
+}
+
+/*
+ * This is a cut-n-paste of generic_writeback_mapping().  We _could_
+ * generalise that function.  It'd get a bit messy.  We'll see.
+ */
+int
+mpage_writeback_mapping(struct address_space *mapping,
+		int *nr_to_write, get_block_t get_block)
+{
+	struct bio *bio = NULL;
+	sector_t last_block_in_bio = 0;
+	int ret = 0;
+	int done = 0;
+
+	write_lock(&mapping->page_lock);
+
+	list_splice(&mapping->dirty_pages, &mapping->io_pages);
+	INIT_LIST_HEAD(&mapping->dirty_pages);
+
+	while (!list_empty(&mapping->io_pages) && !done) {
+		struct page *page = list_entry(mapping->io_pages.prev,
+					struct page, list);
+		list_del(&page->list);
+		if (PageWriteback(page)) {
+			if (PageDirty(page)) {
+				list_add(&page->list, &mapping->dirty_pages);
+				continue;
+			}
+			list_add(&page->list, &mapping->locked_pages);
+			continue;
+		}
+		if (!PageDirty(page)) {
+			list_add(&page->list, &mapping->clean_pages);
+			continue;
+		}
+		list_add(&page->list, &mapping->locked_pages);
+
+		page_cache_get(page);
+		write_unlock(&mapping->page_lock);
+
+		lock_page(page);
+
+		if (page->mapping && TestClearPageDirty(page) &&
+					!PageWriteback(page)) {
+			/* FIXME: batch this up */
+			if (!PageActive(page) && PageLRU(page)) {
+				spin_lock(&pagemap_lru_lock);
+				if (!PageActive(page) && PageLRU(page)) {
+					list_del(&page->lru);
+					list_add(&page->lru, &inactive_list);
+				}
+				spin_unlock(&pagemap_lru_lock);
+			}
+			bio = mpage_writepage(bio, page, get_block,
+					&last_block_in_bio, &ret);
+			if (ret || (nr_to_write && --(*nr_to_write) <= 0))
+				done = 1;
+		} else {
+			unlock_page(page);
+		}
+
+		page_cache_release(page);
+		write_lock(&mapping->page_lock);
+	}
+	if (!list_empty(&mapping->io_pages)) {
+		/*
+		 * Put the rest back, in the correct order.
+		 */
+		list_splice(&mapping->io_pages, mapping->dirty_pages.prev);
+		INIT_LIST_HEAD(&mapping->io_pages);
+	}
+	write_unlock(&mapping->page_lock);
+	if (bio)
+		mpage_bio_submit(WRITE, bio);
+	return ret;
+}
+EXPORT_SYMBOL(mpage_writeback_mapping);
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 7550c3bfb7c0..402a3af8c41e 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -162,6 +162,7 @@ int inode_has_buffers(struct inode *);
 void invalidate_inode_buffers(struct inode *);
 int fsync_buffers_list(spinlock_t *lock, struct list_head *);
 int sync_mapping_buffers(struct address_space *mapping);
+void unmap_underlying_metadata(struct block_device *bdev, sector_t block);
 
 void mark_buffer_async_read(struct buffer_head *bh);
 void mark_buffer_async_write(struct buffer_head *bh);
diff --git a/include/linux/mpage.h b/include/linux/mpage.h
index f41ee607416f..068cc6080253 100644
--- a/include/linux/mpage.h
+++ b/include/linux/mpage.h
@@ -13,3 +13,6 @@
 int mpage_readpages(struct address_space *mapping, struct list_head *pages,
 				unsigned nr_pages, get_block_t get_block);
 int mpage_readpage(struct page *page, get_block_t get_block);
+int mpage_writeback_mapping(struct address_space *mapping,
+		int *nr_to_write, get_block_t get_block);
+
```
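
The batching rule in mpage_writepage() — keep appending pages to the current
BIO while their disk blocks stay contiguous, submit and start a new one
otherwise — can be modelled in a few lines of plain C. This is illustrative
user-space code, not part of the patch: the sample block numbers are invented,
NR_BVECS stands in for MPAGE_BIO_MAX_SIZE / PAGE_CACHE_SIZE, and the real code
tracks this state via *last_block_in_bio and bio->bi_idx:

```c
#include <stdio.h>

#define NR_BVECS	16	/* full-size (16-page) BIOs, per the FIXME */

int main(void)
{
	/*
	 * First disk block of each dirty page, in file order; the jump
	 * from 123 to 300 at the fourth page forces an early submit.
	 */
	long first_block[] = { 100, 108, 116, 300, 308 };
	long blocks_per_page = 8;	/* e.g. 4096-byte page, 512-byte blocks */
	long last_block_in_bio = -1;
	int pages_in_bio = 0;
	int i;

	for (i = 0; i < 5; i++) {
		/* Submit when the BIO is full or the next page isn't contiguous. */
		if (pages_in_bio == NR_BVECS ||
		    (pages_in_bio && first_block[i] != last_block_in_bio + 1)) {
			printf("submit BIO with %d page(s)\n", pages_in_bio);
			pages_in_bio = 0;
		}
		pages_in_bio++;
		last_block_in_bio = first_block[i] + blocks_per_page - 1;
	}
	if (pages_in_bio)
		printf("submit BIO with %d page(s)\n", pages_in_bio);
	return 0;
}
```

Run against the sample layout this prints "submit BIO with 3 page(s)" and then
"submit BIO with 2 page(s)": three contiguous pages go out in one BIO, and the
discontiguity starts a second one.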
