author    Andrew Morton <akpm@zip.com.au>    2002-08-14 21:20:57 -0700
committer Linus Torvalds <torvalds@home.transmeta.com>    2002-08-14 21:20:57 -0700
commit    9eb76ee2a6f64fe412bef315eccbb1dd63a203ae (patch)
tree      71651d8b58f95a04a53598107877bc169c42911b
parent    823e0df87c01883c05b3ee0f1c1d109a56d22cd3 (diff)
[PATCH] batched addition of pages to the LRU
The patch goes through the various places which were calling lru_cache_add() against bulk pages and batches them up.

Also: this whole patch series improves the behaviour of the system under heavy writeback load. There is a reduction in page allocation failures and some reduction in the loss of interactivity caused by page allocators getting stuck on writeback from the VM. (This is still bad, though.)

I think that is due to the change here in mpage_writepages(). That function was originally refiling written-back pages to the head of the inactive list unconditionally, the theory being that they should be moved out of the way of page allocators, who would otherwise end up waiting on them. It appears that this simply had the effect of pushing dirty, unwritten data closer to the tail of the inactive list, making things worse.

So instead: if the caller is (typically) balance_dirty_pages(), leave the pages where they are on the LRU. If the caller has PF_MEMALLOC set, the pages *have* to be refiled. This is because VM writeback is clustered along mapping->dirty_pages, and it is almost certain that the pages being written are near the tail of the LRU. If they were left there, page allocators would block on them too soon, and the write would effectively become synchronous.
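As a minimal sketch (not part of the patch itself), the batching idiom the series introduces looks like the following, using the 2.5-era pagevec calls visible in the diff below; the helper name is made up for illustration:

	/*
	 * Illustrative only: accumulate pages in an on-stack pagevec and
	 * push them onto the LRU in bulk, so the LRU lock is taken once
	 * per PAGEVEC_SIZE pages rather than once per page.  The pagevec
	 * owns one reference to each page it holds; __pagevec_lru_add()
	 * drops those references after moving the pages onto the LRU.
	 */
	#include <linux/pagevec.h>

	static void add_pages_to_lru_batched(struct page **pages, int nr)
	{
		struct pagevec lru_pvec;
		int i;

		pagevec_init(&lru_pvec);
		for (i = 0; i < nr; i++) {
			page_cache_get(pages[i]);	/* reference for the pagevec */
			/* pagevec_add() returns 0 once the vector is full */
			if (!pagevec_add(&lru_pvec, pages[i]))
				__pagevec_lru_add(&lru_pvec);
		}
		pagevec_lru_add(&lru_pvec);	/* drain whatever remains */
	}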
-rw-r--r--  fs/mpage.c               14
-rw-r--r--  include/linux/pagemap.h   2
-rw-r--r--  mm/filemap.c             49
-rw-r--r--  mm/readahead.c           13
-rw-r--r--  mm/shmem.c                2
-rw-r--r--  mm/swap_state.c           6
6 files changed, 65 insertions(+), 21 deletions(-)
diff --git a/fs/mpage.c b/fs/mpage.c
index f098220fcf8e..b4a678288565 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -263,18 +263,25 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
struct bio *bio = NULL;
unsigned page_idx;
sector_t last_block_in_bio = 0;
+ struct pagevec lru_pvec;
+ pagevec_init(&lru_pvec);
for (page_idx = 0; page_idx < nr_pages; page_idx++) {
struct page *page = list_entry(pages->prev, struct page, list);
prefetchw(&page->flags);
list_del(&page->list);
- if (!add_to_page_cache(page, mapping, page->index))
+ if (!add_to_page_cache(page, mapping, page->index)) {
bio = do_mpage_readpage(bio, page,
nr_pages - page_idx,
&last_block_in_bio, get_block);
- page_cache_release(page);
+ if (!pagevec_add(&lru_pvec, page))
+ __pagevec_lru_add(&lru_pvec);
+ } else {
+ page_cache_release(page);
+ }
}
+ pagevec_lru_add(&lru_pvec);
BUG_ON(!list_empty(pages));
if (bio)
mpage_bio_submit(READ, bio);
@@ -566,7 +573,8 @@ mpage_writepages(struct address_space *mapping,
bio = mpage_writepage(bio, page, get_block,
&last_block_in_bio, &ret);
}
- if (!PageActive(page) && PageLRU(page)) {
+ if ((current->flags & PF_MEMALLOC) &&
+ !PageActive(page) && PageLRU(page)) {
if (!pagevec_add(&pvec, page))
pagevec_deactivate_inactive(&pvec);
page = NULL;
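The behavioural core of the mpage_writepages() change is the PF_MEMALLOC test added above. A hedged restatement of that decision as a standalone predicate (the helper name is hypothetical, not in the patch):

	/*
	 * Hypothetical helper, equivalent to the test added above.
	 * PF_MEMALLOC is set in current->flags while the VM performs
	 * direct-reclaim writeback, so only VM-initiated writeback
	 * refiles written pages to the head of the inactive list;
	 * pages written via balance_dirty_pages() stay where they are.
	 */
	static inline int should_refile_written_page(struct page *page)
	{
		return (current->flags & PF_MEMALLOC) &&
			!PageActive(page) && PageLRU(page);
	}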
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index b559ccd68520..69e214920908 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -58,6 +58,8 @@ extern struct page * read_cache_page(struct address_space *mapping,
extern int add_to_page_cache(struct page *page,
struct address_space *mapping, unsigned long index);
+extern int add_to_page_cache_lru(struct page *page,
+ struct address_space *mapping, unsigned long index);
extern void remove_from_page_cache(struct page *page);
extern void __remove_from_page_cache(struct page *page);
diff --git a/mm/filemap.c b/mm/filemap.c
index 68ad674fa961..454786425aa0 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -21,6 +21,7 @@
#include <linux/iobuf.h>
#include <linux/hash.h>
#include <linux/writeback.h>
+#include <linux/pagevec.h>
#include <linux/security.h>
/*
* This is needed for the following functions:
@@ -530,27 +531,37 @@ int filemap_fdatawait(struct address_space * mapping)
* In the case of swapcache, try_to_swap_out() has already locked the page, so
* SetPageLocked() is ugly-but-OK there too. The required page state has been
* set up by swap_out_add_to_swap_cache().
+ *
+ * This function does not add the page to the LRU. The caller must do that.
*/
int add_to_page_cache(struct page *page,
- struct address_space *mapping, unsigned long offset)
+ struct address_space *mapping, pgoff_t offset)
{
int error;
+ page_cache_get(page);
write_lock(&mapping->page_lock);
error = radix_tree_insert(&mapping->page_tree, offset, page);
if (!error) {
SetPageLocked(page);
ClearPageDirty(page);
___add_to_page_cache(page, mapping, offset);
- page_cache_get(page);
+ } else {
+ page_cache_release(page);
}
write_unlock(&mapping->page_lock);
- /* Anon pages are already on the LRU */
- if (!error && !PageSwapCache(page))
- lru_cache_add(page);
return error;
}
+int add_to_page_cache_lru(struct page *page,
+ struct address_space *mapping, pgoff_t offset)
+{
+ int ret = add_to_page_cache(page, mapping, offset);
+ if (ret == 0)
+ lru_cache_add(page);
+ return ret;
+}
+
/*
* This adds the requested page to the page cache if it isn't already there,
* and schedules an I/O to read in its contents from disk.
@@ -566,7 +577,7 @@ static int page_cache_read(struct file * file, unsigned long offset)
if (!page)
return -ENOMEM;
- error = add_to_page_cache(page, mapping, offset);
+ error = add_to_page_cache_lru(page, mapping, offset);
if (!error) {
error = mapping->a_ops->readpage(file, page);
page_cache_release(page);
@@ -797,7 +808,7 @@ repeat:
if (!cached_page)
return NULL;
}
- err = add_to_page_cache(cached_page, mapping, index);
+ err = add_to_page_cache_lru(cached_page, mapping, index);
if (!err) {
page = cached_page;
cached_page = NULL;
@@ -830,7 +841,7 @@ grab_cache_page_nowait(struct address_space *mapping, unsigned long index)
return NULL;
}
page = alloc_pages(mapping->gfp_mask & ~__GFP_FS, 0);
- if (page && add_to_page_cache(page, mapping, index)) {
+ if (page && add_to_page_cache_lru(page, mapping, index)) {
page_cache_release(page);
page = NULL;
}
@@ -994,7 +1005,7 @@ no_cached_page:
break;
}
}
- error = add_to_page_cache(cached_page, mapping, index);
+ error = add_to_page_cache_lru(cached_page, mapping, index);
if (error) {
if (error == -EEXIST)
goto find_page;
@@ -1704,7 +1715,7 @@ repeat:
if (!cached_page)
return ERR_PTR(-ENOMEM);
}
- err = add_to_page_cache(cached_page, mapping, index);
+ err = add_to_page_cache_lru(cached_page, mapping, index);
if (err == -EEXIST)
goto repeat;
if (err < 0) {
@@ -1764,8 +1775,14 @@ retry:
return page;
}
-static inline struct page * __grab_cache_page(struct address_space *mapping,
- unsigned long index, struct page **cached_page)
+/*
+ * If the page was newly created, increment its refcount and add it to the
+ * caller's lru-buffering pagevec. This function is specifically for
+ * generic_file_write().
+ */
+static inline struct page *
+__grab_cache_page(struct address_space *mapping, unsigned long index,
+ struct page **cached_page, struct pagevec *lru_pvec)
{
int err;
struct page *page;
@@ -1782,6 +1799,9 @@ repeat:
goto repeat;
if (err == 0) {
page = *cached_page;
+ page_cache_get(page);
+ if (!pagevec_add(lru_pvec, page))
+ __pagevec_lru_add(lru_pvec);
*cached_page = NULL;
}
}
@@ -1828,6 +1848,7 @@ ssize_t generic_file_write_nolock(struct file *file, const char *buf,
int err;
unsigned bytes;
time_t time_now;
+ struct pagevec lru_pvec;
if (unlikely((ssize_t)count < 0))
return -EINVAL;
@@ -1949,6 +1970,7 @@ ssize_t generic_file_write_nolock(struct file *file, const char *buf,
goto out_status;
}
+ pagevec_init(&lru_pvec);
do {
unsigned long index;
unsigned long offset;
@@ -1972,7 +1994,7 @@ ssize_t generic_file_write_nolock(struct file *file, const char *buf,
__get_user(dummy, buf+bytes-1);
}
- page = __grab_cache_page(mapping, index, &cached_page);
+ page = __grab_cache_page(mapping, index, &cached_page, &lru_pvec);
if (!page) {
status = -ENOMEM;
break;
@@ -2034,6 +2056,7 @@ ssize_t generic_file_write_nolock(struct file *file, const char *buf,
out_status:
err = written ? written : status;
out:
+ pagevec_lru_add(&lru_pvec);
return err;
}
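Callers that cannot usefully batch keep the old one-shot semantics via add_to_page_cache_lru(). A sketch of typical single-page use, modelled on the page_cache_read() hunk above (the function name is illustrative):

	/*
	 * Illustrative caller: insert one freshly allocated page into the
	 * page cache and the LRU, then start a read against it.  On
	 * failure, add_to_page_cache_lru() leaves the caller holding only
	 * its allocation reference, which is dropped below.
	 */
	static int read_one_page(struct file *file,
			struct address_space *mapping, pgoff_t index)
	{
		struct page *page = page_cache_alloc(mapping);
		int error;

		if (!page)
			return -ENOMEM;
		error = add_to_page_cache_lru(page, mapping, index);
		if (!error)
			error = mapping->a_ops->readpage(file, page);
		page_cache_release(page);	/* drop the allocation reference */
		return error;
	}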
diff --git a/mm/readahead.c b/mm/readahead.c
index 194f56db2ae4..209c9813525d 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -12,6 +12,7 @@
#include <linux/mm.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
+#include <linux/pagevec.h>
struct backing_dev_info default_backing_dev_info = {
.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE,
@@ -36,6 +37,9 @@ read_pages(struct file *file, struct address_space *mapping,
struct list_head *pages, unsigned nr_pages)
{
unsigned page_idx;
+ struct pagevec lru_pvec;
+
+ pagevec_init(&lru_pvec);
if (mapping->a_ops->readpages)
return mapping->a_ops->readpages(mapping, pages, nr_pages);
@@ -43,10 +47,15 @@ read_pages(struct file *file, struct address_space *mapping,
for (page_idx = 0; page_idx < nr_pages; page_idx++) {
struct page *page = list_entry(pages->prev, struct page, list);
list_del(&page->list);
- if (!add_to_page_cache(page, mapping, page->index))
+ if (!add_to_page_cache(page, mapping, page->index)) {
+ if (!pagevec_add(&lru_pvec, page))
+ __pagevec_lru_add(&lru_pvec);
mapping->a_ops->readpage(file, page);
- page_cache_release(page);
+ } else {
+ page_cache_release(page);
+ }
}
+ pagevec_lru_add(&lru_pvec);
return 0;
}
diff --git a/mm/shmem.c b/mm/shmem.c
index abb0685049e2..e4f8340f7a5d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -668,7 +668,7 @@ repeat:
page = page_cache_alloc(mapping);
if (!page)
goto no_mem;
- error = add_to_page_cache(page, mapping, idx);
+ error = add_to_page_cache_lru(page, mapping, idx);
if (error < 0) {
page_cache_release(page);
goto no_mem;
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 332e0d26732d..a69e81415c2b 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -71,6 +71,9 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry)
return -ENOENT;
}
error = add_to_page_cache(page, &swapper_space, entry.val);
+ /*
+ * Anon pages are already on the LRU; we don't run lru_cache_add() here.
+ */
if (error != 0) {
swap_free(entry);
if (error == -EEXIST)
@@ -275,8 +278,7 @@ int move_from_swap_cache(struct page *page, unsigned long index,
SetPageDirty(page);
___add_to_page_cache(page, mapping, index);
/* fix that up */
- list_del(&page->list);
- list_add(&page->list, &mapping->dirty_pages);
+ list_move(&page->list, &mapping->dirty_pages);
write_unlock(&mapping->page_lock);
write_unlock(&swapper_space.page_lock);
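The list_move() substitution in move_from_swap_cache() is purely a cleanup; as defined in <linux/list.h> of this era, it is the fused form of the pair it replaces:

	/* From <linux/list.h>: unlink an entry and reinsert it at the
	 * head of another list -- exactly list_del() + list_add(). */
	static inline void list_move(struct list_head *list, struct list_head *head)
	{
		__list_del(list->prev, list->next);
		list_add(list, head);
	}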