| Field | Value | Date |
|---|---|---|
| author | Andrew Morton <akpm@zip.com.au> | 2002-08-27 21:03:50 -0700 |
| committer | Linus Torvalds <torvalds@penguin.transmeta.com> | 2002-08-27 21:03:50 -0700 |
| commit | a8382cf1153689a1caac0e707e951e7869bb92e1 | |
| tree | 71e2722fd8fd5e08fb7862171f8fdb1443ce31c6 | |
| parent | e6f0e61d9ed94134f57bcf6c72b81848b9d3c2fe | |
[PATCH] per-zone LRU locking
Now that the LRUs are per-zone, make their lock per-zone as well.
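At its core the change is mechanical: every LRU operation looks up the zone that owns the page and takes that zone's lru_lock instead of the old global pagemap_lru_lock. The sketch below is condensed from the new activate_page() in the mm/swap.c hunk further down (statistics bookkeeping omitted; not standalone code):

```c
void activate_page(struct page *page)
{
        struct zone *zone = page_zone(page);    /* the zone owning this page's LRU */

        spin_lock_irq(&zone->lru_lock);         /* per-zone lock, was pagemap_lru_lock */
        if (PageLRU(page) && !PageActive(page)) {
                del_page_from_inactive_list(zone, page);
                SetPageActive(page);
                add_page_to_active_list(zone, page);
        }
        spin_unlock_irq(&zone->lru_lock);
}
```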
In this patch the per-zone lock shares a cacheline with the zone's
buddy list lock, which is very bad. Some groundwork is needed to fix
this well.
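One possible shape for that groundwork (not part of this patch, shown only as an illustration) is to push the LRU fields onto their own cacheline with ____cacheline_aligned_in_smp, so that lru_lock stops sharing a line with the buddy allocator's zone->lock. Fields are abridged here:

```c
struct zone {
        /* buddy allocator side, protected by zone->lock */
        spinlock_t              lock;
        unsigned long           free_pages;
        unsigned long           pages_min, pages_low, pages_high;
        int                     need_balance;

        /* page reclaim side, protected by zone->lru_lock, on its own cacheline */
        spinlock_t              lru_lock ____cacheline_aligned_in_smp;
        struct list_head        active_list;
        struct list_head        inactive_list;
        atomic_t                refill_counter;
        /* ... */
};
```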
This change is expected to be a significant win on NUMA, where most
page allocation comes from the local node's zones.
For NUMA, the `struct zone' itself should really be placed in that
node's memory, which is something the platform owners should look at.
However, the internode cache will help here.
Per-node kswapd would make heaps of sense too.
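Because a pagevec may hold pages from several zones, the batched paths in this patch switch locks lazily: the lock is dropped and retaken only when the next page lives in a different zone. A sketch of that pattern follows (lru_add_batch is a hypothetical name; the real code is in __pagevec_lru_add() and friends in the diff below):

```c
static void lru_add_batch(struct page **pages, int nr) /* hypothetical helper */
{
        struct zone *zone = NULL;
        int i;

        for (i = 0; i < nr; i++) {
                struct zone *pagezone = page_zone(pages[i]);

                /* only drop/retake the lock when we cross into another zone */
                if (pagezone != zone) {
                        if (zone)
                                spin_unlock_irq(&zone->lru_lock);
                        zone = pagezone;
                        spin_lock_irq(&zone->lru_lock);
                }
                add_page_to_inactive_list(zone, pages[i]);
        }
        if (zone)
                spin_unlock_irq(&zone->lru_lock);
}
```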
| File | Lines changed |
|---|---|
| include/linux/mm.h | 2 |
| include/linux/mmzone.h | 1 |
| include/linux/page-flags.h | 2 |
| include/linux/swap.h | 2 |
| mm/filemap.c | 1 |
| mm/page_alloc.c | 3 |
| mm/rmap.c | 6 |
| mm/swap.c | 99 |
| mm/vmscan.c | 34 |

9 files changed, 82 insertions(+), 68 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7416dae6b550..045a861e4024 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -157,7 +157,7 @@ struct page {
         struct address_space *mapping;  /* The inode (or ...) we belong to. */
         unsigned long index;            /* Our offset within mapping. */
         struct list_head lru;           /* Pageout list, eg. active_list;
-                                           protected by pagemap_lru_lock !! */
+                                           protected by zone->lru_lock !! */
         union {
                 struct pte_chain * chain;       /* Reverse pte mapping pointer.
                                                  * protected by PG_chainlock */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 928000348e6b..f62e36b902a2 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -44,6 +44,7 @@ struct zone {
         unsigned long           pages_min, pages_low, pages_high;
         int                     need_balance;

+        spinlock_t              lru_lock;
         struct list_head        active_list;
         struct list_head        inactive_list;
         atomic_t                refill_counter;
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index cc74c699f3ad..5a49020e728b 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -28,7 +28,7 @@
  *
  * Note that the referenced bit, the page->lru list_head and the active,
  * inactive_dirty and inactive_clean lists are protected by the
- * pagemap_lru_lock, and *NOT* by the usual PG_locked bit!
+ * zone->lru_lock, and *NOT* by the usual PG_locked bit!
  *
  * PG_error is set to indicate that an I/O error occurred on this page.
  *
diff --git a/include/linux/swap.h b/include/linux/swap.h
index d9a4a9dc00ce..affa89d77eb1 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -209,8 +209,6 @@ extern struct swap_list_t swap_list;
 asmlinkage long sys_swapoff(const char *);
 asmlinkage long sys_swapon(const char *, int);

-extern spinlock_t _pagemap_lru_lock;
-
 extern void FASTCALL(mark_page_accessed(struct page *));

 extern spinlock_t swaplock;
diff --git a/mm/filemap.c b/mm/filemap.c
index 1b4d27d6b7cd..bf748dabf67d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -61,7 +61,6 @@
  *    ->inode_lock                (__mark_inode_dirty)
  *      ->sb_lock                 (fs/fs-writeback.c)
  */
-spinlock_t _pagemap_lru_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;

 /*
  * Remove a page from the page cache and free it.  Caller has to make
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 64f2434ab7b1..bc37f860b0cf 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -828,7 +828,8 @@ void __init free_area_init_core(int nid, pg_data_t *pgdat, struct page **gmap,
                 printk("zone(%lu): %lu pages.\n", j, size);
                 zone->size = size;
                 zone->name = zone_names[j];
-                zone->lock = SPIN_LOCK_UNLOCKED;
+                spin_lock_init(&zone->lock);
+                spin_lock_init(&zone->lru_lock);
                 zone->zone_pgdat = pgdat;
                 zone->free_pages = 0;
                 zone->need_balance = 0;
diff --git a/mm/rmap.c b/mm/rmap.c
index cac891e978ea..727e50efca39 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -14,7 +14,7 @@
 /*
  * Locking:
  * - the page->pte.chain is protected by the PG_chainlock bit,
- *   which nests within the pagemap_lru_lock, then the
+ *   which nests within the zone->lru_lock, then the
  *   mm->page_table_lock, and then the page lock.
  * - because swapout locking is opposite to the locking order
  *   in the page fault path, the swapout path uses trylocks
@@ -260,7 +260,7 @@ out:
  * table entry mapping a page.  Because locking order here is opposite
  * to the locking order used by the page fault path, we use trylocks.
  * Locking:
- *      pagemap_lru_lock                page_launder()
+ *      zone->lru_lock                  page_launder()
  *          page lock                   page_launder(), trylock
  *              pte_chain_lock          page_launder()
  *                  mm->page_table_lock try_to_unmap_one(), trylock
@@ -328,7 +328,7 @@ out_unlock:
  * @page: the page to get unmapped
  *
  * Tries to remove all the page table entries which are mapping this
- * page, used in the pageout path.  Caller must hold pagemap_lru_lock
+ * page, used in the pageout path.  Caller must hold zone->lru_lock
 * and the page lock.  Return values are:
 *
 * SWAP_SUCCESS - we succeeded in removing all mappings
diff --git a/mm/swap.c b/mm/swap.c
index d78f8c8309e4..99902f717638 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -26,26 +26,20 @@ int page_cluster;

 /*
- * Move an inactive page to the active list.
+ * FIXME: speed this up?
  */
-static inline void activate_page_nolock(struct page * page)
+void activate_page(struct page *page)
 {
+        struct zone *zone = page_zone(page);
+
+        spin_lock_irq(&zone->lru_lock);
         if (PageLRU(page) && !PageActive(page)) {
-                del_page_from_inactive_list(page);
+                del_page_from_inactive_list(zone, page);
                 SetPageActive(page);
-                add_page_to_active_list(page);
+                add_page_to_active_list(zone, page);
                 KERNEL_STAT_INC(pgactivate);
         }
-}
-
-/*
- * FIXME: speed this up?
- */
-void activate_page(struct page * page)
-{
-        spin_lock_irq(&_pagemap_lru_lock);
-        activate_page_nolock(page);
-        spin_unlock_irq(&_pagemap_lru_lock);
+        spin_unlock_irq(&zone->lru_lock);
 }

@@ -79,13 +73,14 @@ void lru_add_drain(void)
 void __page_cache_release(struct page *page)
 {
         unsigned long flags;
+        struct zone *zone = page_zone(page);

-        spin_lock_irqsave(&_pagemap_lru_lock, flags);
+        spin_lock_irqsave(&zone->lru_lock, flags);
         if (TestClearPageLRU(page))
-                del_page_from_lru(page);
+                del_page_from_lru(zone, page);
         if (page_count(page) != 0)
                 page = NULL;
-        spin_unlock_irqrestore(&_pagemap_lru_lock, flags);
+        spin_unlock_irqrestore(&zone->lru_lock, flags);
         if (page)
                 __free_pages_ok(page, 0);
 }
@@ -95,7 +90,7 @@ void __page_cache_release(struct page *page)
  * pagevec's pages.  If it fell to zero then remove the page from the LRU and
  * free it.
  *
- * Avoid taking pagemap_lru_lock if possible, but if it is taken, retain it
+ * Avoid taking zone->lru_lock if possible, but if it is taken, retain it
  * for the remainder of the operation.
  *
  * The locking in this function is against shrink_cache(): we recheck the
@@ -107,28 +102,31 @@ void __page_cache_release(struct page *page)
 void __pagevec_release(struct pagevec *pvec)
 {
         int i;
-        int lock_held = 0;
         struct pagevec pages_to_free;
+        struct zone *zone = NULL;

         pagevec_init(&pages_to_free);
         for (i = 0; i < pagevec_count(pvec); i++) {
                 struct page *page = pvec->pages[i];
+                struct zone *pagezone;

                 if (PageReserved(page) || !put_page_testzero(page))
                         continue;
-                if (!lock_held) {
-                        spin_lock_irq(&_pagemap_lru_lock);
-                        lock_held = 1;
+                pagezone = page_zone(page);
+                if (pagezone != zone) {
+                        if (zone)
+                                spin_unlock_irq(&zone->lru_lock);
+                        zone = pagezone;
+                        spin_lock_irq(&zone->lru_lock);
                 }
-
                 if (TestClearPageLRU(page))
-                        del_page_from_lru(page);
+                        del_page_from_lru(zone, page);
                 if (page_count(page) == 0)
                         pagevec_add(&pages_to_free, page);
         }
-        if (lock_held)
-                spin_unlock_irq(&_pagemap_lru_lock);
+        if (zone)
+                spin_unlock_irq(&zone->lru_lock);

         pagevec_free(&pages_to_free);
         pagevec_init(pvec);
@@ -163,26 +161,27 @@ void __pagevec_release_nonlru(struct pagevec *pvec)
 void pagevec_deactivate_inactive(struct pagevec *pvec)
 {
         int i;
-        int lock_held = 0;
+        struct zone *zone = NULL;

         if (pagevec_count(pvec) == 0)
                 return;
         for (i = 0; i < pagevec_count(pvec); i++) {
                 struct page *page = pvec->pages[i];
+                struct zone *pagezone = page_zone(page);

-                if (!lock_held) {
+                if (pagezone != zone) {
                         if (PageActive(page) || !PageLRU(page))
                                 continue;
-                        spin_lock_irq(&_pagemap_lru_lock);
-                        lock_held = 1;
-                }
-                if (!PageActive(page) && PageLRU(page)) {
-                        struct zone *zone = page_zone(page);
-                        list_move(&page->lru, &zone->inactive_list);
+                        if (zone)
+                                spin_unlock_irq(&zone->lru_lock);
+                        zone = pagezone;
+                        spin_lock_irq(&zone->lru_lock);
                 }
+                if (!PageActive(page) && PageLRU(page))
+                        list_move(&page->lru, &pagezone->inactive_list);
         }
-        if (lock_held)
-                spin_unlock_irq(&_pagemap_lru_lock);
+        if (zone)
+                spin_unlock_irq(&zone->lru_lock);
         __pagevec_release(pvec);
 }
@@ -193,16 +192,24 @@ void pagevec_deactivate_inactive(struct pagevec *pvec)
 void __pagevec_lru_add(struct pagevec *pvec)
 {
         int i;
+        struct zone *zone = NULL;

-        spin_lock_irq(&_pagemap_lru_lock);
         for (i = 0; i < pagevec_count(pvec); i++) {
                 struct page *page = pvec->pages[i];
+                struct zone *pagezone = page_zone(page);

+                if (pagezone != zone) {
+                        if (zone)
+                                spin_unlock_irq(&zone->lru_lock);
+                        zone = pagezone;
+                        spin_lock_irq(&zone->lru_lock);
+                }
                 if (TestSetPageLRU(page))
                         BUG();
-                add_page_to_inactive_list(page);
+                add_page_to_inactive_list(zone, page);
         }
-        spin_unlock_irq(&_pagemap_lru_lock);
+        if (zone)
+                spin_unlock_irq(&zone->lru_lock);
         pagevec_release(pvec);
 }
@@ -213,16 +220,24 @@ void __pagevec_lru_add(struct pagevec *pvec)
 void __pagevec_lru_del(struct pagevec *pvec)
 {
         int i;
+        struct zone *zone = NULL;

-        spin_lock_irq(&_pagemap_lru_lock);
         for (i = 0; i < pagevec_count(pvec); i++) {
                 struct page *page = pvec->pages[i];
+                struct zone *pagezone = page_zone(page);

+                if (pagezone != zone) {
+                        if (zone)
+                                spin_unlock_irq(&zone->lru_lock);
+                        zone = pagezone;
+                        spin_lock_irq(&zone->lru_lock);
+                }
                 if (!TestClearPageLRU(page))
                         BUG();
-                del_page_from_lru(page);
+                del_page_from_lru(zone, page);
         }
-        spin_unlock_irq(&_pagemap_lru_lock);
+        if (zone)
+                spin_unlock_irq(&zone->lru_lock);
         pagevec_release(pvec);
 }
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 3e6c8b734c72..41712a97b079 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -263,7 +263,7 @@ keep:
 }

 /*
- * pagemap_lru_lock is heavily contented.  We relieve it by quickly privatising
+ * zone->lru_lock is heavily contented.  We relieve it by quickly privatising
  * a batch of pages and working on them outside the lock.  Any pages which were
  * not freed will be added back to the LRU.
  *
@@ -291,7 +291,7 @@ shrink_cache(int nr_pages, struct zone *zone,
         pagevec_init(&pvec);

         lru_add_drain();
-        spin_lock_irq(&_pagemap_lru_lock);
+        spin_lock_irq(&zone->lru_lock);
         while (max_scan > 0 && nr_pages > 0) {
                 struct page *page;
                 int n = 0;
@@ -317,7 +317,7 @@ shrink_cache(int nr_pages, struct zone *zone,
                         n++;
                 }
                 zone->nr_inactive -= n;
-                spin_unlock_irq(&_pagemap_lru_lock);
+                spin_unlock_irq(&zone->lru_lock);

                 if (list_empty(&page_list))
                         goto done;
@@ -330,7 +330,7 @@ shrink_cache(int nr_pages, struct zone *zone,
                 if (nr_pages <= 0 && list_empty(&page_list))
                         goto done;

-                spin_lock_irq(&_pagemap_lru_lock);
+                spin_lock_irq(&zone->lru_lock);
                 /*
                  * Put back any unfreeable pages.
                  */
@@ -344,13 +344,13 @@ shrink_cache(int nr_pages, struct zone *zone,
                         else
                                 add_page_to_inactive_list(zone, page);
                         if (!pagevec_add(&pvec, page)) {
-                                spin_unlock_irq(&_pagemap_lru_lock);
+                                spin_unlock_irq(&zone->lru_lock);
                                 __pagevec_release(&pvec);
-                                spin_lock_irq(&_pagemap_lru_lock);
+                                spin_lock_irq(&zone->lru_lock);
                         }
                 }
         }
-        spin_unlock_irq(&_pagemap_lru_lock);
+        spin_unlock_irq(&zone->lru_lock);
 done:
         pagevec_release(&pvec);
         return nr_pages;
@@ -363,9 +363,9 @@ done:
  * processes, from rmap.
  *
  * If the pages are mostly unmapped, the processing is fast and it is
- * appropriate to hold pagemap_lru_lock across the whole operation.  But if
+ * appropriate to hold zone->lru_lock across the whole operation.  But if
  * the pages are mapped, the processing is slow (page_referenced()) so we
- * should drop pagemap_lru_lock around each page.  It's impossible to balance
+ * should drop zone->lru_lock around each page.  It's impossible to balance
  * this, so instead we remove the pages from the LRU while processing them.
  * It is safe to rely on PG_active against the non-LRU pages in here because
  * nobody will play with that bit on a non-LRU page.
@@ -385,7 +385,7 @@ refill_inactive_zone(struct zone *zone, const int nr_pages_in)
         struct pagevec pvec;

         lru_add_drain();
-        spin_lock_irq(&_pagemap_lru_lock);
+        spin_lock_irq(&zone->lru_lock);
         while (nr_pages && !list_empty(&zone->active_list)) {
                 page = list_entry(zone->active_list.prev, struct page, lru);
                 prefetchw_prev_lru_page(page, &zone->active_list, flags);
@@ -402,7 +402,7 @@ refill_inactive_zone(struct zone *zone, const int nr_pages_in)
                 list_add(&page->lru, &l_hold);
                 nr_pages--;
         }
-        spin_unlock_irq(&_pagemap_lru_lock);
+        spin_unlock_irq(&zone->lru_lock);

         while (!list_empty(&l_hold)) {
                 page = list_entry(l_hold.prev, struct page, lru);
@@ -421,7 +421,7 @@ refill_inactive_zone(struct zone *zone, const int nr_pages_in)
         }

         pagevec_init(&pvec);
-        spin_lock_irq(&_pagemap_lru_lock);
+        spin_lock_irq(&zone->lru_lock);
         while (!list_empty(&l_inactive)) {
                 page = list_entry(l_inactive.prev, struct page, lru);
                 prefetchw_prev_lru_page(page, &l_inactive, flags);
@@ -431,9 +431,9 @@ refill_inactive_zone(struct zone *zone, const int nr_pages_in)
                         BUG();
                 list_move(&page->lru, &zone->inactive_list);
                 if (!pagevec_add(&pvec, page)) {
-                        spin_unlock_irq(&_pagemap_lru_lock);
+                        spin_unlock_irq(&zone->lru_lock);
                         __pagevec_release(&pvec);
-                        spin_lock_irq(&_pagemap_lru_lock);
+                        spin_lock_irq(&zone->lru_lock);
                 }
         }
         while (!list_empty(&l_active)) {
@@ -444,14 +444,14 @@ refill_inactive_zone(struct zone *zone, const int nr_pages_in)
                 BUG_ON(!PageActive(page));
                 list_move(&page->lru, &zone->active_list);
                 if (!pagevec_add(&pvec, page)) {
-                        spin_unlock_irq(&_pagemap_lru_lock);
+                        spin_unlock_irq(&zone->lru_lock);
                         __pagevec_release(&pvec);
-                        spin_lock_irq(&_pagemap_lru_lock);
+                        spin_lock_irq(&zone->lru_lock);
                 }
         }
         zone->nr_active -= pgdeactivate;
         zone->nr_inactive += pgdeactivate;
-        spin_unlock_irq(&_pagemap_lru_lock);
+        spin_unlock_irq(&zone->lru_lock);
         pagevec_release(&pvec);

         KERNEL_STAT_ADD(pgscan, nr_pages_in - nr_pages);
