| author | Andrew Morton <akpm@digeo.com> | 2002-09-29 02:20:34 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@home.transmeta.com> | 2002-09-29 02:20:34 -0700 |
| commit | db7b0c9fcd4e93c07f16a0bfd449b49210fa523d | |
| tree | 8667176c948733d10759b854610edcafe3dee5f2 | |
| parent | 1f76929196ad4a0877d0b411deedb75767a3564e | |
[PATCH] per-node kswapd instances
Patch from David Hansen.
Start one kswapd instance for each NUMA node. Each instance works only
against the pages that are local to its node. Each kswapd still needs to
be bound to its node's CPU set, but the infrastructure for that is not
yet in place.
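The deferred CPU binding is easy to picture in userspace. The sketch below is not part of the patch: it shows the general shape of node binding using POSIX thread affinity, with a hypothetical fixed `CPUS_PER_NODE` layout standing in for the real NUMA topology discovery that the message says is missing.

```c
#define _GNU_SOURCE     /* for pthread_setaffinity_np() and cpu_set_t */
#include <pthread.h>
#include <sched.h>
#include <stdio.h>

/* Hypothetical fixed topology: CPUS_PER_NODE consecutive CPUs per node.
 * Discovering the real node-to-CPU mapping is exactly the infrastructure
 * the commit message says is not yet in place. */
#define CPUS_PER_NODE 2

static int bind_to_node(pthread_t thread, int node_id)
{
	cpu_set_t mask;
	int c;

	CPU_ZERO(&mask);
	for (c = 0; c < CPUS_PER_NODE; c++)
		CPU_SET(node_id * CPUS_PER_NODE + c, &mask);

	/* Userspace stand-in for the in-kernel CPU binding the patch defers. */
	return pthread_setaffinity_np(thread, sizeof(mask), &mask);
}

int main(void)
{
	int err = bind_to_node(pthread_self(), 0);

	if (err)
		fprintf(stderr, "bind_to_node: error %d\n", err);
	else
		printf("bound to node 0's CPUs\n");
	return 0;
}
```

The only point of the sketch is that each daemon's CPU set is derived from its node id, so reclaim work stays on CPUs with cheap access to that node's memory.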
| -rw-r--r-- | include/linux/mmzone.h | 1 |
|---|---|---|
| -rw-r--r-- | include/linux/swap.h | 1 |
| -rw-r--r-- | mm/page_alloc.c | 10 |
| -rw-r--r-- | mm/vmscan.c | 52 |
4 files changed, 21 insertions, 43 deletions
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 0b14f37ca016..fc439380cb0e 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -168,6 +168,7 @@ typedef struct pglist_data {
 	unsigned long node_size;
 	int node_id;
 	struct pglist_data *pgdat_next;
+	wait_queue_head_t kswapd_wait;
 } pg_data_t;
 
 extern int numnodes;
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 8844b1408788..f4acbd1e9b46 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -162,7 +162,6 @@ extern void FASTCALL(activate_page(struct page *));
 extern void swap_setup(void);
 
 /* linux/mm/vmscan.c */
-extern wait_queue_head_t kswapd_wait;
 extern int try_to_free_pages(struct zone *, unsigned int, unsigned int);
 
 /* linux/mm/page_io.c */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1443104b6973..86b534cef5f8 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -348,8 +348,12 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
 	classzone->need_balance = 1;
 	mb();
 	/* we're somewhat low on memory, failed to find what we needed */
-	if (waitqueue_active(&kswapd_wait))
-		wake_up_interruptible(&kswapd_wait);
+	for (i = 0; zones[i] != NULL; i++) {
+		struct zone *z = zones[i];
+		if (z->free_pages <= z->pages_low &&
+		    waitqueue_active(&z->zone_pgdat->kswapd_wait))
+			wake_up_interruptible(&z->zone_pgdat->kswapd_wait);
+	}
 
 	/* Go through the zonelist again, taking __GFP_HIGH into account */
 	min = 1UL << order;
@@ -836,6 +840,8 @@ void __init free_area_init_core(pg_data_t *pgdat,
 	unsigned long zone_start_pfn = pgdat->node_start_pfn;
 
 	pgdat->nr_zones = 0;
+	init_waitqueue_head(&pgdat->kswapd_wait);
+
 	local_offset = 0;	/* offset within lmem_map */
 	for (j = 0; j < MAX_NR_ZONES; j++) {
 		struct zone *zone = pgdat->node_zones + j;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 4302f698a7a4..9bec21dc9f0c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -667,8 +667,6 @@ try_to_free_pages(struct zone *classzone,
 	return 0;
 }
 
-DECLARE_WAIT_QUEUE_HEAD(kswapd_wait);
-
 static int check_classzone_need_balance(struct zone *classzone)
 {
 	struct zone *first_classzone;
@@ -707,20 +705,6 @@ static int kswapd_balance_pgdat(pg_data_t * pgdat)
 	return need_more_balance;
 }
 
-static void kswapd_balance(void)
-{
-	int need_more_balance;
-	pg_data_t * pgdat;
-
-	do {
-		need_more_balance = 0;
-		pgdat = pgdat_list;
-		do
-			need_more_balance |= kswapd_balance_pgdat(pgdat);
-		while ((pgdat = pgdat->pgdat_next));
-	} while (need_more_balance);
-}
-
 static int kswapd_can_sleep_pgdat(pg_data_t * pgdat)
 {
 	struct zone *zone;
@@ -728,28 +712,13 @@ static int kswapd_can_sleep_pgdat(pg_data_t * pgdat)
 
 	for (i = pgdat->nr_zones-1; i >= 0; i--) {
 		zone = pgdat->node_zones + i;
-		if (!zone->need_balance)
-			continue;
-		return 0;
+		if (zone->need_balance)
+			return 0;
 	}
 
 	return 1;
 }
 
-static int kswapd_can_sleep(void)
-{
-	pg_data_t * pgdat;
-
-	pgdat = pgdat_list;
-	do {
-		if (kswapd_can_sleep_pgdat(pgdat))
-			continue;
-		return 0;
-	} while ((pgdat = pgdat->pgdat_next));
-
-	return 1;
-}
-
 /*
  * The background pageout daemon, started as a kernel thread
  * from the init process.
@@ -763,13 +732,14 @@ static int kswapd_can_sleep(void)
  * If there are applications that are active memory-allocators
  * (most normal use), this basically shouldn't matter.
  */
-int kswapd(void *unused)
+int kswapd(void *p)
 {
+	pg_data_t *pgdat = (pg_data_t*)p;
 	struct task_struct *tsk = current;
 	DECLARE_WAITQUEUE(wait, tsk);
 
 	daemonize();
-	strcpy(tsk->comm, "kswapd");
+	sprintf(tsk->comm, "kswapd%d", pgdat->node_id);
 	sigfillset(&tsk->blocked);
 
 	/*
@@ -793,30 +763,32 @@ int kswapd(void *unused)
 		if (current->flags & PF_FREEZE)
 			refrigerator(PF_IOTHREAD);
 		__set_current_state(TASK_INTERRUPTIBLE);
-		add_wait_queue(&kswapd_wait, &wait);
+		add_wait_queue(&pgdat->kswapd_wait, &wait);
 
 		mb();
-		if (kswapd_can_sleep())
+		if (kswapd_can_sleep_pgdat(pgdat))
 			schedule();
 
 		__set_current_state(TASK_RUNNING);
-		remove_wait_queue(&kswapd_wait, &wait);
+		remove_wait_queue(&pgdat->kswapd_wait, &wait);
 
 		/*
 		 * If we actually get into a low-memory situation,
 		 * the processes needing more memory will wake us
 		 * up on a more timely basis.
 		 */
-		kswapd_balance();
+		kswapd_balance_pgdat(pgdat);
 		blk_run_queues();
 	}
 }
 
 static int __init kswapd_init(void)
 {
+	pg_data_t *pgdat;
 	printk("Starting kswapd\n");
 	swap_setup();
-	kernel_thread(kswapd, NULL, CLONE_KERNEL);
+	for_each_pgdat(pgdat)
+		kernel_thread(kswapd, pgdat, CLONE_KERNEL);
 	return 0;
 }
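To see the structure the diff builds, here is a small userspace analogue — an illustration, not kernel code: one worker per node parked on its own condition variable, and an allocator-side loop that wakes only nodes whose free pages fell below a watermark, mirroring the zones[] loop added to __alloc_pages() and the per-pgdat kswapd_wait. All names (`struct node`, `reclaim_worker`, `wake_low_nodes`, the `PAGES_LOW` value) are invented for the example.

```c
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

#define NODE_COUNT 4        /* stand-in for numnodes */
#define PAGES_LOW  64       /* stand-in for zone->pages_low */

/* Userspace analogue of pg_data_t: per-node state plus a private wait queue. */
struct node {
	int             id;
	long            free_pages;
	bool            need_balance;
	pthread_mutex_t lock;
	pthread_cond_t  kswapd_wait;   /* mirrors pgdat->kswapd_wait */
};

static struct node nodes[NODE_COUNT];

/* One instance runs per node, like the per-pgdat kswapd in the patch. */
static void *reclaim_worker(void *p)
{
	struct node *n = p;

	for (;;) {
		pthread_mutex_lock(&n->lock);
		while (!n->need_balance)        /* kswapd_can_sleep_pgdat() */
			pthread_cond_wait(&n->kswapd_wait, &n->lock);
		n->need_balance = false;
		pthread_mutex_unlock(&n->lock);

		/* kswapd_balance_pgdat(): touch only this node's pages. */
		printf("node %d: reclaiming local pages\n", n->id);
	}
	return NULL;
}

/* Allocator-side wakeup: only nodes below their watermark are disturbed,
 * mirroring the zones[] loop the patch adds to __alloc_pages(). */
static void wake_low_nodes(void)
{
	for (int i = 0; i < NODE_COUNT; i++) {
		struct node *n = &nodes[i];

		pthread_mutex_lock(&n->lock);
		if (n->free_pages <= PAGES_LOW) {
			n->need_balance = true;
			pthread_cond_signal(&n->kswapd_wait);
		}
		pthread_mutex_unlock(&n->lock);
	}
}

int main(void)
{
	for (int i = 0; i < NODE_COUNT; i++) {
		pthread_t tid;

		nodes[i].id = i;
		nodes[i].free_pages = (i % 2) ? 128 : 32;  /* odd nodes have plenty */
		pthread_mutex_init(&nodes[i].lock, NULL);
		pthread_cond_init(&nodes[i].kswapd_wait, NULL);
		pthread_create(&tid, NULL, reclaim_worker, &nodes[i]);
	}

	wake_low_nodes();   /* only the low (even-numbered) nodes print */
	sleep(1);           /* let the workers run before exiting */
	return 0;
}
```

The design point carried over from the patch: because each worker sleeps on its node's private queue, memory pressure on one node never wakes the daemons of nodes that still have plenty of free pages.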
