diff options
| author | Martin J. Bligh <mbligh@aracnet.com> | 2004-06-26 21:00:45 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2004-06-26 21:00:45 -0700 |
| commit | 9278aa3910c86374cf7bd2e0cb1dc26838dd1f55 (patch) | |
| tree | 5003594d94e615058f035007d003ffd4e7c56beb | |
| parent | 4736ba03c29ab2e7764e1aed9858de823f69d2ad (diff) | |
[PATCH] fix NUMA boundary between ZONE_NORMAL and HIGHMEM
From: Andy Whitcroft <apw@shadowen.org>
This patch eliminates the false hole which can form between ZONE_NORMAL and
ZONE_HIGHMEM. This is most easily seen when 4g/4g split is enabled, but
it's always broken, and we just happen not to hit it most of the time.
Basically, the patch changes the allocation of the numa remaps regions (the
source of the holes) such that they officially fall within VMALLOC space,
where they belong.
Tested in -mjb for a couple of months, and again against 2.6.7-mm1.
Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Martin J. Bligh <mbligh@aracnet.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
| -rw-r--r-- | arch/i386/mm/discontig.c | 18 | ||||
| -rw-r--r-- | include/asm-i386/pgtable.h | 4 | ||||
| -rw-r--r-- | include/linux/mm.h | 1 | ||||
| -rw-r--r-- | mm/memory.c | 9 |
4 files changed, 26 insertions, 6 deletions
diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c index ad04cfd73dc9..99ff78c1093b 100644 --- a/arch/i386/mm/discontig.c +++ b/arch/i386/mm/discontig.c @@ -236,6 +236,13 @@ unsigned long __init setup_memory(void) unsigned long bootmap_size, system_start_pfn, system_max_low_pfn; unsigned long reserve_pages; + /* + * When mapping a NUMA machine we allocate the node_mem_map arrays + * from node local memory. They are then mapped directly into KVA + * between zone normal and vmalloc space. Calculate the size of + * this space and use it to adjust the boundry between ZONE_NORMAL + * and ZONE_HIGHMEM. + */ get_memcfg_numa(); reserve_pages = calculate_numa_remap_pages(); @@ -243,7 +250,10 @@ unsigned long __init setup_memory(void) system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end); find_max_pfn(); - system_max_low_pfn = max_low_pfn = find_max_low_pfn(); + system_max_low_pfn = max_low_pfn = find_max_low_pfn() - reserve_pages; + printk("reserve_pages = %ld find_max_low_pfn() ~ %ld\n", + reserve_pages, max_low_pfn + reserve_pages); + printk("max_pfn = %ld\n", max_pfn); #ifdef CONFIG_HIGHMEM highstart_pfn = highend_pfn = max_pfn; if (max_pfn > system_max_low_pfn) @@ -251,7 +261,6 @@ unsigned long __init setup_memory(void) printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", pages_to_mb(highend_pfn - highstart_pfn)); #endif - system_max_low_pfn = max_low_pfn = max_low_pfn - reserve_pages; printk(KERN_NOTICE "%ldMB LOWMEM available.\n", pages_to_mb(system_max_low_pfn)); printk("min_low_pfn = %ld, max_low_pfn = %ld, highstart_pfn = %ld\n", @@ -261,15 +270,16 @@ unsigned long __init setup_memory(void) (ulong) pfn_to_kaddr(max_low_pfn)); for (nid = 0; nid < numnodes; nid++) { node_remap_start_vaddr[nid] = pfn_to_kaddr( - highstart_pfn - node_remap_offset[nid]); + (highstart_pfn + reserve_pages) - node_remap_offset[nid]); allocate_pgdat(nid); printk ("node %d will remap to vaddr %08lx - %08lx\n", nid, (ulong) node_remap_start_vaddr[nid], - (ulong) 
pfn_to_kaddr(highstart_pfn + (ulong) pfn_to_kaddr(highstart_pfn + reserve_pages - node_remap_offset[nid] + node_remap_size[nid])); } printk("High memory starts at vaddr %08lx\n", (ulong) pfn_to_kaddr(highstart_pfn)); + vmalloc_earlyreserve = reserve_pages * PAGE_SIZE; for (nid = 0; nid < numnodes; nid++) find_max_pfn_node(nid); diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index 135bd3093c32..b95fedb5b32d 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -83,8 +83,8 @@ void paging_init(void); * area for the same reason. ;) */ #define VMALLOC_OFFSET (8*1024*1024) -#define VMALLOC_START (((unsigned long) high_memory + 2*VMALLOC_OFFSET-1) & \ - ~(VMALLOC_OFFSET-1)) +#define VMALLOC_START (((unsigned long) high_memory + vmalloc_earlyreserve + \ + 2*VMALLOC_OFFSET-1) & ~(VMALLOC_OFFSET-1)) #ifdef CONFIG_HIGHMEM # define VMALLOC_END (PKMAP_BASE-2*PAGE_SIZE) #else diff --git a/include/linux/mm.h b/include/linux/mm.h index 8f8a8a3a31b7..0205d4cd35eb 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -23,6 +23,7 @@ extern unsigned long max_mapnr; extern unsigned long num_physpages; extern void * high_memory; +extern unsigned long vmalloc_earlyreserve; extern int page_cluster; #include <asm/page.h> diff --git a/mm/memory.c b/mm/memory.c index a91e3837fcaf..e1fafbb4deed 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -66,12 +66,21 @@ EXPORT_SYMBOL(mem_map); #endif unsigned long num_physpages; +/* + * A number of key systems in x86 including ioremap() rely on the assumption + * that high_memory defines the upper bound on direct map memory, then end + * of ZONE_NORMAL. Under CONFIG_DISCONTIG this means that max_low_pfn and + * highstart_pfn must be the same; there must be no gap between ZONE_NORMAL + * and ZONE_HIGHMEM. 
+ */ void * high_memory; struct page *highmem_start_page; +unsigned long vmalloc_earlyreserve; EXPORT_SYMBOL(num_physpages); EXPORT_SYMBOL(highmem_start_page); EXPORT_SYMBOL(high_memory); +EXPORT_SYMBOL(vmalloc_earlyreserve); /* * We special-case the C-O-W ZERO_PAGE, because it's such |
