summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMartin J. Bligh <mbligh@aracnet.com>2004-06-26 21:00:45 -0700
committerLinus Torvalds <torvalds@ppc970.osdl.org>2004-06-26 21:00:45 -0700
commit9278aa3910c86374cf7bd2e0cb1dc26838dd1f55 (patch)
tree5003594d94e615058f035007d003ffd4e7c56beb
parent4736ba03c29ab2e7764e1aed9858de823f69d2ad (diff)
[PATCH] fix NUMA boundary between ZONE_NORMAL and HIGHMEM
From: Andy Whitcroft <apw@shadowen.org>

This patch eliminates the false hole which can form between ZONE_NORMAL and ZONE_HIGHMEM. This is most easily seen when the 4g/4g split is enabled, but it's always broken, and we just happen not to hit it most of the time.

Basically, the patch changes the allocation of the NUMA remap regions (the source of the holes) such that they officially fall within VMALLOC space, where they belong.

Tested in -mjb for a couple of months, and again against 2.6.7-mm1.

Signed-off-by: Andy Whitcroft <apw@shadowen.org> Signed-off-by: Martin J. Bligh <mbligh@aracnet.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--arch/i386/mm/discontig.c18
-rw-r--r--include/asm-i386/pgtable.h4
-rw-r--r--include/linux/mm.h1
-rw-r--r--mm/memory.c9
4 files changed, 26 insertions, 6 deletions
diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c
index ad04cfd73dc9..99ff78c1093b 100644
--- a/arch/i386/mm/discontig.c
+++ b/arch/i386/mm/discontig.c
@@ -236,6 +236,13 @@ unsigned long __init setup_memory(void)
unsigned long bootmap_size, system_start_pfn, system_max_low_pfn;
unsigned long reserve_pages;
+ /*
+ * When mapping a NUMA machine we allocate the node_mem_map arrays
+ * from node local memory. They are then mapped directly into KVA
+ * between zone normal and vmalloc space. Calculate the size of
+ * this space and use it to adjust the boundry between ZONE_NORMAL
+ * and ZONE_HIGHMEM.
+ */
get_memcfg_numa();
reserve_pages = calculate_numa_remap_pages();
@@ -243,7 +250,10 @@ unsigned long __init setup_memory(void)
system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end);
find_max_pfn();
- system_max_low_pfn = max_low_pfn = find_max_low_pfn();
+ system_max_low_pfn = max_low_pfn = find_max_low_pfn() - reserve_pages;
+ printk("reserve_pages = %ld find_max_low_pfn() ~ %ld\n",
+ reserve_pages, max_low_pfn + reserve_pages);
+ printk("max_pfn = %ld\n", max_pfn);
#ifdef CONFIG_HIGHMEM
highstart_pfn = highend_pfn = max_pfn;
if (max_pfn > system_max_low_pfn)
@@ -251,7 +261,6 @@ unsigned long __init setup_memory(void)
printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
pages_to_mb(highend_pfn - highstart_pfn));
#endif
- system_max_low_pfn = max_low_pfn = max_low_pfn - reserve_pages;
printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
pages_to_mb(system_max_low_pfn));
printk("min_low_pfn = %ld, max_low_pfn = %ld, highstart_pfn = %ld\n",
@@ -261,15 +270,16 @@ unsigned long __init setup_memory(void)
(ulong) pfn_to_kaddr(max_low_pfn));
for (nid = 0; nid < numnodes; nid++) {
node_remap_start_vaddr[nid] = pfn_to_kaddr(
- highstart_pfn - node_remap_offset[nid]);
+ (highstart_pfn + reserve_pages) - node_remap_offset[nid]);
allocate_pgdat(nid);
printk ("node %d will remap to vaddr %08lx - %08lx\n", nid,
(ulong) node_remap_start_vaddr[nid],
- (ulong) pfn_to_kaddr(highstart_pfn
+ (ulong) pfn_to_kaddr(highstart_pfn + reserve_pages
- node_remap_offset[nid] + node_remap_size[nid]));
}
printk("High memory starts at vaddr %08lx\n",
(ulong) pfn_to_kaddr(highstart_pfn));
+ vmalloc_earlyreserve = reserve_pages * PAGE_SIZE;
for (nid = 0; nid < numnodes; nid++)
find_max_pfn_node(nid);
diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h
index 135bd3093c32..b95fedb5b32d 100644
--- a/include/asm-i386/pgtable.h
+++ b/include/asm-i386/pgtable.h
@@ -83,8 +83,8 @@ void paging_init(void);
* area for the same reason. ;)
*/
#define VMALLOC_OFFSET (8*1024*1024)
-#define VMALLOC_START (((unsigned long) high_memory + 2*VMALLOC_OFFSET-1) & \
- ~(VMALLOC_OFFSET-1))
+#define VMALLOC_START (((unsigned long) high_memory + vmalloc_earlyreserve + \
+ 2*VMALLOC_OFFSET-1) & ~(VMALLOC_OFFSET-1))
#ifdef CONFIG_HIGHMEM
# define VMALLOC_END (PKMAP_BASE-2*PAGE_SIZE)
#else
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8f8a8a3a31b7..0205d4cd35eb 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -23,6 +23,7 @@ extern unsigned long max_mapnr;
extern unsigned long num_physpages;
extern void * high_memory;
+extern unsigned long vmalloc_earlyreserve;
extern int page_cluster;
#include <asm/page.h>
diff --git a/mm/memory.c b/mm/memory.c
index a91e3837fcaf..e1fafbb4deed 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -66,12 +66,21 @@ EXPORT_SYMBOL(mem_map);
#endif
unsigned long num_physpages;
+/*
+ * A number of key systems in x86 including ioremap() rely on the assumption
+ * that high_memory defines the upper bound on direct map memory, then end
+ * of ZONE_NORMAL. Under CONFIG_DISCONTIG this means that max_low_pfn and
+ * highstart_pfn must be the same; there must be no gap between ZONE_NORMAL
+ * and ZONE_HIGHMEM.
+ */
void * high_memory;
struct page *highmem_start_page;
+unsigned long vmalloc_earlyreserve;
EXPORT_SYMBOL(num_physpages);
EXPORT_SYMBOL(highmem_start_page);
EXPORT_SYMBOL(high_memory);
+EXPORT_SYMBOL(vmalloc_earlyreserve);
/*
* We special-case the C-O-W ZERO_PAGE, because it's such