[PATCH] Use numa policy API for boot time policy

Suggested by Manfred Spraul. __get_free_pages had a hack to do node-interleaved allocation at boot time. This patch instead sets an interleave process policy, using the NUMA API, for init and the idle threads. Before entering the user-space init, the policy is reset to the default again. The result is the same. The advantages are less code and the removal of a check from a fast path; the patch removes more code than it adds. I verified that the memory distribution after boot is roughly the same. Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
author: Andi Kleen <ak@suse.de> 2004-06-17 18:06:43 -0700
committer: Linus Torvalds <torvalds@ppc970.osdl.org> 2004-06-17 18:06:43 -0700
commit: 3bdafaa17a0e56f117a8efa088ef8e3c749c07e0 (patch)
tree: c0c55235dba9edefa2c6431d25444ceae7559370
parent: e7f32b95b106d6e127a16a64874288424a45e709 (diff)
4 files changed, 30 insertions, 44 deletions
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 2aeecaf7145b..775c415908ae 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -153,6 +153,9 @@ void mpol_free_shared_policy(struct shared_policy *p);
 struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp,
 					    unsigned long idx);
 
+extern void numa_default_policy(void);
+extern void numa_policy_init(void);
+
 #else
 
 struct mempolicy {};
@@ -215,6 +218,14 @@ mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx)
 #define vma_policy(vma) NULL
 #define vma_set_policy(vma, pol) do {} while(0)
 
+static inline void numa_policy_init(void)
+{
+}
+
+static inline void numa_default_policy(void)
+{
+}
+
 #endif /* CONFIG_NUMA */
 #endif /* __KERNEL__ */
 
diff --git a/init/main.c b/init/main.c
index 613aaaba89f8..3f9e9e74ab00 100644
--- a/init/main.c
+++ b/init/main.c
@@ -43,6 +43,7 @@
 #include <linux/efi.h>
 #include <linux/unistd.h>
 #include <linux/rmap.h>
+#include <linux/mempolicy.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -385,6 +386,7 @@ static void __init smp_init(void)
 static void noinline rest_init(void)
 {
 	kernel_thread(init, NULL, CLONE_FS | CLONE_SIGHAND);
+	numa_default_policy();
 	unlock_kernel();
  	cpu_idle();
 } 
@@ -456,6 +458,7 @@ asmlinkage void __init start_kernel(void)
 #endif
 	mem_init();
 	kmem_cache_init();
+	numa_policy_init();
 	if (late_time_init)
 		late_time_init();
 	calibrate_delay();
@@ -645,6 +648,7 @@ static int init(void * unused)
 	free_initmem();
 	unlock_kernel();
 	system_state = SYSTEM_RUNNING;
+	numa_default_policy();
 
 	if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0)
 		printk("Warning: unable to open an initial console.\n");
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 1b11685f0391..ad260e0dbb59 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1002,7 +1002,8 @@ void mpol_free_shared_policy(struct shared_policy *p)
 	up(&p->sem);
 }
 
-static __init int numa_policy_init(void)
+/* assumes fs == KERNEL_DS */
+void __init numa_policy_init(void)
 {
 	policy_cache = kmem_cache_create("numa_policy",
 					 sizeof(struct mempolicy),
@@ -1011,6 +1012,17 @@ static __init int numa_policy_init(void)
 	sn_cache = kmem_cache_create("shared_policy_node",
 				     sizeof(struct sp_node),
 				     0, SLAB_PANIC, NULL, NULL);
-	return 0;
+
+	/* Set interleaving policy for system init. This way not all
+	   the data structures allocated at system boot end up in node zero. */
+
+	if (sys_set_mempolicy(MPOL_INTERLEAVE, node_online_map, MAX_NUMNODES) < 0)
+		printk("numa_policy_init: interleaving failed\n");
+}
+
+/* Reset current's policy to default (assumes fs == KERNEL_DS). Not __init:
+ * init() calls this after free_initmem() has released the init sections. */
+void numa_default_policy(void)
+{
+	sys_set_mempolicy(MPOL_DEFAULT, NULL, 0);
+}
-module_init(numa_policy_init);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3dbbeb2f3293..444bb534dbd8 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -732,53 +732,12 @@ got_pg:
 
 EXPORT_SYMBOL(__alloc_pages);
 
-#ifdef CONFIG_NUMA
-/* Early boot: Everything is done by one cpu, but the data structures will be
- * used by all cpus - spread them on all nodes.
- */
-static __init unsigned long get_boot_pages(unsigned int gfp_mask, unsigned int order)
-{
-static int nodenr;
-	int i = nodenr;
-	struct page *page;
-
-	for (;;) {
-		if (i > nodenr + numnodes)
-			return 0;
-		if (node_present_pages(i%numnodes)) {
-			struct zone **z;
-			/* The node contains memory. Check that there is
-			 * memory in the intended zonelist.
-			 */
-			z = NODE_DATA(i%numnodes)->node_zonelists[gfp_mask & GFP_ZONEMASK].zones;
-			while (*z) {
-				if ( (*z)->free_pages > (1UL<<order))
-					goto found_node;
-				z++;
-			}
-		}
-		i++;
-	}
-found_node:
-	nodenr = i+1;
-	page = alloc_pages_node(i%numnodes, gfp_mask, order);
-	if (!page)
-		return 0;
-	return (unsigned long) page_address(page);
-}
-#endif
-
 /*
  * Common helper functions.
  */
 fastcall unsigned long __get_free_pages(unsigned int gfp_mask, unsigned int order)
 {
 	struct page * page;
-
-#ifdef CONFIG_NUMA
-	if (unlikely(system_state == SYSTEM_BOOTING))
-		return get_boot_pages(gfp_mask, order);
-#endif
 	page = alloc_pages(gfp_mask, order);
 	if (!page)
 		return 0;
author	Andi Kleen <ak@suse.de>	2004-06-17 18:06:43 -0700
committer	Linus Torvalds <torvalds@ppc970.osdl.org>	2004-06-17 18:06:43 -0700
commit	3bdafaa17a0e56f117a8efa088ef8e3c749c07e0 (patch)
tree	c0c55235dba9edefa2c6431d25444ceae7559370
parent	e7f32b95b106d6e127a16a64874288424a45e709 (diff)