| field | value | date |
|---|---|---|
| author | Andrew Morton <akpm@osdl.org> | 2004-05-09 23:24:38 -0700 |
| committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2004-05-09 23:24:38 -0700 |
| commit | e18e19ade9a0c0334cf8a2bc1945d97ec1697061 (patch) | |
| tree | 57afc4fa73702ed5d66fb8197f8668e65de28e8a | |
| parent | 7a1dc0ea9e84be7175d007b19ae9d8caab13c2e5 (diff) | |
[PATCH] sched: implement domains for i386 HT
From: Nick Piggin <piggin@cyberone.com.au>
The following patch builds a scheduling description for the i386
architecture using cpu_sibling_map to set up SMT if CONFIG_SCHED_SMT is
set.
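Condensed, the per-CPU setup the patch performs looks like this (a sketch of the non-NUMA `arch_init_sched_domains()` in the diff below; the helper name is illustrative, not part of the patch):

```c
/* Sketch of the per-CPU work done by arch_init_sched_domains() below
 * (non-NUMA case); setup_cpu_domains() is an illustrative name only. */
static void setup_cpu_domains(int i)
{
        struct sched_domain *cpu_domain = cpu_sched_domain(i);
        struct sched_domain *phys_domain = &per_cpu(phys_domains, i);

        /* Lowest level: span only this CPU's hyperthread siblings,
         * with the cheap, aggressive SD_SIBLING_INIT parameters. */
        *cpu_domain = SD_SIBLING_INIT;
        cpu_domain->span = cpu_sibling_map[i];

        /* Next level up: span all online CPUs (or the node, on NUMA). */
        *phys_domain = SD_CPU_INIT;
        phys_domain->span = cpu_online_map;

        cpu_domain->parent = phys_domain;
}
```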
It could be made fancier by collapsing degenerate domains at runtime (i.e. one sibling per CPU, or a single NUMA node in the machine).
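No such collapsing is done here; a rough sketch of what a degeneracy test could look like (hypothetical, not in this patch):

```c
/* Hypothetical, not in this patch: a domain whose span contains a
 * single CPU (e.g. no HT siblings) gives the balancer no choice,
 * so that level of the hierarchy could be dropped at runtime. */
static int sd_is_degenerate(struct sched_domain *sd)
{
        return cpus_weight(sd->span) <= 1;
}
```

Later mainline kernels did grow a check along these lines (sd_degenerate() in kernel/sched.c).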
From: Zwane Mwaikambo <zwane@arm.linux.org.uk>
This fixes an oops due to cpu_sibling_map being uninitialised when a
system with no MP table (most UP boxen) boots a CONFIG_SCHED_SMT kernel.
The sched_group lists also end up unterminated in that case, but the
oops hits first. Patch tested on UP without an MP table, 2x P2, and a
UP Xeon with no siblings.
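The failure mode: with no MP table, the sibling-detection loop in smp_boot_cpus() is never reached, so cpu_sibling_map[0] stays empty, first_cpu() on that empty span returns a bogus index, and the group lists are never closed. The fix, visible in the first smpboot.c hunk below, seeds the boot CPU's entry early:

```c
/* From the smpboot.c hunk below: CPU 0 is always its own sibling,
 * even when no MP configuration is found. */
cpus_clear(cpu_sibling_map[0]);
cpu_set(0, cpu_sibling_map[0]);
```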
From: "Martin J. Bligh" <mbligh@aracnet.com>,
Nick Piggin <piggin@cyberone.com.au>
Change arch_init_sched_domains() to use cpu_online_map rather than building a mask of all possible CPUs by hand.
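In the kernel/sched.c hunks below this amounts to deleting the hand-built mask and walking the online map directly:

```c
int i;

/* Removed: a mask of possible CPUs assembled by hand. */
cpumask_t all_cpus = CPU_MASK_NONE;
for (i = 0; i < NR_CPUS; i++)
        if (cpu_possible(i))
                cpu_set(i, all_cpus);

/* Replacement: iterate the online CPUs directly. */
for_each_cpu_mask(i, cpu_online_map) {
        /* ... domain and group setup for CPU i ... */
}
```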
From: Anton Blanchard <anton@samba.org>
Fix the build when NR_CPUS > BITS_PER_LONG.
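With NR_CPUS > BITS_PER_LONG, cpumask_t becomes a multi-word structure rather than a plain unsigned long, and the wide-mask implementation of cpus_and() needs the address of its source bitmaps, which the rvalue returned by node_to_cpumask() does not have. The fix (second kernel/sched.c hunk below) goes through a temporary:

```c
cpumask_t nodemask;
cpumask_t tmp = node_to_cpumask(i);     /* give the rvalue an lvalue home */

cpus_and(nodemask, tmp, cpu_online_map);
```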
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | arch/i386/Kconfig | 10 |
| -rw-r--r-- | arch/i386/kernel/smpboot.c | 206 |
| -rw-r--r-- | include/asm-i386/processor.h | 5 |
| -rw-r--r-- | include/linux/sched.h | 16 |
| -rw-r--r-- | kernel/sched.c | 35 |

5 files changed, 246 insertions, 26 deletions
```diff
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index 954887332846..b73a04af6a20 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -479,6 +479,16 @@ config NR_CPUS
           This is purely to save memory - each supported CPU adds
           approximately eight kilobytes to the kernel image.
 
+config SCHED_SMT
+        bool "SMT (Hyperthreading) scheduler support"
+        depends on SMP
+        default off
+        help
+          SMT scheduler support improves the CPU scheduler's decision making
+          when dealing with Intel Pentium 4 chips with HyperThreading at a
+          cost of slightly increased overhead in some places. If unsure say
+          N here.
+
 config PREEMPT
         bool "Preemptible Kernel"
         help
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 77d4309d6c6e..281f6d443b6c 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -39,6 +39,7 @@
 #include <linux/kernel.h>
 #include <linux/mm.h>
+#include <linux/sched.h>
 #include <linux/kernel_stat.h>
 #include <linux/smp_lock.h>
 #include <linux/irq.h>
@@ -955,6 +956,8 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
         current_thread_info()->cpu = 0;
         smp_tune_scheduling();
+        cpus_clear(cpu_sibling_map[0]);
+        cpu_set(0, cpu_sibling_map[0]);
 
         /*
          * If we couldn't find an SMP configuration at boot time,
@@ -1085,7 +1088,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
          * efficiently.
          */
         for (cpu = 0; cpu < NR_CPUS; cpu++)
-                cpu_sibling_map[cpu] = CPU_MASK_NONE;
+                cpus_clear(cpu_sibling_map[cpu]);
 
         for (cpu = 0; cpu < NR_CPUS; cpu++) {
                 int siblings = 0;
@@ -1122,6 +1125,207 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
         synchronize_tsc_bp();
 }
 
+#ifdef CONFIG_SCHED_SMT
+#ifdef CONFIG_NUMA
+static struct sched_group sched_group_cpus[NR_CPUS];
+static struct sched_group sched_group_phys[NR_CPUS];
+static struct sched_group sched_group_nodes[MAX_NUMNODES];
+static DEFINE_PER_CPU(struct sched_domain, phys_domains);
+static DEFINE_PER_CPU(struct sched_domain, node_domains);
+__init void arch_init_sched_domains(void)
+{
+        int i;
+        struct sched_group *first_cpu = NULL, *last_cpu = NULL;
+
+        /* Set up domains */
+        for_each_cpu_mask(i, cpu_online_map) {
+                struct sched_domain *cpu_domain = cpu_sched_domain(i);
+                struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
+                struct sched_domain *node_domain = &per_cpu(node_domains, i);
+                int node = cpu_to_node(i);
+                cpumask_t nodemask = node_to_cpumask(node);
+
+                *cpu_domain = SD_SIBLING_INIT;
+                cpu_domain->span = cpu_sibling_map[i];
+
+                *phys_domain = SD_CPU_INIT;
+                phys_domain->span = nodemask;
+                phys_domain->flags |= SD_FLAG_IDLE;
+
+                *node_domain = SD_NODE_INIT;
+                node_domain->span = cpu_online_map;
+        }
+
+        /* Set up CPU (sibling) groups */
+        for_each_cpu_mask(i, cpu_online_map) {
+                struct sched_domain *cpu_domain = cpu_sched_domain(i);
+                int j;
+                first_cpu = last_cpu = NULL;
+
+                if (i != first_cpu(cpu_domain->span))
+                        continue;
+
+                for_each_cpu_mask(j, cpu_domain->span) {
+                        struct sched_group *cpu = &sched_group_cpus[j];
+
+                        cpu->cpumask = CPU_MASK_NONE;
+                        cpu_set(j, cpu->cpumask);
+
+                        if (!first_cpu)
+                                first_cpu = cpu;
+                        if (last_cpu)
+                                last_cpu->next = cpu;
+                        last_cpu = cpu;
+                }
+                last_cpu->next = first_cpu;
+        }
+
+        for (i = 0; i < MAX_NUMNODES; i++) {
+                int j;
+                cpumask_t nodemask;
+                cpus_and(nodemask, node_to_cpumask(i), cpu_online_map);
+
+                if (cpus_empty(nodemask))
+                        continue;
+
+                first_cpu = last_cpu = NULL;
+                /* Set up physical groups */
+                for_each_cpu_mask(j, nodemask) {
+                        struct sched_domain *cpu_domain = cpu_sched_domain(j);
+                        struct sched_group *cpu = &sched_group_phys[j];
+
+                        if (j != first_cpu(cpu_domain->span))
+                                continue;
+
+                        cpu->cpumask = cpu_domain->span;
+
+                        if (!first_cpu)
+                                first_cpu = cpu;
+                        if (last_cpu)
+                                last_cpu->next = cpu;
+                        last_cpu = cpu;
+                }
+                last_cpu->next = first_cpu;
+        }
+
+        /* Set up nodes */
+        first_cpu = last_cpu = NULL;
+        for (i = 0; i < MAX_NUMNODES; i++) {
+                struct sched_group *cpu = &sched_group_nodes[i];
+                cpumask_t nodemask;
+                cpus_and(nodemask, node_to_cpumask(i), cpu_online_map);
+
+                if (cpus_empty(nodemask))
+                        continue;
+
+                cpu->cpumask = nodemask;
+
+                if (!first_cpu)
+                        first_cpu = cpu;
+                if (last_cpu)
+                        last_cpu->next = cpu;
+                last_cpu = cpu;
+        }
+        last_cpu->next = first_cpu;
+
+
+        mb();
+        for_each_cpu_mask(i, cpu_online_map) {
+                int node = cpu_to_node(i);
+                struct sched_domain *cpu_domain = cpu_sched_domain(i);
+                struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
+                struct sched_domain *node_domain = &per_cpu(node_domains, i);
+                struct sched_group *cpu_group = &sched_group_cpus[i];
+                struct sched_group *phys_group = &sched_group_phys[first_cpu(cpu_domain->span)];
+                struct sched_group *node_group = &sched_group_nodes[node];
+
+                cpu_domain->parent = phys_domain;
+                phys_domain->parent = node_domain;
+
+                node_domain->groups = node_group;
+                phys_domain->groups = phys_group;
+                cpu_domain->groups = cpu_group;
+        }
+}
+#else /* CONFIG_NUMA */
+static struct sched_group sched_group_cpus[NR_CPUS];
+static struct sched_group sched_group_phys[NR_CPUS];
+static DEFINE_PER_CPU(struct sched_domain, phys_domains);
+__init void arch_init_sched_domains(void)
+{
+        int i;
+        struct sched_group *first_cpu = NULL, *last_cpu = NULL;
+
+        /* Set up domains */
+        for_each_cpu_mask(i, cpu_online_map) {
+                struct sched_domain *cpu_domain = cpu_sched_domain(i);
+                struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
+
+                *cpu_domain = SD_SIBLING_INIT;
+                cpu_domain->span = cpu_sibling_map[i];
+
+                *phys_domain = SD_CPU_INIT;
+                phys_domain->span = cpu_online_map;
+                phys_domain->flags |= SD_FLAG_IDLE;
+        }
+
+        /* Set up CPU (sibling) groups */
+        for_each_cpu_mask(i, cpu_online_map) {
+                struct sched_domain *cpu_domain = cpu_sched_domain(i);
+                int j;
+                first_cpu = last_cpu = NULL;
+
+                if (i != first_cpu(cpu_domain->span))
+                        continue;
+
+                for_each_cpu_mask(j, cpu_domain->span) {
+                        struct sched_group *cpu = &sched_group_cpus[j];
+
+                        cpus_clear(cpu->cpumask);
+                        cpu_set(j, cpu->cpumask);
+
+                        if (!first_cpu)
+                                first_cpu = cpu;
+                        if (last_cpu)
+                                last_cpu->next = cpu;
+                        last_cpu = cpu;
+                }
+                last_cpu->next = first_cpu;
+        }
+
+        first_cpu = last_cpu = NULL;
+        /* Set up physical groups */
+        for_each_cpu_mask(i, cpu_online_map) {
+                struct sched_domain *cpu_domain = cpu_sched_domain(i);
+                struct sched_group *cpu = &sched_group_phys[i];
+
+                if (i != first_cpu(cpu_domain->span))
+                        continue;
+
+                cpu->cpumask = cpu_domain->span;
+
+                if (!first_cpu)
+                        first_cpu = cpu;
+                if (last_cpu)
+                        last_cpu->next = cpu;
+                last_cpu = cpu;
+        }
+        last_cpu->next = first_cpu;
+
+        mb();
+        for_each_cpu_mask(i, cpu_online_map) {
+                struct sched_domain *cpu_domain = cpu_sched_domain(i);
+                struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
+                struct sched_group *cpu_group = &sched_group_cpus[i];
+                struct sched_group *phys_group = &sched_group_phys[first_cpu(cpu_domain->span)];
+
+                cpu_domain->parent = phys_domain;
+                phys_domain->groups = phys_group;
+                cpu_domain->groups = cpu_group;
+        }
+}
+#endif /* CONFIG_NUMA */
+#endif /* CONFIG_SCHED_SMT */
+
 /* These are wrappers to interface to the new boot process. Someone
    who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
 void __init smp_prepare_cpus(unsigned int max_cpus)
diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h
index e114f9fe9cbe..d8ecd6046b1a 100644
--- a/include/asm-i386/processor.h
+++ b/include/asm-i386/processor.h
@@ -648,4 +648,9 @@ extern inline void prefetchw(const void *x)
 
 extern void select_idle_routine(const struct cpuinfo_x86 *c);
 
+#ifdef CONFIG_SCHED_SMT
+#define ARCH_HAS_SCHED_DOMAIN
+#define ARCH_HAS_SCHED_WAKE_BALANCE
+#endif
+
 #endif /* __ASM_I386_PROCESSOR_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8cb322f0839d..fa0b49eb1578 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -572,6 +572,22 @@ struct sched_domain {
         unsigned int nr_balance_failed; /* initialise to 0 */
 };
 
+/* Common values for SMT siblings */
+#define SD_SIBLING_INIT (struct sched_domain) {                \
+        .span                   = CPU_MASK_NONE,        \
+        .parent                 = NULL,                 \
+        .groups                 = NULL,                 \
+        .min_interval           = 1,                    \
+        .max_interval           = 2,                    \
+        .busy_factor            = 8,                    \
+        .imbalance_pct          = 110,                  \
+        .cache_hot_time         = 0,                    \
+        .cache_nice_tries       = 0,                    \
+        .flags                  = SD_FLAG_FASTMIGRATE | SD_FLAG_NEWIDLE | SD_FLAG_WAKE,\
+        .balance_interval       = 1,                    \
+        .nr_balance_failed      = 0,                    \
+}
+
 /* Common values for CPUs */
 #define SD_CPU_INIT (struct sched_domain) {                \
         .span                   = CPU_MASK_NONE,        \
diff --git a/kernel/sched.c b/kernel/sched.c
index 4b9db0bff5bf..7293c40707ec 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3295,28 +3295,20 @@ DEFINE_PER_CPU(struct sched_domain, node_domains);
 static void __init arch_init_sched_domains(void)
 {
         int i;
-        cpumask_t all_cpus = CPU_MASK_NONE;
         struct sched_group *first_node = NULL, *last_node = NULL;
 
-        for (i = 0; i < NR_CPUS; i++) {
-                if (!cpu_possible(i))
-                        continue;
-
-                cpu_set(i, all_cpus);
-        }
-
         /* Set up domains */
-        for_each_cpu_mask(i, all_cpus) {
+        for_each_cpu_mask(i, cpu_online_map) {
                 int node = cpu_to_node(i);
                 cpumask_t nodemask = node_to_cpumask(node);
                 struct sched_domain *node_domain = &per_cpu(node_domains, i);
                 struct sched_domain *cpu_domain = cpu_sched_domain(i);
 
                 *node_domain = SD_NODE_INIT;
-                node_domain->span = all_cpus;
+                node_domain->span = cpu_online_map;
 
                 *cpu_domain = SD_CPU_INIT;
-                cpus_and(cpu_domain->span, nodemask, all_cpus);
+                cpus_and(cpu_domain->span, nodemask, cpu_online_map);
                 cpu_domain->parent = node_domain;
         }
 
@@ -3326,8 +3318,9 @@ static void __init arch_init_sched_domains(void)
                 int j;
                 cpumask_t nodemask;
                 struct sched_group *node = &sched_group_nodes[i];
+                cpumask_t tmp = node_to_cpumask(i);
 
-                cpus_and(nodemask, node_to_cpumask(i), all_cpus);
+                cpus_and(nodemask, tmp, cpu_online_map);
 
                 if (cpus_empty(nodemask))
                         continue;
@@ -3357,7 +3350,7 @@ static void __init arch_init_sched_domains(void)
         last_node->next = first_node;
 
         mb();
-        for_each_cpu_mask(i, all_cpus) {
+        for_each_cpu_mask(i, cpu_online_map) {
                 struct sched_domain *node_domain = &per_cpu(node_domains, i);
                 struct sched_domain *cpu_domain = cpu_sched_domain(i);
                 node_domain->groups = &sched_group_nodes[cpu_to_node(i)];
@@ -3369,26 +3362,18 @@ static void __init arch_init_sched_domains(void)
 static void __init arch_init_sched_domains(void)
 {
         int i;
-        cpumask_t all_cpus = CPU_MASK_NONE;
         struct sched_group *first_cpu = NULL, *last_cpu = NULL;
 
-        for (i = 0; i < NR_CPUS; i++) {
-                if (!cpu_possible(i))
-                        continue;
-
-                cpu_set(i, all_cpus);
-        }
-
         /* Set up domains */
-        for_each_cpu_mask(i, all_cpus) {
+        for_each_cpu_mask(i, cpu_online_map) {
                 struct sched_domain *cpu_domain = cpu_sched_domain(i);
 
                 *cpu_domain = SD_CPU_INIT;
-                cpu_domain->span = all_cpus;
+                cpu_domain->span = cpu_online_map;
         }
 
         /* Set up CPU groups */
-        for_each_cpu_mask(i, all_cpus) {
+        for_each_cpu_mask(i, cpu_online_map) {
                 struct sched_group *cpu = &sched_group_cpus[i];
 
                 cpus_clear(cpu->cpumask);
@@ -3403,7 +3388,7 @@ static void __init arch_init_sched_domains(void)
         last_cpu->next = first_cpu;
 
         mb();
-        for_each_cpu_mask(i, all_cpus) {
+        for_each_cpu_mask(i, cpu_online_map) {
                 struct sched_domain *cpu_domain = cpu_sched_domain(i);
                 cpu_domain->groups = &sched_group_cpus[i];
         }
```
