author     Andrew Morton <akpm@osdl.org>              2004-05-09 23:24:38 -0700
committer  Linus Torvalds <torvalds@ppc970.osdl.org>  2004-05-09 23:24:38 -0700
commit     e18e19ade9a0c0334cf8a2bc1945d97ec1697061 (patch)
tree       57afc4fa73702ed5d66fb8197f8668e65de28e8a
parent     7a1dc0ea9e84be7175d007b19ae9d8caab13c2e5 (diff)
[PATCH] sched: implement domains for i386 HT
From: Nick Piggin <piggin@cyberone.com.au>

The following patch builds a scheduling description for the i386 architecture, using cpu_sibling_map to set up SMT if CONFIG_SCHED_SMT is set. It could be made fancier and collapse degenerate domains at runtime (i.e. 1 sibling per CPU, or 1 NUMA node in the computer).

From: Zwane Mwaikambo <zwane@arm.linux.org.uk>

This fixes an oops due to cpu_sibling_map being uninitialised when a system with no MP table (most UP boxen) boots a CONFIG_SCHED_SMT kernel. What also happens is that the cpu_group lists end up not being terminated properly, but this oops kills it first. Patch tested on UP without an MP table, 2x P2, and a UP Xeon with no siblings.

From: "Martin J. Bligh" <mbligh@aracnet.com>, Nick Piggin <piggin@cyberone.com.au>

Change arch_init_sched_domains to use cpu_online_map.

From: Anton Blanchard <anton@samba.org>

Fix the build with NR_CPUS > BITS_PER_LONG.
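For reference, a minimal userspace sketch (not kernel code) of the two structural ideas the patch relies on: each CPU's lowest-level domain spans its HT siblings, and the sched_group entries at each level are chained into a circular list whose last ->next must point back to the first entry; that closing link is the termination Zwane's fix is about. The names toy_domain, toy_group, NCPUS and SIBLINGS are invented for illustration and assume a toy 2-package / 2-sibling topology; they are not real kernel symbols.

/*
 * Hypothetical standalone sketch; compile with any C compiler.
 * It mimics the shape of arch_init_sched_domains(), not its real code.
 */
#include <stdio.h>

#define NCPUS    4      /* pretend: 2 physical packages ... */
#define SIBLINGS 2      /* ... with 2 HT siblings each     */

struct toy_group {
        int cpu;                        /* one CPU per sibling group   */
        struct toy_group *next;         /* circular list, as in sched_group */
};

struct toy_domain {
        struct toy_domain *parent;      /* sibling domain -> physical domain */
        struct toy_group *groups;       /* balancing groups at this level    */
};

static struct toy_group  group_cpus[NCPUS];
static struct toy_domain cpu_domains[NCPUS];
static struct toy_domain phys_domains[NCPUS];

int main(void)
{
        int i, j;
        struct toy_group *g;

        /* Build one circular group list per physical package (sibling span). */
        for (i = 0; i < NCPUS; i += SIBLINGS) {
                struct toy_group *first = NULL, *last = NULL;

                for (j = i; j < i + SIBLINGS; j++) {
                        group_cpus[j].cpu = j;
                        if (!first)
                                first = &group_cpus[j];
                        if (last)
                                last->next = &group_cpus[j];
                        last = &group_cpus[j];
                }
                last->next = first;     /* close the ring: the missing step the oops fix adds */
        }

        /* Hook each CPU's sibling domain under a per-CPU physical domain. */
        for (i = 0; i < NCPUS; i++) {
                cpu_domains[i].parent = &phys_domains[i];
                cpu_domains[i].groups = &group_cpus[i];
        }

        /* Walk cpu0's sibling ring; this terminates only because it is circular. */
        g = cpu_domains[0].groups;
        do {
                printf("cpu0 sibling group -> cpu%d\n", g->cpu);
                g = g->next;
        } while (g != cpu_domains[0].groups);

        return 0;
}

Running the sketch prints the two members of cpu0's sibling ring and stops, which is only possible because the ring is properly closed; an unterminated list would loop through uninitialised memory, which is the failure mode described above.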
-rw-r--r--  arch/i386/Kconfig              10
-rw-r--r--  arch/i386/kernel/smpboot.c    206
-rw-r--r--  include/asm-i386/processor.h    5
-rw-r--r--  include/linux/sched.h          16
-rw-r--r--  kernel/sched.c                 35
5 files changed, 246 insertions, 26 deletions
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index 954887332846..b73a04af6a20 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -479,6 +479,16 @@ config NR_CPUS
This is purely to save memory - each supported CPU adds
approximately eight kilobytes to the kernel image.
+config SCHED_SMT
+ bool "SMT (Hyperthreading) scheduler support"
+ depends on SMP
+ default off
+ help
+ SMT scheduler support improves the CPU scheduler's decision making
+ when dealing with Intel Pentium 4 chips with HyperThreading at a
+ cost of slightly increased overhead in some places. If unsure say
+ N here.
+
config PREEMPT
bool "Preemptible Kernel"
help
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 77d4309d6c6e..281f6d443b6c 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -39,6 +39,7 @@
#include <linux/kernel.h>
#include <linux/mm.h>
+#include <linux/sched.h>
#include <linux/kernel_stat.h>
#include <linux/smp_lock.h>
#include <linux/irq.h>
@@ -955,6 +956,8 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
current_thread_info()->cpu = 0;
smp_tune_scheduling();
+ cpus_clear(cpu_sibling_map[0]);
+ cpu_set(0, cpu_sibling_map[0]);
/*
* If we couldn't find an SMP configuration at boot time,
@@ -1085,7 +1088,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
* efficiently.
*/
for (cpu = 0; cpu < NR_CPUS; cpu++)
- cpu_sibling_map[cpu] = CPU_MASK_NONE;
+ cpus_clear(cpu_sibling_map[cpu]);
for (cpu = 0; cpu < NR_CPUS; cpu++) {
int siblings = 0;
@@ -1122,6 +1125,207 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
synchronize_tsc_bp();
}
+#ifdef CONFIG_SCHED_SMT
+#ifdef CONFIG_NUMA
+static struct sched_group sched_group_cpus[NR_CPUS];
+static struct sched_group sched_group_phys[NR_CPUS];
+static struct sched_group sched_group_nodes[MAX_NUMNODES];
+static DEFINE_PER_CPU(struct sched_domain, phys_domains);
+static DEFINE_PER_CPU(struct sched_domain, node_domains);
+__init void arch_init_sched_domains(void)
+{
+ int i;
+ struct sched_group *first_cpu = NULL, *last_cpu = NULL;
+
+ /* Set up domains */
+ for_each_cpu_mask(i, cpu_online_map) {
+ struct sched_domain *cpu_domain = cpu_sched_domain(i);
+ struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
+ struct sched_domain *node_domain = &per_cpu(node_domains, i);
+ int node = cpu_to_node(i);
+ cpumask_t nodemask = node_to_cpumask(node);
+
+ *cpu_domain = SD_SIBLING_INIT;
+ cpu_domain->span = cpu_sibling_map[i];
+
+ *phys_domain = SD_CPU_INIT;
+ phys_domain->span = nodemask;
+ phys_domain->flags |= SD_FLAG_IDLE;
+
+ *node_domain = SD_NODE_INIT;
+ node_domain->span = cpu_online_map;
+ }
+
+ /* Set up CPU (sibling) groups */
+ for_each_cpu_mask(i, cpu_online_map) {
+ struct sched_domain *cpu_domain = cpu_sched_domain(i);
+ int j;
+ first_cpu = last_cpu = NULL;
+
+ if (i != first_cpu(cpu_domain->span))
+ continue;
+
+ for_each_cpu_mask(j, cpu_domain->span) {
+ struct sched_group *cpu = &sched_group_cpus[j];
+
+ cpu->cpumask = CPU_MASK_NONE;
+ cpu_set(j, cpu->cpumask);
+
+ if (!first_cpu)
+ first_cpu = cpu;
+ if (last_cpu)
+ last_cpu->next = cpu;
+ last_cpu = cpu;
+ }
+ last_cpu->next = first_cpu;
+ }
+
+ for (i = 0; i < MAX_NUMNODES; i++) {
+ int j;
+ cpumask_t nodemask;
+ cpus_and(nodemask, node_to_cpumask(i), cpu_online_map);
+
+ if (cpus_empty(nodemask))
+ continue;
+
+ first_cpu = last_cpu = NULL;
+ /* Set up physical groups */
+ for_each_cpu_mask(j, nodemask) {
+ struct sched_domain *cpu_domain = cpu_sched_domain(j);
+ struct sched_group *cpu = &sched_group_phys[j];
+
+ if (j != first_cpu(cpu_domain->span))
+ continue;
+
+ cpu->cpumask = cpu_domain->span;
+
+ if (!first_cpu)
+ first_cpu = cpu;
+ if (last_cpu)
+ last_cpu->next = cpu;
+ last_cpu = cpu;
+ }
+ last_cpu->next = first_cpu;
+ }
+
+ /* Set up nodes */
+ first_cpu = last_cpu = NULL;
+ for (i = 0; i < MAX_NUMNODES; i++) {
+ struct sched_group *cpu = &sched_group_nodes[i];
+ cpumask_t nodemask;
+ cpus_and(nodemask, node_to_cpumask(i), cpu_online_map);
+
+ if (cpus_empty(nodemask))
+ continue;
+
+ cpu->cpumask = nodemask;
+
+ if (!first_cpu)
+ first_cpu = cpu;
+ if (last_cpu)
+ last_cpu->next = cpu;
+ last_cpu = cpu;
+ }
+ last_cpu->next = first_cpu;
+
+
+ mb();
+ for_each_cpu_mask(i, cpu_online_map) {
+ int node = cpu_to_node(i);
+ struct sched_domain *cpu_domain = cpu_sched_domain(i);
+ struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
+ struct sched_domain *node_domain = &per_cpu(node_domains, i);
+ struct sched_group *cpu_group = &sched_group_cpus[i];
+ struct sched_group *phys_group = &sched_group_phys[first_cpu(cpu_domain->span)];
+ struct sched_group *node_group = &sched_group_nodes[node];
+
+ cpu_domain->parent = phys_domain;
+ phys_domain->parent = node_domain;
+
+ node_domain->groups = node_group;
+ phys_domain->groups = phys_group;
+ cpu_domain->groups = cpu_group;
+ }
+}
+#else /* CONFIG_NUMA */
+static struct sched_group sched_group_cpus[NR_CPUS];
+static struct sched_group sched_group_phys[NR_CPUS];
+static DEFINE_PER_CPU(struct sched_domain, phys_domains);
+__init void arch_init_sched_domains(void)
+{
+ int i;
+ struct sched_group *first_cpu = NULL, *last_cpu = NULL;
+
+ /* Set up domains */
+ for_each_cpu_mask(i, cpu_online_map) {
+ struct sched_domain *cpu_domain = cpu_sched_domain(i);
+ struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
+
+ *cpu_domain = SD_SIBLING_INIT;
+ cpu_domain->span = cpu_sibling_map[i];
+
+ *phys_domain = SD_CPU_INIT;
+ phys_domain->span = cpu_online_map;
+ phys_domain->flags |= SD_FLAG_IDLE;
+ }
+
+ /* Set up CPU (sibling) groups */
+ for_each_cpu_mask(i, cpu_online_map) {
+ struct sched_domain *cpu_domain = cpu_sched_domain(i);
+ int j;
+ first_cpu = last_cpu = NULL;
+
+ if (i != first_cpu(cpu_domain->span))
+ continue;
+
+ for_each_cpu_mask(j, cpu_domain->span) {
+ struct sched_group *cpu = &sched_group_cpus[j];
+
+ cpus_clear(cpu->cpumask);
+ cpu_set(j, cpu->cpumask);
+
+ if (!first_cpu)
+ first_cpu = cpu;
+ if (last_cpu)
+ last_cpu->next = cpu;
+ last_cpu = cpu;
+ }
+ last_cpu->next = first_cpu;
+ }
+
+ first_cpu = last_cpu = NULL;
+ /* Set up physical groups */
+ for_each_cpu_mask(i, cpu_online_map) {
+ struct sched_domain *cpu_domain = cpu_sched_domain(i);
+ struct sched_group *cpu = &sched_group_phys[i];
+
+ if (i != first_cpu(cpu_domain->span))
+ continue;
+
+ cpu->cpumask = cpu_domain->span;
+
+ if (!first_cpu)
+ first_cpu = cpu;
+ if (last_cpu)
+ last_cpu->next = cpu;
+ last_cpu = cpu;
+ }
+ last_cpu->next = first_cpu;
+
+ mb();
+ for_each_cpu_mask(i, cpu_online_map) {
+ struct sched_domain *cpu_domain = cpu_sched_domain(i);
+ struct sched_domain *phys_domain = &per_cpu(phys_domains, i);
+ struct sched_group *cpu_group = &sched_group_cpus[i];
+ struct sched_group *phys_group = &sched_group_phys[first_cpu(cpu_domain->span)];
+ cpu_domain->parent = phys_domain;
+ phys_domain->groups = phys_group;
+ cpu_domain->groups = cpu_group;
+ }
+}
+#endif /* CONFIG_NUMA */
+#endif /* CONFIG_SCHED_SMT */
+
/* These are wrappers to interface to the new boot process. Someone
who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
void __init smp_prepare_cpus(unsigned int max_cpus)
diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h
index e114f9fe9cbe..d8ecd6046b1a 100644
--- a/include/asm-i386/processor.h
+++ b/include/asm-i386/processor.h
@@ -648,4 +648,9 @@ extern inline void prefetchw(const void *x)
extern void select_idle_routine(const struct cpuinfo_x86 *c);
+#ifdef CONFIG_SCHED_SMT
+#define ARCH_HAS_SCHED_DOMAIN
+#define ARCH_HAS_SCHED_WAKE_BALANCE
+#endif
+
#endif /* __ASM_I386_PROCESSOR_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8cb322f0839d..fa0b49eb1578 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -572,6 +572,22 @@ struct sched_domain {
unsigned int nr_balance_failed; /* initialise to 0 */
};
+/* Common values for SMT siblings */
+#define SD_SIBLING_INIT (struct sched_domain) { \
+ .span = CPU_MASK_NONE, \
+ .parent = NULL, \
+ .groups = NULL, \
+ .min_interval = 1, \
+ .max_interval = 2, \
+ .busy_factor = 8, \
+ .imbalance_pct = 110, \
+ .cache_hot_time = 0, \
+ .cache_nice_tries = 0, \
+ .flags = SD_FLAG_FASTMIGRATE | SD_FLAG_NEWIDLE | SD_FLAG_WAKE,\
+ .balance_interval = 1, \
+ .nr_balance_failed = 0, \
+}
+
/* Common values for CPUs */
#define SD_CPU_INIT (struct sched_domain) { \
.span = CPU_MASK_NONE, \
diff --git a/kernel/sched.c b/kernel/sched.c
index 4b9db0bff5bf..7293c40707ec 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3295,28 +3295,20 @@ DEFINE_PER_CPU(struct sched_domain, node_domains);
static void __init arch_init_sched_domains(void)
{
int i;
- cpumask_t all_cpus = CPU_MASK_NONE;
struct sched_group *first_node = NULL, *last_node = NULL;
- for (i = 0; i < NR_CPUS; i++) {
- if (!cpu_possible(i))
- continue;
-
- cpu_set(i, all_cpus);
- }
-
/* Set up domains */
- for_each_cpu_mask(i, all_cpus) {
+ for_each_cpu_mask(i, cpu_online_map) {
int node = cpu_to_node(i);
cpumask_t nodemask = node_to_cpumask(node);
struct sched_domain *node_domain = &per_cpu(node_domains, i);
struct sched_domain *cpu_domain = cpu_sched_domain(i);
*node_domain = SD_NODE_INIT;
- node_domain->span = all_cpus;
+ node_domain->span = cpu_online_map;
*cpu_domain = SD_CPU_INIT;
- cpus_and(cpu_domain->span, nodemask, all_cpus);
+ cpus_and(cpu_domain->span, nodemask, cpu_online_map);
cpu_domain->parent = node_domain;
}
@@ -3326,8 +3318,9 @@ static void __init arch_init_sched_domains(void)
int j;
cpumask_t nodemask;
struct sched_group *node = &sched_group_nodes[i];
+ cpumask_t tmp = node_to_cpumask(i);
- cpus_and(nodemask, node_to_cpumask(i), all_cpus);
+ cpus_and(nodemask, tmp, cpu_online_map);
if (cpus_empty(nodemask))
continue;
@@ -3357,7 +3350,7 @@ static void __init arch_init_sched_domains(void)
last_node->next = first_node;
mb();
- for_each_cpu_mask(i, all_cpus) {
+ for_each_cpu_mask(i, cpu_online_map) {
struct sched_domain *node_domain = &per_cpu(node_domains, i);
struct sched_domain *cpu_domain = cpu_sched_domain(i);
node_domain->groups = &sched_group_nodes[cpu_to_node(i)];
@@ -3369,26 +3362,18 @@ static void __init arch_init_sched_domains(void)
static void __init arch_init_sched_domains(void)
{
int i;
- cpumask_t all_cpus = CPU_MASK_NONE;
struct sched_group *first_cpu = NULL, *last_cpu = NULL;
- for (i = 0; i < NR_CPUS; i++) {
- if (!cpu_possible(i))
- continue;
-
- cpu_set(i, all_cpus);
- }
-
/* Set up domains */
- for_each_cpu_mask(i, all_cpus) {
+ for_each_cpu_mask(i, cpu_online_map) {
struct sched_domain *cpu_domain = cpu_sched_domain(i);
*cpu_domain = SD_CPU_INIT;
- cpu_domain->span = all_cpus;
+ cpu_domain->span = cpu_online_map;
}
/* Set up CPU groups */
- for_each_cpu_mask(i, all_cpus) {
+ for_each_cpu_mask(i, cpu_online_map) {
struct sched_group *cpu = &sched_group_cpus[i];
cpus_clear(cpu->cpumask);
@@ -3403,7 +3388,7 @@ static void __init arch_init_sched_domains(void)
last_cpu->next = first_cpu;
mb();
- for_each_cpu_mask(i, all_cpus) {
+ for_each_cpu_mask(i, cpu_online_map) {
struct sched_domain *cpu_domain = cpu_sched_domain(i);
cpu_domain->groups = &sched_group_cpus[i];
}