| field | value | date |
|---|---|---|
| author | Nick Piggin <nickpiggin@yahoo.com.au> | 2004-08-23 21:08:41 -0700 |
| committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2004-08-23 21:08:41 -0700 |
| commit | 8a7a2318dc07c98c8ae87bcf8258a829846a9512 (patch) | |
| tree | f0d826479c44bb8251b74f251f547a4e812d8c0c /kernel | |
| parent | c62e7cdb3655b52b3d8904efa3183b88248c92fd (diff) | |
[PATCH] sched: consolidate sched domains
Teach the generic domains builder about SMT, and consolidate all
architecture-specific domain code into it. Also, the SD_*_INIT macros can
now be redefined by arch code without duplicating the entire setup code;
this is done by defining ARCH_HAS_SCHED_TUNE.
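For illustration only, a minimal sketch of what such an arch override might look like; the guard placement and the particular tuning values are assumptions, not code from this patch:

```c
/*
 * Sketch only, not code from this patch.  It assumes the generic
 * SD_*_INIT defaults end up wrapped in "#ifndef ARCH_HAS_SCHED_TUNE",
 * so an arch header can pre-define the macro and supply its own tuning.
 */
#define ARCH_HAS_SCHED_TUNE

/* Arch-chosen tuning; only the fields being overridden are shown. */
#define SD_CPU_INIT (struct sched_domain) {			\
	.span		= CPU_MASK_NONE,			\
	.flags		= SD_LOAD_BALANCE | SD_BALANCE_EXEC	\
			| SD_WAKE_AFFINE,			\
	.balance_interval = 1,					\
}
```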
The generic builder has been simplified by the addition of a helper,
init_sched_build_groups(), which will probably prove useful to
arch-specific code as well and should be exported if that turns out to be
the case.
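As an illustration of that helper's interface (it is added by the diff below), a hedged sketch of how arch code might call it if it were exported; sched_group_boards, cpu_to_board_group() and arch_setup_board_groups() are hypothetical names, not real kernel symbols:

```c
/* Hypothetical caller of init_sched_build_groups(); the "board" topology
 * level and all names below are illustrative only. */
static struct sched_group sched_group_boards[NR_CPUS];

__init static int cpu_to_board_group(int cpu)
{
	/* Map each CPU to an arch-defined "board" group index. */
	return cpu / 4;
}

__init static void arch_setup_board_groups(void)
{
	/*
	 * Builds a circular list of groups spanning all possible CPUs,
	 * filling in each group's ->cpumask and zeroing ->cpu_power for
	 * the caller to tune afterwards.
	 */
	init_sched_build_groups(sched_group_boards, cpu_possible_map,
					&cpu_to_board_group);
}
```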
Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
From: Matthew Dobson <colpatch@us.ibm.com>
The attached patch is against 2.6.8-rc2-mm2 and removes Nick's
conditional definition and population of cpu_sibling_map[] in favor of my
unconditional ones. This does not affect how cpu_sibling_map is used; it
just gives the map broader scope.
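A rough sketch of the shape this gives the map (illustrative only; the real change lives in the arch SMP boot code, and init_sibling_map() is a hypothetical helper name):

```c
/* Defined unconditionally, not only under CONFIG_SCHED_SMT. */
cpumask_t cpu_sibling_map[NR_CPUS];

static void __init init_sibling_map(int cpu)
{
	/* Every CPU is at least its own sibling ... */
	cpus_clear(cpu_sibling_map[cpu]);
	cpu_set(cpu, cpu_sibling_map[cpu]);
	/*
	 * ... and the arch boot code additionally sets the bit of any
	 * hyperthreaded sibling it detects in the same package.
	 */
}
```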
From: Nick Piggin <nickpiggin@yahoo.com.au>
Small fix to sched-consolidate-domains.patch picked up by
From: Suresh <suresh.b.siddha@intel.com>
Another sched-consolidate-domains fix.
From: Nick Piggin <nickpiggin@yahoo.com.au>
Don't use cpu_sibling_map if !CONFIG_SCHED_SMT.
This one was spotted by Dimitri Sivanich <sivanich@sgi.com>.
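Concretely, the consolidated builder below only dereferences the sibling map behind the config option; without CONFIG_SCHED_SMT the physical-group mapping falls back to the CPU number itself:

```c
__init static int cpu_to_phys_group(int cpu)
{
#ifdef CONFIG_SCHED_SMT
	return first_cpu(cpu_sibling_map[cpu]);
#else
	return cpu;
#endif
}
```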
Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'kernel')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | kernel/sched.c | 226 |

1 file changed, 145 insertions(+), 81 deletions(-)
```diff
diff --git a/kernel/sched.c b/kernel/sched.c
index 8a58b9f62124..99fedf5840eb 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3659,118 +3659,182 @@ void cpu_attach_domain(struct sched_domain *sd, int cpu)
 #ifdef ARCH_HAS_SCHED_DOMAIN
 extern void __init arch_init_sched_domains(void);
 #else
-static struct sched_group sched_group_cpus[NR_CPUS];
+
+#ifdef CONFIG_SCHED_SMT
 static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
-#ifdef CONFIG_NUMA
-static struct sched_group sched_group_nodes[MAX_NUMNODES];
-static DEFINE_PER_CPU(struct sched_domain, node_domains);
-static void __init arch_init_sched_domains(void)
+static struct sched_group sched_group_cpus[NR_CPUS];
+__init static int cpu_to_cpu_group(int cpu)
 {
-        int i;
-        struct sched_group *first_node = NULL, *last_node = NULL;
+        return cpu;
+}
+#endif
 
-        /* Set up domains */
-        for_each_cpu(i) {
-                int node = cpu_to_node(i);
-                cpumask_t nodemask = node_to_cpumask(node);
-                struct sched_domain *node_sd = &per_cpu(node_domains, i);
-                struct sched_domain *cpu_sd = &per_cpu(cpu_domains, i);
+static DEFINE_PER_CPU(struct sched_domain, phys_domains);
+static struct sched_group sched_group_phys[NR_CPUS];
+__init static int cpu_to_phys_group(int cpu)
+{
+#ifdef CONFIG_SCHED_SMT
+        return first_cpu(cpu_sibling_map[cpu]);
+#else
+        return cpu;
+#endif
+}
 
-                *node_sd = SD_NODE_INIT;
-                node_sd->span = cpu_possible_map;
-                node_sd->groups = &sched_group_nodes[cpu_to_node(i)];
+#ifdef CONFIG_NUMA
+static DEFINE_PER_CPU(struct sched_domain, node_domains);
+static struct sched_group sched_group_nodes[MAX_NUMNODES];
+__init static int cpu_to_node_group(int cpu)
+{
+        return cpu_to_node(cpu);
+}
+#endif
 
-                *cpu_sd = SD_CPU_INIT;
-                cpus_and(cpu_sd->span, nodemask, cpu_possible_map);
-                cpu_sd->groups = &sched_group_cpus[i];
-                cpu_sd->parent = node_sd;
-        }
+/*
+ * init_sched_build_groups takes an array of groups, the cpumask we wish
+ * to span, and a pointer to a function which identifies what group a CPU
+ * belongs to. The return value of group_fn must be a valid index into the
+ * groups[] array, and must be >= 0 and < NR_CPUS (due to the fact that we
+ * keep track of groups covered with a cpumask_t).
+ *
+ * init_sched_build_groups will build a circular linked list of the groups
+ * covered by the given span, and will set each group's ->cpumask correctly,
+ * and ->cpu_power to 0.
+ */
+__init static void init_sched_build_groups(struct sched_group groups[],
+                        cpumask_t span, int (*group_fn)(int cpu))
+{
+        struct sched_group *first = NULL, *last = NULL;
+        cpumask_t covered = CPU_MASK_NONE;
+        int i;
 
-        /* Set up groups */
-        for (i = 0; i < MAX_NUMNODES; i++) {
-                cpumask_t tmp = node_to_cpumask(i);
-                cpumask_t nodemask;
-                struct sched_group *first_cpu = NULL, *last_cpu = NULL;
-                struct sched_group *node = &sched_group_nodes[i];
+        for_each_cpu_mask(i, span) {
+                int group = group_fn(i);
+                struct sched_group *sg = &groups[group];
                 int j;
 
-                cpus_and(nodemask, tmp, cpu_possible_map);
-
-                if (cpus_empty(nodemask))
+                if (cpu_isset(i, covered))
                         continue;
 
-                node->cpumask = nodemask;
-                node->cpu_power = SCHED_LOAD_SCALE * cpus_weight(node->cpumask);
-
-                for_each_cpu_mask(j, node->cpumask) {
-                        struct sched_group *cpu = &sched_group_cpus[j];
+                sg->cpumask = CPU_MASK_NONE;
+                sg->cpu_power = 0;
 
-                        cpus_clear(cpu->cpumask);
-                        cpu_set(j, cpu->cpumask);
-                        cpu->cpu_power = SCHED_LOAD_SCALE;
+                for_each_cpu_mask(j, span) {
+                        if (group_fn(j) != group)
+                                continue;
 
-                        if (!first_cpu)
-                                first_cpu = cpu;
-                        if (last_cpu)
-                                last_cpu->next = cpu;
-                        last_cpu = cpu;
+                        cpu_set(j, covered);
+                        cpu_set(j, sg->cpumask);
                 }
-                last_cpu->next = first_cpu;
-
-                if (!first_node)
-                        first_node = node;
-                if (last_node)
-                        last_node->next = node;
-                last_node = node;
-        }
-        last_node->next = first_node;
-
-        mb();
-        for_each_cpu(i) {
-                struct sched_domain *cpu_sd = &per_cpu(cpu_domains, i);
-                cpu_attach_domain(cpu_sd, i);
+                if (!first)
+                        first = sg;
+                if (last)
+                        last->next = sg;
+                last = sg;
         }
+        last->next = first;
 }
 
-#else /* !CONFIG_NUMA */
-static void __init arch_init_sched_domains(void)
+__init static void arch_init_sched_domains(void)
 {
         int i;
-        struct sched_group *first_cpu = NULL, *last_cpu = NULL;
 
         /* Set up domains */
         for_each_cpu(i) {
-                struct sched_domain *cpu_sd = &per_cpu(cpu_domains, i);
+                int group;
+                struct sched_domain *sd = NULL, *p;
+                cpumask_t nodemask = node_to_cpumask(cpu_to_node(i));
+
+#ifdef CONFIG_NUMA
+                sd = &per_cpu(node_domains, i);
+                group = cpu_to_node_group(i);
+                *sd = SD_NODE_INIT;
+                sd->span = cpu_possible_map;
+                sd->groups = &sched_group_nodes[group];
+#endif
 
-                *cpu_sd = SD_CPU_INIT;
-                cpu_sd->span = cpu_possible_map;
-                cpu_sd->groups = &sched_group_cpus[i];
+                p = sd;
+                sd = &per_cpu(phys_domains, i);
+                group = cpu_to_phys_group(i);
+                *sd = SD_CPU_INIT;
+                sd->span = nodemask;
+                sd->parent = p;
+                sd->groups = &sched_group_phys[group];
+
+#ifdef CONFIG_SCHED_SMT
+                p = sd;
+                sd = &per_cpu(cpu_domains, i);
+                group = cpu_to_cpu_group(i);
+                *sd = SD_SIBLING_INIT;
+                sd->span = cpu_sibling_map[i];
+                sd->parent = p;
+                sd->groups = &sched_group_cpus[group];
+#endif
         }
 
-        /* Set up CPU groups */
-        for_each_cpu_mask(i, cpu_possible_map) {
-                struct sched_group *cpu = &sched_group_cpus[i];
+#ifdef CONFIG_SCHED_SMT
+        /* Set up CPU (sibling) groups */
+        for_each_cpu(i) {
+                if (i != first_cpu(cpu_sibling_map[i]))
+                        continue;
 
-                cpus_clear(cpu->cpumask);
-                cpu_set(i, cpu->cpumask);
-                cpu->cpu_power = SCHED_LOAD_SCALE;
+                init_sched_build_groups(sched_group_cpus, cpu_sibling_map[i],
+                                                &cpu_to_cpu_group);
+        }
+#endif
+
+        /* Set up physical groups */
+        for (i = 0; i < MAX_NUMNODES; i++) {
+                cpumask_t nodemask = node_to_cpumask(i);
 
-                if (!first_cpu)
-                        first_cpu = cpu;
-                if (last_cpu)
-                        last_cpu->next = cpu;
-                last_cpu = cpu;
+                cpus_and(nodemask, nodemask, cpu_possible_map);
+                if (cpus_empty(nodemask))
+                        continue;
+
+                init_sched_build_groups(sched_group_phys, nodemask,
+                                                &cpu_to_phys_group);
         }
-        last_cpu->next = first_cpu;
 
-        mb(); /* domains were modified outside the lock */
+#ifdef CONFIG_NUMA
+        /* Set up node groups */
+        init_sched_build_groups(sched_group_nodes, cpu_possible_map,
+                                        &cpu_to_node_group);
+#endif
+
+        /* Calculate CPU power for physical packages and nodes */
+        for_each_cpu(i) {
+                int power;
+                struct sched_domain *sd;
+#ifdef CONFIG_SCHED_SMT
+                sd = &per_cpu(cpu_domains, i);
+                power = SCHED_LOAD_SCALE;
+                sd->groups->cpu_power = power;
+#endif
+
+                sd = &per_cpu(phys_domains, i);
+                power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
+                                (cpus_weight(sd->groups->cpumask)-1) / 10;
+                sd->groups->cpu_power = power;
+
+#ifdef CONFIG_NUMA
+                if (i == first_cpu(sd->groups->cpumask)) {
+                        /* Only add "power" once for each physical package. */
+                        sd = &per_cpu(node_domains, i);
+                        sd->groups->cpu_power += power;
+                }
+#endif
+        }
+
+        /* Attach the domains */
         for_each_cpu(i) {
-                struct sched_domain *cpu_sd = &per_cpu(cpu_domains, i);
-                cpu_attach_domain(cpu_sd, i);
+                struct sched_domain *sd;
+#ifdef CONFIG_SCHED_SMT
+                sd = &per_cpu(cpu_domains, i);
+#else
+                sd = &per_cpu(phys_domains, i);
+#endif
+                cpu_attach_domain(sd, i);
         }
 }
-
-#endif /* CONFIG_NUMA */
 #endif /* ARCH_HAS_SCHED_DOMAIN */
 
 #define SCHED_DOMAIN_DEBUG
```
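For readers following the new "Calculate CPU power" step, here is a standalone, userspace-style illustration of the arithmetic, assuming SCHED_LOAD_SCALE is 128 as in kernels of this era; it is not kernel code:

```c
#include <stdio.h>

#define SCHED_LOAD_SCALE 128	/* assumed value, matching 2.6.8-era kernels */

/*
 * Same arithmetic the patch applies to a physical-package group: one full
 * CPU of power, plus 10% of a CPU for every extra SMT sibling.
 */
static int phys_group_power(int nr_siblings)
{
	return SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * (nr_siblings - 1) / 10;
}

int main(void)
{
	/* A 2-thread HT package: 128 + 12 = 140, i.e. ~10% more than one CPU. */
	printf("power(2 siblings) = %d\n", phys_group_power(2));
	return 0;
}
```

Rating a whole hyperthreaded package only slightly stronger than a single logical CPU is what lets the balancer spread tasks across packages before it starts stacking siblings.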
