author     Nick Piggin <nickpiggin@yahoo.com.au>          2004-08-23 21:08:41 -0700
committer  Linus Torvalds <torvalds@ppc970.osdl.org>      2004-08-23 21:08:41 -0700
commit     8a7a2318dc07c98c8ae87bcf8258a829846a9512 (patch)
tree       f0d826479c44bb8251b74f251f547a4e812d8c0c /kernel
parent     c62e7cdb3655b52b3d8904efa3183b88248c92fd (diff)
[PATCH] sched: consolidate sched domains
Teach the generic domains builder about SMT, and consolidate all
architecture-specific domain code into it. Also, the SD_*_INIT macros can now
be redefined by arch code without duplicating the entire setup code. This can
be done by defining ARCH_HASH_SCHED_TUNE. The generic builder has been
simplified with the addition of a helper macro, which will probably prove
useful to arch-specific code as well and should be exported if that is the
case.

Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>

From: Matthew Dobson <colpatch@us.ibm.com>

The attached patch is against 2.6.8-rc2-mm2, and removes Nick's conditional
definition & population of cpu_sibling_map[] in favor of my unconditional
ones. This does not affect how cpu_sibling_map is used; it just gives it
broader scope.

From: Nick Piggin <nickpiggin@yahoo.com.au>

Small fix to sched-consolidate-domains.patch picked up by

From: Suresh <suresh.b.siddha@intel.com>

Another sched consolidate domains fix.

From: Nick Piggin <nickpiggin@yahoo.com.au>

Don't use cpu_sibling_map if !CONFIG_SCHED_SMT. This one was spotted by
Dimitri Sivanich <sivanich@sgi.com>.

Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
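To make the new helper's behaviour concrete before reading the diff, here is a
small standalone, user-space sketch (not the kernel code itself) of the same
grouping pass that init_sched_build_groups() performs: walk a span of CPUs,
classify each one with a group function, fill in each group's member mask, and
link the groups into a circular list. The cpumask is modelled as a plain 64-bit
mask, and every name here (toy_group, build_groups, TOY_NR_CPUS, the pairing
rule in cpu_to_phys_group) is invented for illustration.

/*
 * User-space sketch of the grouping pass added as init_sched_build_groups().
 * Assumptions: 8 CPUs, siblings 2n and 2n+1 share a "physical package".
 */
#include <stdint.h>
#include <stdio.h>

#define TOY_NR_CPUS 8

struct toy_group {
	uint64_t cpumask;		/* CPUs belonging to this group */
	unsigned int cpu_power;		/* left at 0, as in the kernel helper */
	struct toy_group *next;		/* circular list of peer groups */
};

/* Classifier: which group does this CPU belong to? */
static int cpu_to_phys_group(int cpu)
{
	return cpu / 2;
}

static void build_groups(struct toy_group groups[], uint64_t span,
			 int (*group_fn)(int cpu))
{
	struct toy_group *first = NULL, *last = NULL;
	uint64_t covered = 0;
	int i, j;

	for (i = 0; i < TOY_NR_CPUS; i++) {
		struct toy_group *sg;

		if (!(span & (1ULL << i)) || (covered & (1ULL << i)))
			continue;

		sg = &groups[group_fn(i)];
		sg->cpumask = 0;
		sg->cpu_power = 0;

		/* Pull every CPU in the span that maps to the same group. */
		for (j = 0; j < TOY_NR_CPUS; j++) {
			if (!(span & (1ULL << j)) || group_fn(j) != group_fn(i))
				continue;
			covered |= 1ULL << j;
			sg->cpumask |= 1ULL << j;
		}

		if (!first)
			first = sg;
		if (last)
			last->next = sg;
		last = sg;
	}
	if (last)
		last->next = first;	/* close the circular list */
}

int main(void)
{
	struct toy_group groups[TOY_NR_CPUS] = { { 0 } };
	struct toy_group *sg, *start;

	build_groups(groups, 0xffULL, cpu_to_phys_group);

	/* Walk the circular list once, starting at CPU 0's group. */
	start = &groups[cpu_to_phys_group(0)];
	sg = start;
	do {
		printf("group mask 0x%02llx\n",
		       (unsigned long long)sg->cpumask);
		sg = sg->next;
	} while (sg != start);

	return 0;
}

Building and running this prints one mask per "package" (0x03, 0x0c, 0x30,
0xc0), which mirrors how the kernel helper covers the span exactly once and
leaves each group's ->cpumask and ->next filled in for the caller.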
Diffstat (limited to 'kernel')
-rw-r--r--   kernel/sched.c   226
1 file changed, 145 insertions(+), 81 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 8a58b9f62124..99fedf5840eb 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3659,118 +3659,182 @@ void cpu_attach_domain(struct sched_domain *sd, int cpu)
#ifdef ARCH_HAS_SCHED_DOMAIN
extern void __init arch_init_sched_domains(void);
#else
-static struct sched_group sched_group_cpus[NR_CPUS];
+
+#ifdef CONFIG_SCHED_SMT
static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
-#ifdef CONFIG_NUMA
-static struct sched_group sched_group_nodes[MAX_NUMNODES];
-static DEFINE_PER_CPU(struct sched_domain, node_domains);
-static void __init arch_init_sched_domains(void)
+static struct sched_group sched_group_cpus[NR_CPUS];
+__init static int cpu_to_cpu_group(int cpu)
{
- int i;
- struct sched_group *first_node = NULL, *last_node = NULL;
+ return cpu;
+}
+#endif
- /* Set up domains */
- for_each_cpu(i) {
- int node = cpu_to_node(i);
- cpumask_t nodemask = node_to_cpumask(node);
- struct sched_domain *node_sd = &per_cpu(node_domains, i);
- struct sched_domain *cpu_sd = &per_cpu(cpu_domains, i);
+static DEFINE_PER_CPU(struct sched_domain, phys_domains);
+static struct sched_group sched_group_phys[NR_CPUS];
+__init static int cpu_to_phys_group(int cpu)
+{
+#ifdef CONFIG_SCHED_SMT
+ return first_cpu(cpu_sibling_map[cpu]);
+#else
+ return cpu;
+#endif
+}
- *node_sd = SD_NODE_INIT;
- node_sd->span = cpu_possible_map;
- node_sd->groups = &sched_group_nodes[cpu_to_node(i)];
+#ifdef CONFIG_NUMA
+static DEFINE_PER_CPU(struct sched_domain, node_domains);
+static struct sched_group sched_group_nodes[MAX_NUMNODES];
+__init static int cpu_to_node_group(int cpu)
+{
+ return cpu_to_node(cpu);
+}
+#endif
- *cpu_sd = SD_CPU_INIT;
- cpus_and(cpu_sd->span, nodemask, cpu_possible_map);
- cpu_sd->groups = &sched_group_cpus[i];
- cpu_sd->parent = node_sd;
- }
+/*
+ * init_sched_build_groups takes an array of groups, the cpumask we wish
+ * to span, and a pointer to a function which identifies what group a CPU
+ * belongs to. The return value of group_fn must be a valid index into the
+ * groups[] array, and must be >= 0 and < NR_CPUS (due to the fact that we
+ * keep track of groups covered with a cpumask_t).
+ *
+ * init_sched_build_groups will build a circular linked list of the groups
+ * covered by the given span, and will set each group's ->cpumask correctly,
+ * and ->cpu_power to 0.
+ */
+__init static void init_sched_build_groups(struct sched_group groups[],
+ cpumask_t span, int (*group_fn)(int cpu))
+{
+ struct sched_group *first = NULL, *last = NULL;
+ cpumask_t covered = CPU_MASK_NONE;
+ int i;
- /* Set up groups */
- for (i = 0; i < MAX_NUMNODES; i++) {
- cpumask_t tmp = node_to_cpumask(i);
- cpumask_t nodemask;
- struct sched_group *first_cpu = NULL, *last_cpu = NULL;
- struct sched_group *node = &sched_group_nodes[i];
+ for_each_cpu_mask(i, span) {
+ int group = group_fn(i);
+ struct sched_group *sg = &groups[group];
int j;
- cpus_and(nodemask, tmp, cpu_possible_map);
-
- if (cpus_empty(nodemask))
+ if (cpu_isset(i, covered))
continue;
- node->cpumask = nodemask;
- node->cpu_power = SCHED_LOAD_SCALE * cpus_weight(node->cpumask);
-
- for_each_cpu_mask(j, node->cpumask) {
- struct sched_group *cpu = &sched_group_cpus[j];
+ sg->cpumask = CPU_MASK_NONE;
+ sg->cpu_power = 0;
- cpus_clear(cpu->cpumask);
- cpu_set(j, cpu->cpumask);
- cpu->cpu_power = SCHED_LOAD_SCALE;
+ for_each_cpu_mask(j, span) {
+ if (group_fn(j) != group)
+ continue;
- if (!first_cpu)
- first_cpu = cpu;
- if (last_cpu)
- last_cpu->next = cpu;
- last_cpu = cpu;
+ cpu_set(j, covered);
+ cpu_set(j, sg->cpumask);
}
- last_cpu->next = first_cpu;
-
- if (!first_node)
- first_node = node;
- if (last_node)
- last_node->next = node;
- last_node = node;
- }
- last_node->next = first_node;
-
- mb();
- for_each_cpu(i) {
- struct sched_domain *cpu_sd = &per_cpu(cpu_domains, i);
- cpu_attach_domain(cpu_sd, i);
+ if (!first)
+ first = sg;
+ if (last)
+ last->next = sg;
+ last = sg;
}
+ last->next = first;
}
-#else /* !CONFIG_NUMA */
-static void __init arch_init_sched_domains(void)
+__init static void arch_init_sched_domains(void)
{
int i;
- struct sched_group *first_cpu = NULL, *last_cpu = NULL;
/* Set up domains */
for_each_cpu(i) {
- struct sched_domain *cpu_sd = &per_cpu(cpu_domains, i);
+ int group;
+ struct sched_domain *sd = NULL, *p;
+ cpumask_t nodemask = node_to_cpumask(cpu_to_node(i));
+
+#ifdef CONFIG_NUMA
+ sd = &per_cpu(node_domains, i);
+ group = cpu_to_node_group(i);
+ *sd = SD_NODE_INIT;
+ sd->span = cpu_possible_map;
+ sd->groups = &sched_group_nodes[group];
+#endif
- *cpu_sd = SD_CPU_INIT;
- cpu_sd->span = cpu_possible_map;
- cpu_sd->groups = &sched_group_cpus[i];
+ p = sd;
+ sd = &per_cpu(phys_domains, i);
+ group = cpu_to_phys_group(i);
+ *sd = SD_CPU_INIT;
+ sd->span = nodemask;
+ sd->parent = p;
+ sd->groups = &sched_group_phys[group];
+
+#ifdef CONFIG_SCHED_SMT
+ p = sd;
+ sd = &per_cpu(cpu_domains, i);
+ group = cpu_to_cpu_group(i);
+ *sd = SD_SIBLING_INIT;
+ sd->span = cpu_sibling_map[i];
+ sd->parent = p;
+ sd->groups = &sched_group_cpus[group];
+#endif
}
- /* Set up CPU groups */
- for_each_cpu_mask(i, cpu_possible_map) {
- struct sched_group *cpu = &sched_group_cpus[i];
+#ifdef CONFIG_SCHED_SMT
+ /* Set up CPU (sibling) groups */
+ for_each_cpu(i) {
+ if (i != first_cpu(cpu_sibling_map[i]))
+ continue;
- cpus_clear(cpu->cpumask);
- cpu_set(i, cpu->cpumask);
- cpu->cpu_power = SCHED_LOAD_SCALE;
+ init_sched_build_groups(sched_group_cpus, cpu_sibling_map[i],
+ &cpu_to_cpu_group);
+ }
+#endif
+
+ /* Set up physical groups */
+ for (i = 0; i < MAX_NUMNODES; i++) {
+ cpumask_t nodemask = node_to_cpumask(i);
- if (!first_cpu)
- first_cpu = cpu;
- if (last_cpu)
- last_cpu->next = cpu;
- last_cpu = cpu;
+ cpus_and(nodemask, nodemask, cpu_possible_map);
+ if (cpus_empty(nodemask))
+ continue;
+
+ init_sched_build_groups(sched_group_phys, nodemask,
+ &cpu_to_phys_group);
}
- last_cpu->next = first_cpu;
- mb(); /* domains were modified outside the lock */
+#ifdef CONFIG_NUMA
+ /* Set up node groups */
+ init_sched_build_groups(sched_group_nodes, cpu_possible_map,
+ &cpu_to_node_group);
+#endif
+
+ /* Calculate CPU power for physical packages and nodes */
+ for_each_cpu(i) {
+ int power;
+ struct sched_domain *sd;
+#ifdef CONFIG_SCHED_SMT
+ sd = &per_cpu(cpu_domains, i);
+ power = SCHED_LOAD_SCALE;
+ sd->groups->cpu_power = power;
+#endif
+
+ sd = &per_cpu(phys_domains, i);
+ power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
+ (cpus_weight(sd->groups->cpumask)-1) / 10;
+ sd->groups->cpu_power = power;
+
+#ifdef CONFIG_NUMA
+ if (i == first_cpu(sd->groups->cpumask)) {
+ /* Only add "power" once for each physical package. */
+ sd = &per_cpu(node_domains, i);
+ sd->groups->cpu_power += power;
+ }
+#endif
+ }
+
+ /* Attach the domains */
for_each_cpu(i) {
- struct sched_domain *cpu_sd = &per_cpu(cpu_domains, i);
- cpu_attach_domain(cpu_sd, i);
+ struct sched_domain *sd;
+#ifdef CONFIG_SCHED_SMT
+ sd = &per_cpu(cpu_domains, i);
+#else
+ sd = &per_cpu(phys_domains, i);
+#endif
+ cpu_attach_domain(sd, i);
}
}
-
-#endif /* CONFIG_NUMA */
#endif /* ARCH_HAS_SCHED_DOMAIN */
#define SCHED_DOMAIN_DEBUG
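
The cpu_power assignments near the end of the new arch_init_sched_domains()
are easiest to see with numbers. Below is a hedged, user-space sketch of just
that arithmetic: each SMT sibling group keeps one full SCHED_LOAD_SCALE, while
a physical-package group gains only about 10% of a scale unit per extra
sibling, reflecting that a second hyperthread adds much less than a full CPU.
The value 128 is my reading of the 2.6-era SCHED_LOAD_SCALE (1UL << 7), and
the helper name phys_group_power is invented for illustration.

#include <stdio.h>

#define SCHED_LOAD_SCALE 128UL	/* assumed 2.6-era value, 1UL << 7 */

/* Power of a physical-package group with the given number of SMT siblings. */
static unsigned long phys_group_power(unsigned long siblings)
{
	return SCHED_LOAD_SCALE +
	       SCHED_LOAD_SCALE * (siblings - 1) / 10;
}

int main(void)
{
	/* No SMT: 128.  2-way SMT: 128 + 128*1/10 = 140 (integer division). */
	printf("non-SMT package: %lu\n", phys_group_power(1));
	printf("2-way SMT package: %lu\n", phys_group_power(2));
	return 0;
}

So a 2-way SMT package reports 140 rather than 256, and in the NUMA case that
per-package value is added into the node group's power only once, on the first
CPU of each package's cpumask.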