From 135fb3e19773e66f56b60e3b9fdda6166e77c55d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Mar 2016 12:54:11 +0100 Subject: sched: Consolidate the notifier maze We can maintain the ordering of the scheduler cpu hotplug functionality nicely in one notifier. Get rid of the maze. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: rt@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpu.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'include/linux/cpu.h') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index f9b1fab4388a..17017051bfb1 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -61,19 +61,15 @@ struct notifier_block; enum { /* * SCHED_ACTIVE marks a cpu which is coming up active during - * CPU_ONLINE and CPU_DOWN_FAILED and must be the first - * notifier. CPUSET_ACTIVE adjusts cpuset according to - * cpu_active mask right after SCHED_ACTIVE. During - * CPU_DOWN_PREPARE, SCHED_INACTIVE and CPUSET_INACTIVE are - * ordered in the similar way. + * CPU_ONLINE and CPU_DOWN_FAILED and must be the first notifier. Is + * also cpuset according to cpu_active mask right after activating the + * cpu. During CPU_DOWN_PREPARE, SCHED_INACTIVE reversed the operation. * * This ordering guarantees consistent cpu_active mask and * migration behavior to all cpu notifiers. */ CPU_PRI_SCHED_ACTIVE = INT_MAX, - CPU_PRI_CPUSET_ACTIVE = INT_MAX - 1, - CPU_PRI_SCHED_INACTIVE = INT_MIN + 1, - CPU_PRI_CPUSET_INACTIVE = INT_MIN, + CPU_PRI_SCHED_INACTIVE = INT_MIN, /* migration should happen before other stuff but after perf */ CPU_PRI_PERF = 20, -- cgit v1.2.3 From 40190a78f85fec29f0fdd21f6b4415712085711e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Mar 2016 12:54:13 +0100 Subject: sched/hotplug: Convert cpu_[in]active notifiers to state machine Now that we reduced everything into single notifiers, it's simple to move them into the hotplug state machine space. 
Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: rt@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpu.h | 12 --------- include/linux/cpuhotplug.h | 1 + include/linux/sched.h | 2 ++ kernel/cpu.c | 8 ++++-- kernel/sched/core.c | 67 +++++++++++++++------------------------------- 5 files changed, 30 insertions(+), 60 deletions(-) (limited to 'include/linux/cpu.h') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 17017051bfb1..b22b000cf6ee 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -59,18 +59,6 @@ struct notifier_block; * CPU notifier priorities. */ enum { - /* - * SCHED_ACTIVE marks a cpu which is coming up active during - * CPU_ONLINE and CPU_DOWN_FAILED and must be the first notifier. Is - * also cpuset according to cpu_active mask right after activating the - * cpu. During CPU_DOWN_PREPARE, SCHED_INACTIVE reversed the operation. - * - * This ordering guarantees consistent cpu_active mask and - * migration behavior to all cpu notifiers. 
- */ - CPU_PRI_SCHED_ACTIVE = INT_MAX, - CPU_PRI_SCHED_INACTIVE = INT_MIN, - /* migration should happen before other stuff but after perf */ CPU_PRI_PERF = 20, CPU_PRI_MIGRATION = 10, diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 99fd1d2f76fe..9e07468bf1c5 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -13,6 +13,7 @@ enum cpuhp_state { CPUHP_AP_ONLINE, CPUHP_TEARDOWN_CPU, CPUHP_AP_ONLINE_IDLE, + CPUHP_AP_ACTIVE, CPUHP_AP_SMPBOOT_THREADS, CPUHP_AP_NOTIFY_ONLINE, CPUHP_AP_ONLINE_DYN, diff --git a/include/linux/sched.h b/include/linux/sched.h index 39597d0a005e..1e5f961b1a74 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -373,6 +373,8 @@ extern void trap_init(void); extern void update_process_times(int user); extern void scheduler_tick(void); extern int sched_cpu_starting(unsigned int cpu); +extern int sched_cpu_activate(unsigned int cpu); +extern int sched_cpu_deactivate(unsigned int cpu); extern void sched_show_task(struct task_struct *p); diff --git a/kernel/cpu.c b/kernel/cpu.c index f46d02b966bf..15402b72fbc7 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -923,8 +923,6 @@ void cpuhp_online_idle(enum cpuhp_state state) st->state = CPUHP_AP_ONLINE_IDLE; - /* The cpu is marked online, set it active now */ - set_cpu_active(cpu, true); /* Unpark the stopper thread and the hotplug thread of this cpu */ stop_machine_unpark(cpu); kthread_unpark(st->thread); @@ -1259,6 +1257,12 @@ static struct cpuhp_step cpuhp_ap_states[] = { [CPUHP_AP_ONLINE] = { .name = "ap:online", }, + /* First state is scheduler control. 
Interrupts are enabled */ + [CPUHP_AP_ACTIVE] = { + .name = "sched:active", + .startup = sched_cpu_activate, + .teardown = sched_cpu_deactivate, + }, /* Handle smpboot threads park/unpark */ [CPUHP_AP_SMPBOOT_THREADS] = { .name = "smpboot:threads", diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 541f9ab8ce4f..73bcd937d436 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6634,9 +6634,6 @@ static void sched_domains_numa_masks_set(unsigned int cpu) int node = cpu_to_node(cpu); int i, j; - if (!sched_smp_initialized) - return; - for (i = 0; i < sched_domains_numa_levels; i++) { for (j = 0; j < nr_node_ids; j++) { if (node_distance(j, node) <= sched_domains_numa_distance[i]) @@ -6649,9 +6646,6 @@ static void sched_domains_numa_masks_clear(unsigned int cpu) { int i, j; - if (!sched_smp_initialized) - return; - for (i = 0; i < sched_domains_numa_levels; i++) { for (j = 0; j < nr_node_ids; j++) cpumask_clear_cpu(cpu, sched_domains_numa_masks[i][j]); @@ -7051,12 +7045,9 @@ static int num_cpus_frozen; /* used to mark begin/end of suspend/resume */ * If we come here as part of a suspend/resume, don't touch cpusets because we * want to restore it back to its original state upon resume anyway. */ -static void cpuset_cpu_active(bool frozen) +static void cpuset_cpu_active(void) { - if (!sched_smp_initialized) - return; - - if (frozen) { + if (cpuhp_tasks_frozen) { /* * num_cpus_frozen tracks how many CPUs are involved in suspend * resume sequence. 
As long as this is not the last online @@ -7077,17 +7068,14 @@ static void cpuset_cpu_active(bool frozen) cpuset_update_active_cpus(true); } -static int cpuset_cpu_inactive(unsigned int cpu, bool frozen) +static int cpuset_cpu_inactive(unsigned int cpu) { unsigned long flags; struct dl_bw *dl_b; bool overflow; int cpus; - if (!sched_smp_initialized) - return 0; - - if (!frozen) { + if (!cpuhp_tasks_frozen) { rcu_read_lock_sched(); dl_b = dl_bw_of(cpu); @@ -7108,42 +7096,33 @@ static int cpuset_cpu_inactive(unsigned int cpu, bool frozen) return 0; } -static int sched_cpu_active(struct notifier_block *nfb, unsigned long action, - void *hcpu) +int sched_cpu_activate(unsigned int cpu) { - unsigned int cpu = (unsigned long)hcpu; + set_cpu_active(cpu, true); - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_DOWN_FAILED: - case CPU_ONLINE: - set_cpu_active(cpu, true); + if (sched_smp_initialized) { sched_domains_numa_masks_set(cpu); - cpuset_cpu_active(action & CPU_TASKS_FROZEN); - return NOTIFY_OK; - default: - return NOTIFY_DONE; + cpuset_cpu_active(); } + return 0; } -static int sched_cpu_inactive(struct notifier_block *nfb, - unsigned long action, void *hcpu) +int sched_cpu_deactivate(unsigned int cpu) { - unsigned int cpu = (unsigned long)hcpu; int ret; - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_DOWN_PREPARE: - set_cpu_active(cpu, false); - ret = cpuset_cpu_inactive(cpu, action & CPU_TASKS_FROZEN); - if (ret) { - set_cpu_active(cpu, true); - return notifier_from_errno(ret); - } - sched_domains_numa_masks_clear(cpu); - return NOTIFY_OK; - default: - return NOTIFY_DONE; + set_cpu_active(cpu, false); + + if (!sched_smp_initialized) + return 0; + + ret = cpuset_cpu_inactive(cpu); + if (ret) { + set_cpu_active(cpu, true); + return ret; } + sched_domains_numa_masks_clear(cpu); + return 0; } int sched_cpu_starting(unsigned int cpu) @@ -7197,10 +7176,6 @@ static int __init migration_init(void) migration_call(&migration_notifier, CPU_ONLINE, cpu); 
register_cpu_notifier(&migration_notifier); - /* Register cpu active notifiers */ - cpu_notifier(sched_cpu_active, CPU_PRI_SCHED_ACTIVE); - cpu_notifier(sched_cpu_inactive, CPU_PRI_SCHED_INACTIVE); - return 0; } early_initcall(migration_init); -- cgit v1.2.3 From f2785ddb5367e217365099294b89d6a84668069e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 10 Mar 2016 12:54:18 +0100 Subject: sched/hotplug: Move migration CPU_DYING to sched_cpu_dying() Remove the hotplug notifier and make it an explicit state. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20160310120025.502222097@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpu.h | 2 -- include/linux/sched.h | 6 +++++ kernel/cpu.c | 2 +- kernel/sched/core.c | 72 ++++++++++++++++----------------------------------- 4 files changed, 29 insertions(+), 53 deletions(-) (limited to 'include/linux/cpu.h') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index b22b000cf6ee..21597dcac0e2 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -59,9 +59,7 @@ struct notifier_block; * CPU notifier priorities. 
*/ enum { - /* migration should happen before other stuff but after perf */ CPU_PRI_PERF = 20, - CPU_PRI_MIGRATION = 10, /* bring up workqueues before normal notifiers and down after */ CPU_PRI_WORKQUEUE_UP = 5, diff --git a/include/linux/sched.h b/include/linux/sched.h index 1e5f961b1a74..47835cf8aefa 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -376,6 +376,12 @@ extern int sched_cpu_starting(unsigned int cpu); extern int sched_cpu_activate(unsigned int cpu); extern int sched_cpu_deactivate(unsigned int cpu); +#ifdef CONFIG_HOTPLUG_CPU +extern int sched_cpu_dying(unsigned int cpu); +#else +# define sched_cpu_dying NULL +#endif + extern void sched_show_task(struct task_struct *p); #ifdef CONFIG_LOCKUP_DETECTOR diff --git a/kernel/cpu.c b/kernel/cpu.c index c134a35374a1..d6eeb8c5ef88 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1223,7 +1223,7 @@ static struct cpuhp_step cpuhp_ap_states[] = { [CPUHP_AP_SCHED_STARTING] = { .name = "sched:starting", .startup = sched_cpu_starting, - .teardown = NULL, + .teardown = sched_cpu_dying, }, /* * Low level startup/teardown notifiers. Run with interrupts diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 8d8d9034edff..a9a65ed772e3 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5411,51 +5411,6 @@ static void set_rq_offline(struct rq *rq) } } -/* - * migration_call - callback that gets triggered when a CPU is added. - * Here we can start up the necessary migration thread for the new CPU. 
- */ -static int -migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) -{ - int cpu = (long)hcpu; - unsigned long flags; - struct rq *rq = cpu_rq(cpu); - - switch (action & ~CPU_TASKS_FROZEN) { - -#ifdef CONFIG_HOTPLUG_CPU - case CPU_DYING: - sched_ttwu_pending(); - /* Update our root-domain */ - raw_spin_lock_irqsave(&rq->lock, flags); - if (rq->rd) { - BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); - set_rq_offline(rq); - } - migrate_tasks(rq); - BUG_ON(rq->nr_running != 1); /* the migration thread */ - raw_spin_unlock_irqrestore(&rq->lock, flags); - calc_load_migrate(rq); - break; -#endif - } - - update_max_interval(); - - return NOTIFY_OK; -} - -/* - * Register at high priority so that task migration (migrate_all_tasks) - * happens before everything else. This has to be lower priority than - * the notifier in the perf_event subsystem, though. - */ -static struct notifier_block migration_notifier = { - .notifier_call = migration_call, - .priority = CPU_PRI_MIGRATION, -}; - static void set_cpu_rq_start_time(unsigned int cpu) { struct rq *rq = cpu_rq(cpu); @@ -7158,6 +7113,28 @@ int sched_cpu_starting(unsigned int cpu) return 0; } +#ifdef CONFIG_HOTPLUG_CPU +int sched_cpu_dying(unsigned int cpu) +{ + struct rq *rq = cpu_rq(cpu); + unsigned long flags; + + /* Handle pending wakeups and then migrate everything off */ + sched_ttwu_pending(); + raw_spin_lock_irqsave(&rq->lock, flags); + if (rq->rd) { + BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); + set_rq_offline(rq); + } + migrate_tasks(rq); + BUG_ON(rq->nr_running != 1); + raw_spin_unlock_irqrestore(&rq->lock, flags); + calc_load_migrate(rq); + update_max_interval(); + return 0; +} +#endif + void __init sched_init_smp(void) { cpumask_var_t non_isolated_cpus; @@ -7194,12 +7171,7 @@ void __init sched_init_smp(void) static int __init migration_init(void) { - void *cpu = (void *)(long)smp_processor_id(); - sched_rq_cpu_starting(smp_processor_id()); - migration_call(&migration_notifier, 
CPU_ONLINE, cpu); - register_cpu_notifier(&migration_notifier); - return 0; } early_initcall(migration_init); -- cgit v1.2.3