From 2d045036322c29b69c22f06530f1130338d06373 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 19 Jul 2017 15:42:41 +0530 Subject: cpufreq: governor: Drop min_sampling_rate The cpufreq core and governors aren't supposed to set a limit on how fast we want to try changing the frequency. This is currently done for the legacy governors with help of min_sampling_rate. At worst, we may end up setting the sampling rate to a value lower than the rate at which frequency can be changed and then one of the CPUs in the policy will be only changing frequency for ever. But that is something for the user to decide and there is no need to have special handling for such cases in the core. Leave it for the user to figure out. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux/cpufreq.h') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index f10a9b3761cd..02aec384cab9 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -491,9 +491,7 @@ static inline unsigned long cpufreq_scale(unsigned long old, u_int div, * For CPUs with transition latency > 10ms (mostly drivers with CPUFREQ_ETERNAL) * the ondemand governor will not work. All times here are in us (microseconds). */ -#define MIN_SAMPLING_RATE_RATIO (2) #define LATENCY_MULTIPLIER (1000) -#define MIN_LATENCY_MULTIPLIER (20) #define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000) struct cpufreq_governor { -- cgit v1.2.3 From aa7519af450d3c62a057aece24877c34562fa25a Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 19 Jul 2017 15:42:42 +0530 Subject: cpufreq: Use transition_delay_us for legacy governors as well The policy->transition_delay_us field is used only by the schedutil governor currently, and this field describes how fast the driver wants the cpufreq governor to change CPUs frequency. It should rather be a common thing across all governors, as it doesn't have any schedutil dependency here. Create a new helper cpufreq_policy_transition_delay_us() to get the transition delay across all governors. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 15 +++++++++++++++ drivers/cpufreq/cpufreq_governor.c | 9 +-------- include/linux/cpufreq.h | 1 + kernel/sched/cpufreq_schedutil.c | 11 +---------- 4 files changed, 18 insertions(+), 18 deletions(-) (limited to 'include/linux/cpufreq.h') diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 9bf97a366029..c426d21822f7 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -524,6 +524,21 @@ unsigned int cpufreq_driver_resolve_freq(struct cpufreq_policy *policy, } EXPORT_SYMBOL_GPL(cpufreq_driver_resolve_freq); +unsigned int cpufreq_policy_transition_delay_us(struct cpufreq_policy *policy) +{ + unsigned int latency; + + if (policy->transition_delay_us) + return policy->transition_delay_us; + + latency = policy->cpuinfo.transition_latency / NSEC_PER_USEC; + if (latency) + return latency * LATENCY_MULTIPLIER; + + return LATENCY_MULTIPLIER; +} +EXPORT_SYMBOL_GPL(cpufreq_policy_transition_delay_us); + /********************************************************************* * SYSFS INTERFACE * *********************************************************************/ diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 858081f9c3d7..eed069ecfd5e 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -389,7 +389,6 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy) struct dbs_governor *gov = dbs_governor_of(policy); struct dbs_data *dbs_data; struct policy_dbs_info *policy_dbs; - unsigned int latency; int ret = 0; /* State should be equivalent to EXIT */ @@ -428,13 +427,7 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy) if (ret) goto free_policy_dbs_info; - /* policy latency is in ns. Convert it to us first */ - latency = policy->cpuinfo.transition_latency / 1000; - if (latency == 0) - latency = 1; - - /* Bring kernel and HW constraints together */ - dbs_data->sampling_rate = LATENCY_MULTIPLIER * latency; + dbs_data->sampling_rate = cpufreq_policy_transition_delay_us(policy); if (!have_governor_per_policy()) gov->gdbs_data = dbs_data; diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 02aec384cab9..aaadfc543f63 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -523,6 +523,7 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, unsigned int relation); unsigned int cpufreq_driver_resolve_freq(struct cpufreq_policy *policy, unsigned int target_freq); +unsigned int cpufreq_policy_transition_delay_us(struct cpufreq_policy *policy); int cpufreq_register_governor(struct cpufreq_governor *governor); void cpufreq_unregister_governor(struct cpufreq_governor *governor); diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 29a397067ffa..89c4dd9777bb 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -528,16 +528,7 @@ static int sugov_init(struct cpufreq_policy *policy) goto stop_kthread; } - if (policy->transition_delay_us) { - tunables->rate_limit_us = policy->transition_delay_us; - } else { - unsigned int lat; - - tunables->rate_limit_us = LATENCY_MULTIPLIER; - lat = policy->cpuinfo.transition_latency / NSEC_PER_USEC; - if (lat) - tunables->rate_limit_us *= lat; - } + tunables->rate_limit_us = cpufreq_policy_transition_delay_us(policy); policy->governor_data = sg_policy; sg_policy->tunables = tunables; -- cgit v1.2.3 From ed4676e254630c1f00a4fc8f3821890fff7e3643 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 19 Jul 2017 15:42:46 +0530 Subject: cpufreq: Replace "max_transition_latency" with "dynamic_switching" There is no limitation in the ondemand or conservative governors which disallow the transition_latency to be greater than 10 ms. The max_transition_latency field is rather used to disallow automatic dynamic frequency switching for platforms which didn't wanted these governors to run. Replace max_transition_latency with a boolean (dynamic_switching) and check for transition_latency == CPUFREQ_ETERNAL along with that. This makes it pretty straight forward to read/understand now. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq.c | 8 ++++---- drivers/cpufreq/cpufreq_governor.h | 2 +- include/linux/cpufreq.h | 9 ++------- 3 files changed, 7 insertions(+), 12 deletions(-) (limited to 'include/linux/cpufreq.h') diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index c426d21822f7..88139e5e87da 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2003,13 +2003,13 @@ static int cpufreq_init_governor(struct cpufreq_policy *policy) if (!policy->governor) return -EINVAL; - if (policy->governor->max_transition_latency && - policy->cpuinfo.transition_latency > - policy->governor->max_transition_latency) { + /* Platform doesn't want dynamic frequency switching ? */ + if (policy->governor->dynamic_switching && + policy->cpuinfo.transition_latency == CPUFREQ_ETERNAL) { struct cpufreq_governor *gov = cpufreq_fallback_governor(); if (gov) { - pr_warn("%s governor failed, too long transition latency of HW, fallback to %s governor\n", + pr_warn("Transition latency set to CPUFREQ_ETERNAL, can't use %s governor. Fallback to %s governor\n", policy->governor->name, gov->name); policy->governor = gov; } else { diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 95f207eb820e..8463f5def0f5 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -159,7 +159,7 @@ void cpufreq_dbs_governor_limits(struct cpufreq_policy *policy); #define CPUFREQ_DBS_GOVERNOR_INITIALIZER(_name_) \ { \ .name = _name_, \ - .max_transition_latency = TRANSITION_LATENCY_LIMIT, \ + .dynamic_switching = true, \ .owner = THIS_MODULE, \ .init = cpufreq_dbs_governor_init, \ .exit = cpufreq_dbs_governor_exit, \ diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index aaadfc543f63..e141dbbb9d1c 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -487,12 +487,8 @@ static inline unsigned long cpufreq_scale(unsigned long old, u_int div, * polling frequency is 1000 times the transition latency of the processor. The * ondemand governor will work on any processor with transition latency <= 10ms, * using appropriate sampling rate. - * - * For CPUs with transition latency > 10ms (mostly drivers with CPUFREQ_ETERNAL) - * the ondemand governor will not work. All times here are in us (microseconds). */ #define LATENCY_MULTIPLIER (1000) -#define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000) struct cpufreq_governor { char name[CPUFREQ_NAME_LEN]; @@ -505,9 +501,8 @@ struct cpufreq_governor { char *buf); int (*store_setspeed) (struct cpufreq_policy *policy, unsigned int freq); - unsigned int max_transition_latency; /* HW must be able to switch to - next freq faster than this value in nano secs or we - will fallback to performance governor */ + /* For governors which change frequency dynamically by themselves */ + bool dynamic_switching; struct list_head governor_list; struct module *owner; }; -- cgit v1.2.3 From fe829ed8ef1f3c7ac22843bd594ef2f6c4044288 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 19 Jul 2017 15:42:48 +0530 Subject: cpufreq: Add CPUFREQ_NO_AUTO_DYNAMIC_SWITCHING cpufreq driver flag The policy->transition_latency field is used for multiple purposes today and its not straight forward at all. This is how it is used: A. Set the correct transition_latency value. B. Set it to CPUFREQ_ETERNAL because: 1. We don't want automatic dynamic switching (with ondemand/conservative) to happen at all. 2. We don't know the transition latency. This patch handles the B.1. case in a more readable way. A new flag for the cpufreq drivers is added to disallow use of cpufreq governors which have dynamic_switching flag set. All the current cpufreq drivers which are setting transition_latency unconditionally to CPUFREQ_ETERNAL are updated to use it. They don't need to set transition_latency anymore. There shouldn't be any functional change after this patch. Signed-off-by: Viresh Kumar Reviewed-by: Dominik Brodowski Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq-nforce2.c | 2 +- drivers/cpufreq/cpufreq.c | 5 +++-- drivers/cpufreq/elanfreq.c | 4 +--- drivers/cpufreq/gx-suspmod.c | 2 +- drivers/cpufreq/pmac32-cpufreq.c | 7 +++++-- drivers/cpufreq/sa1100-cpufreq.c | 5 +++-- drivers/cpufreq/sa1110-cpufreq.c | 5 +++-- drivers/cpufreq/sh-cpufreq.c | 3 +-- drivers/cpufreq/speedstep-smi.c | 2 +- drivers/cpufreq/unicore2-cpufreq.c | 3 +-- include/linux/cpufreq.h | 6 ++++++ 11 files changed, 26 insertions(+), 18 deletions(-) (limited to 'include/linux/cpufreq.h') diff --git a/drivers/cpufreq/cpufreq-nforce2.c b/drivers/cpufreq/cpufreq-nforce2.c index 5503d491b016..dbf82f36d270 100644 --- a/drivers/cpufreq/cpufreq-nforce2.c +++ b/drivers/cpufreq/cpufreq-nforce2.c @@ -357,7 +357,6 @@ static int nforce2_cpu_init(struct cpufreq_policy *policy) /* cpuinfo and default policy values */ policy->min = policy->cpuinfo.min_freq = min_fsb * fid * 100; policy->max = policy->cpuinfo.max_freq = max_fsb * fid * 100; - policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; return 0; } @@ -369,6 +368,7 @@ static int nforce2_cpu_exit(struct cpufreq_policy *policy) static struct cpufreq_driver nforce2_driver = { .name = "nforce2", + .flags = CPUFREQ_NO_AUTO_DYNAMIC_SWITCHING, .verify = nforce2_verify, .target = nforce2_target, .get = nforce2_get, diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 88139e5e87da..6ec589c048b2 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2005,11 +2005,12 @@ static int cpufreq_init_governor(struct cpufreq_policy *policy) /* Platform doesn't want dynamic frequency switching ? */ if (policy->governor->dynamic_switching && - policy->cpuinfo.transition_latency == CPUFREQ_ETERNAL) { + (cpufreq_driver->flags & CPUFREQ_NO_AUTO_DYNAMIC_SWITCHING || + policy->cpuinfo.transition_latency == CPUFREQ_ETERNAL)) { struct cpufreq_governor *gov = cpufreq_fallback_governor(); if (gov) { - pr_warn("Transition latency set to CPUFREQ_ETERNAL, can't use %s governor. Fallback to %s governor\n", + pr_warn("Can't use %s governor as dynamic switching is disallowed. Fallback to %s governor\n", policy->governor->name, gov->name); policy->governor = gov; } else { diff --git a/drivers/cpufreq/elanfreq.c b/drivers/cpufreq/elanfreq.c index bfce11cba1df..45e2ca62515e 100644 --- a/drivers/cpufreq/elanfreq.c +++ b/drivers/cpufreq/elanfreq.c @@ -165,9 +165,6 @@ static int elanfreq_cpu_init(struct cpufreq_policy *policy) if (pos->frequency > max_freq) pos->frequency = CPUFREQ_ENTRY_INVALID; - /* cpuinfo and default policy values */ - policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; - return cpufreq_table_validate_and_show(policy, elanfreq_table); } @@ -196,6 +193,7 @@ __setup("elanfreq=", elanfreq_setup); static struct cpufreq_driver elanfreq_driver = { .get = elanfreq_get_cpu_frequency, + .flags = CPUFREQ_NO_AUTO_DYNAMIC_SWITCHING, .verify = cpufreq_generic_frequency_table_verify, .target_index = elanfreq_target, .init = elanfreq_cpu_init, diff --git a/drivers/cpufreq/gx-suspmod.c b/drivers/cpufreq/gx-suspmod.c index 3488c9c175eb..8f52a06664e3 100644 --- a/drivers/cpufreq/gx-suspmod.c +++ b/drivers/cpufreq/gx-suspmod.c @@ -428,7 +428,6 @@ static int cpufreq_gx_cpu_init(struct cpufreq_policy *policy) policy->max = maxfreq; policy->cpuinfo.min_freq = maxfreq / max_duration; policy->cpuinfo.max_freq = maxfreq; - policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; return 0; } @@ -438,6 +437,7 @@ static int cpufreq_gx_cpu_init(struct cpufreq_policy *policy) * MediaGX/Geode GX initialize cpufreq driver */ static struct cpufreq_driver gx_suspmod_driver = { + .flags = CPUFREQ_NO_AUTO_DYNAMIC_SWITCHING, .get = gx_get_cpuspeed, .verify = cpufreq_gx_verify, .target = cpufreq_gx_target, diff --git a/drivers/cpufreq/pmac32-cpufreq.c b/drivers/cpufreq/pmac32-cpufreq.c index ff44016ea031..61ae06ca008e 100644 --- a/drivers/cpufreq/pmac32-cpufreq.c +++ b/drivers/cpufreq/pmac32-cpufreq.c @@ -442,7 +442,8 @@ static struct cpufreq_driver pmac_cpufreq_driver = { .init = pmac_cpufreq_cpu_init, .suspend = pmac_cpufreq_suspend, .resume = pmac_cpufreq_resume, - .flags = CPUFREQ_PM_NO_WARN, + .flags = CPUFREQ_PM_NO_WARN | + CPUFREQ_NO_AUTO_DYNAMIC_SWITCHING, .attr = cpufreq_generic_attr, .name = "powermac", }; @@ -626,14 +627,16 @@ static int __init pmac_cpufreq_setup(void) if (!value) goto out; cur_freq = (*value) / 1000; - transition_latency = CPUFREQ_ETERNAL; /* Check for 7447A based MacRISC3 */ if (of_machine_is_compatible("MacRISC3") && of_get_property(cpunode, "dynamic-power-step", NULL) && PVR_VER(mfspr(SPRN_PVR)) == 0x8003) { pmac_cpufreq_init_7447A(cpunode); + + /* Allow dynamic switching */ transition_latency = 8000000; + pmac_cpufreq_driver.flags &= ~CPUFREQ_NO_AUTO_DYNAMIC_SWITCHING; /* Check for other MacRISC3 machines */ } else if (of_machine_is_compatible("PowerBook3,4") || of_machine_is_compatible("PowerBook3,5") || diff --git a/drivers/cpufreq/sa1100-cpufreq.c b/drivers/cpufreq/sa1100-cpufreq.c index 728eab77e8e0..e2d8a77c36d5 100644 --- a/drivers/cpufreq/sa1100-cpufreq.c +++ b/drivers/cpufreq/sa1100-cpufreq.c @@ -197,11 +197,12 @@ static int sa1100_target(struct cpufreq_policy *policy, unsigned int ppcr) static int __init sa1100_cpu_init(struct cpufreq_policy *policy) { - return cpufreq_generic_init(policy, sa11x0_freq_table, CPUFREQ_ETERNAL); + return cpufreq_generic_init(policy, sa11x0_freq_table, 0); } static struct cpufreq_driver sa1100_driver __refdata = { - .flags = CPUFREQ_STICKY | CPUFREQ_NEED_INITIAL_FREQ_CHECK, + .flags = CPUFREQ_STICKY | CPUFREQ_NEED_INITIAL_FREQ_CHECK | + CPUFREQ_NO_AUTO_DYNAMIC_SWITCHING, .verify = cpufreq_generic_frequency_table_verify, .target_index = sa1100_target, .get = sa11x0_getspeed, diff --git a/drivers/cpufreq/sa1110-cpufreq.c b/drivers/cpufreq/sa1110-cpufreq.c index 2bac9b6cfeea..66e5fb088ecc 100644 --- a/drivers/cpufreq/sa1110-cpufreq.c +++ b/drivers/cpufreq/sa1110-cpufreq.c @@ -306,13 +306,14 @@ static int sa1110_target(struct cpufreq_policy *policy, unsigned int ppcr) static int __init sa1110_cpu_init(struct cpufreq_policy *policy) { - return cpufreq_generic_init(policy, sa11x0_freq_table, CPUFREQ_ETERNAL); + return cpufreq_generic_init(policy, sa11x0_freq_table, 0); } /* sa1110_driver needs __refdata because it must remain after init registers * it with cpufreq_register_driver() */ static struct cpufreq_driver sa1110_driver __refdata = { - .flags = CPUFREQ_STICKY | CPUFREQ_NEED_INITIAL_FREQ_CHECK, + .flags = CPUFREQ_STICKY | CPUFREQ_NEED_INITIAL_FREQ_CHECK | + CPUFREQ_NO_AUTO_DYNAMIC_SWITCHING, .verify = cpufreq_generic_frequency_table_verify, .target_index = sa1110_target, .get = sa11x0_getspeed, diff --git a/drivers/cpufreq/sh-cpufreq.c b/drivers/cpufreq/sh-cpufreq.c index 719c3d9f07fb..28893d435cf5 100644 --- a/drivers/cpufreq/sh-cpufreq.c +++ b/drivers/cpufreq/sh-cpufreq.c @@ -137,8 +137,6 @@ static int sh_cpufreq_cpu_init(struct cpufreq_policy *policy) (clk_round_rate(cpuclk, ~0UL) + 500) / 1000; } - policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; - dev_info(dev, "CPU Frequencies - Minimum %u.%03u MHz, " "Maximum %u.%03u MHz.\n", policy->min / 1000, policy->min % 1000, @@ -159,6 +157,7 @@ static int sh_cpufreq_cpu_exit(struct cpufreq_policy *policy) static struct cpufreq_driver sh_cpufreq_driver = { .name = "sh", + .flags = CPUFREQ_NO_AUTO_DYNAMIC_SWITCHING, .get = sh_cpufreq_get, .target = sh_cpufreq_target, .verify = sh_cpufreq_verify, diff --git a/drivers/cpufreq/speedstep-smi.c b/drivers/cpufreq/speedstep-smi.c index 37b30071c220..d23f24ccff38 100644 --- a/drivers/cpufreq/speedstep-smi.c +++ b/drivers/cpufreq/speedstep-smi.c @@ -266,7 +266,6 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) pr_debug("workaround worked.\n"); } - policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; return cpufreq_table_validate_and_show(policy, speedstep_freqs); } @@ -290,6 +289,7 @@ static int speedstep_resume(struct cpufreq_policy *policy) static struct cpufreq_driver speedstep_driver = { .name = "speedstep-smi", + .flags = CPUFREQ_NO_AUTO_DYNAMIC_SWITCHING, .verify = cpufreq_generic_frequency_table_verify, .target_index = speedstep_target, .init = speedstep_cpu_init, diff --git a/drivers/cpufreq/unicore2-cpufreq.c b/drivers/cpufreq/unicore2-cpufreq.c index 6f9dfa80563a..db62d9844751 100644 --- a/drivers/cpufreq/unicore2-cpufreq.c +++ b/drivers/cpufreq/unicore2-cpufreq.c @@ -58,13 +58,12 @@ static int __init ucv2_cpu_init(struct cpufreq_policy *policy) policy->min = policy->cpuinfo.min_freq = 250000; policy->max = policy->cpuinfo.max_freq = 1000000; - policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; policy->clk = clk_get(NULL, "MAIN_CLK"); return PTR_ERR_OR_ZERO(policy->clk); } static struct cpufreq_driver ucv2_driver = { - .flags = CPUFREQ_STICKY, + .flags = CPUFREQ_STICKY | CPUFREQ_NO_AUTO_DYNAMIC_SWITCHING, .verify = ucv2_verify_speed, .target = ucv2_target, .get = cpufreq_generic_get, diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index e141dbbb9d1c..5f40522ec98c 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -370,6 +370,12 @@ struct cpufreq_driver { */ #define CPUFREQ_NEED_INITIAL_FREQ_CHECK (1 << 5) +/* + * Set by drivers to disallow use of governors with "dynamic_switching" flag + * set. + */ +#define CPUFREQ_NO_AUTO_DYNAMIC_SWITCHING (1 << 6) + int cpufreq_register_driver(struct cpufreq_driver *driver_data); int cpufreq_unregister_driver(struct cpufreq_driver *driver_data); -- cgit v1.2.3 From 674e75411fc260b0d4532701228cfe12fc090da8 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 28 Jul 2017 12:16:38 +0530 Subject: sched: cpufreq: Allow remote cpufreq callbacks With Android UI and benchmarks the latency of cpufreq response to certain scheduling events can become very critical. Currently, callbacks into cpufreq governors are only made from the scheduler if the target CPU of the event is the same as the current CPU. This means there are certain situations where a target CPU may not run the cpufreq governor for some time. One testcase to show this behavior is where a task starts running on CPU0, then a new task is also spawned on CPU0 by a task on CPU1. If the system is configured such that the new tasks should receive maximum demand initially, this should result in CPU0 increasing frequency immediately. But because of the above mentioned limitation though, this does not occur. This patch updates the scheduler core to call the cpufreq callbacks for remote CPUs as well. The schedutil, ondemand and conservative governors are updated to process cpufreq utilization update hooks called for remote CPUs where the remote CPU is managed by the cpufreq policy of the local CPU. The intel_pstate driver is updated to always reject remote callbacks. This is tested with couple of usecases (Android: hackbench, recentfling, galleryfling, vellamo, Ubuntu: hackbench) on ARM hikey board (64 bit octa-core, single policy). Only galleryfling showed minor improvements, while others didn't had much deviation. The reason being that this patch only targets a corner case, where following are required to be true to improve performance and that doesn't happen too often with these tests: - Task is migrated to another CPU. - The task has high demand, and should take the target CPU to higher OPPs. - And the target CPU doesn't call into the cpufreq governor until the next tick. Based on initial work from Steve Muckle. Signed-off-by: Viresh Kumar Acked-by: Saravana Kannan Acked-by: Peter Zijlstra (Intel) Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_governor.c | 3 +++ drivers/cpufreq/intel_pstate.c | 8 ++++++++ include/linux/cpufreq.h | 9 +++++++++ kernel/sched/cpufreq_schedutil.c | 31 ++++++++++++++++++++++++++----- kernel/sched/deadline.c | 2 +- kernel/sched/fair.c | 8 +++++--- kernel/sched/rt.c | 2 +- kernel/sched/sched.h | 10 ++-------- 8 files changed, 55 insertions(+), 18 deletions(-) (limited to 'include/linux/cpufreq.h') diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 47e24b5384b3..ce5f3ec7ce71 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -275,6 +275,9 @@ static void dbs_update_util_handler(struct update_util_data *data, u64 time, struct policy_dbs_info *policy_dbs = cdbs->policy_dbs; u64 delta_ns, lst; + if (!cpufreq_can_do_remote_dvfs(policy_dbs->policy)) + return; + /* * The work may not be allowed to be queued up right now. * Possible reasons: diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 6cd503525638..d299b86a5a00 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1747,6 +1747,10 @@ static void intel_pstate_update_util_pid(struct update_util_data *data, struct cpudata *cpu = container_of(data, struct cpudata, update_util); u64 delta_ns = time - cpu->sample.time; + /* Don't allow remote callbacks */ + if (smp_processor_id() != cpu->cpu) + return; + if ((s64)delta_ns < pid_params.sample_rate_ns) return; @@ -1764,6 +1768,10 @@ static void intel_pstate_update_util(struct update_util_data *data, u64 time, struct cpudata *cpu = container_of(data, struct cpudata, update_util); u64 delta_ns; + /* Don't allow remote callbacks */ + if (smp_processor_id() != cpu->cpu) + return; + if (flags & SCHED_CPUFREQ_IOWAIT) { cpu->iowait_boost = int_tofp(1); } else if (cpu->iowait_boost) { diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index f10a9b3761cd..c4035964e6b3 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -562,6 +562,15 @@ struct governor_attr { size_t count); }; +static inline bool cpufreq_can_do_remote_dvfs(struct cpufreq_policy *policy) +{ + /* Allow remote callbacks only on the CPUs sharing cpufreq policy */ + if (cpumask_test_cpu(smp_processor_id(), policy->cpus)) + return true; + + return false; +} + /********************************************************************* * FREQUENCY TABLE HELPERS * *********************************************************************/ diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index ddd385f2a985..7dbc76801f86 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -52,6 +52,7 @@ struct sugov_policy { struct sugov_cpu { struct update_util_data update_util; struct sugov_policy *sg_policy; + unsigned int cpu; bool iowait_boost_pending; unsigned int iowait_boost; @@ -77,6 +78,21 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time) { s64 delta_ns; + /* + * Since cpufreq_update_util() is called with rq->lock held for + * the @target_cpu, our per-cpu data is fully serialized. + * + * However, drivers cannot in general deal with cross-cpu + * requests, so while get_next_freq() will work, our + * sugov_update_commit() call may not. + * + * Hence stop here for remote requests if they aren't supported + * by the hardware, as calculating the frequency is pointless if + * we cannot in fact act on it. + */ + if (!cpufreq_can_do_remote_dvfs(sg_policy->policy)) + return false; + if (sg_policy->work_in_progress) return false; @@ -155,12 +171,12 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy, return cpufreq_driver_resolve_freq(policy, freq); } -static void sugov_get_util(unsigned long *util, unsigned long *max) +static void sugov_get_util(unsigned long *util, unsigned long *max, int cpu) { - struct rq *rq = this_rq(); + struct rq *rq = cpu_rq(cpu); unsigned long cfs_max; - cfs_max = arch_scale_cpu_capacity(NULL, smp_processor_id()); + cfs_max = arch_scale_cpu_capacity(NULL, cpu); *util = min(rq->cfs.avg.util_avg, cfs_max); *max = cfs_max; @@ -254,7 +270,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, if (flags & SCHED_CPUFREQ_RT_DL) { next_f = policy->cpuinfo.max_freq; } else { - sugov_get_util(&util, &max); + sugov_get_util(&util, &max, sg_cpu->cpu); sugov_iowait_boost(sg_cpu, &util, &max); next_f = get_next_freq(sg_policy, util, max); /* @@ -316,7 +332,7 @@ static void sugov_update_shared(struct update_util_data *hook, u64 time, unsigned long util, max; unsigned int next_f; - sugov_get_util(&util, &max); + sugov_get_util(&util, &max, sg_cpu->cpu); raw_spin_lock(&sg_policy->update_lock); @@ -697,6 +713,11 @@ struct cpufreq_governor *cpufreq_default_governor(void) static int __init sugov_register(void) { + int cpu; + + for_each_possible_cpu(cpu) + per_cpu(sugov_cpu, cpu).cpu = cpu; + return cpufreq_register_governor(&schedutil_gov); } fs_initcall(sugov_register); diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 755bd3f1a1a9..5c3bf4bd0327 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1136,7 +1136,7 @@ static void update_curr_dl(struct rq *rq) } /* kick cpufreq (see the comment in kernel/sched/sched.h). */ - cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_DL); + cpufreq_update_util(rq, SCHED_CPUFREQ_DL); schedstat_set(curr->se.statistics.exec_max, max(curr->se.statistics.exec_max, delta_exec)); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index c95880e216f6..d378d02fdfcb 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3278,7 +3278,9 @@ static inline void set_tg_cfs_propagate(struct cfs_rq *cfs_rq) {} static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq) { - if (&this_rq()->cfs == cfs_rq) { + struct rq *rq = rq_of(cfs_rq); + + if (&rq->cfs == cfs_rq) { /* * There are a few boundary cases this might miss but it should * get called often enough that that should (hopefully) not be @@ -3295,7 +3297,7 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq) * * See cpu_util(). */ - cpufreq_update_util(rq_of(cfs_rq), 0); + cpufreq_update_util(rq, 0); } } @@ -4875,7 +4877,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) * passed. */ if (p->in_iowait) - cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_IOWAIT); + cpufreq_update_util(rq, SCHED_CPUFREQ_IOWAIT); for_each_sched_entity(se) { if (se->on_rq) diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 45caf937ef90..0af5ca9e3e3f 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -970,7 +970,7 @@ static void update_curr_rt(struct rq *rq) return; /* Kick cpufreq (see the comment in kernel/sched/sched.h). */ - cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_RT); + cpufreq_update_util(rq, SCHED_CPUFREQ_RT); schedstat_set(curr->se.statistics.exec_max, max(curr->se.statistics.exec_max, delta_exec)); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index eeef1a3086d1..aa9d5b87b4f8 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2070,19 +2070,13 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) { struct update_util_data *data; - data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data)); + data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data, + cpu_of(rq))); if (data) data->func(data, rq_clock(rq), flags); } - -static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags) -{ - if (cpu_of(rq) == smp_processor_id()) - cpufreq_update_util(rq, flags); -} #else static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {} -static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags) {} #endif /* CONFIG_CPU_FREQ */ #ifdef arch_scale_freq_capacity -- cgit v1.2.3 From 99d14d0e16fadb4de001bacc0ac0786a9ebecf2a Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 28 Jul 2017 12:16:39 +0530 Subject: cpufreq: Process remote callbacks from any CPU if the platform permits On many platforms, CPUs can do DVFS across cpufreq policies. i.e CPU from policy-A can change frequency of CPUs belonging to policy-B. This is quite common in case of ARM platforms where we don't configure any per-cpu register. Add a flag to identify such platforms and update cpufreq_can_do_remote_dvfs() to allow remote callbacks if this flag is set. Also enable the flag for cpufreq-dt driver which is used only on ARM platforms currently. Signed-off-by: Viresh Kumar Acked-by: Saravana Kannan Acked-by: Peter Zijlstra (Intel) Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq-dt.c | 1 + include/linux/cpufreq.h | 18 ++++++++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) (limited to 'include/linux/cpufreq.h') diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c index fef3c2160691..d83ab94d041a 100644 --- a/drivers/cpufreq/cpufreq-dt.c +++ b/drivers/cpufreq/cpufreq-dt.c @@ -274,6 +274,7 @@ static int cpufreq_init(struct cpufreq_policy *policy) transition_latency = CPUFREQ_ETERNAL; policy->cpuinfo.transition_latency = transition_latency; + policy->dvfs_possible_from_any_cpu = true; return 0; diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index c4035964e6b3..1981a4a167ce 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -127,6 +127,15 @@ struct cpufreq_policy { */ unsigned int transition_delay_us; + /* + * Remote DVFS flag (Not added to the driver structure as we don't want + * to access another structure from scheduler hotpath). + * + * Should be set if CPUs can do DVFS on behalf of other CPUs from + * different cpufreq policies. + */ + bool dvfs_possible_from_any_cpu; + /* Cached frequency lookup from cpufreq_driver_resolve_freq. */ unsigned int cached_target_freq; int cached_resolved_idx; @@ -564,8 +573,13 @@ struct governor_attr { static inline bool cpufreq_can_do_remote_dvfs(struct cpufreq_policy *policy) { - /* Allow remote callbacks only on the CPUs sharing cpufreq policy */ - if (cpumask_test_cpu(smp_processor_id(), policy->cpus)) + /* + * Allow remote callbacks if: + * - dvfs_possible_from_any_cpu flag is set + * - the local and remote CPUs share cpufreq policy + */ + if (policy->dvfs_possible_from_any_cpu || + cpumask_test_cpu(smp_processor_id(), policy->cpus)) return true; return false; -- cgit v1.2.3 From d6344d4b5628052ccda104466e5e05b9c43d7b61 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 4 Aug 2017 14:57:39 +0200 Subject: cpufreq: Simplify cpufreq_can_do_remote_dvfs() The if () in cpufreq_can_do_remote_dvfs() is superfluous, so drop it and simply return the value of the expression under it. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux/cpufreq.h') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 1981a4a167ce..d69464139f67 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -578,11 +578,8 @@ static inline bool cpufreq_can_do_remote_dvfs(struct cpufreq_policy *policy) * - dvfs_possible_from_any_cpu flag is set * - the local and remote CPUs share cpufreq policy */ - if (policy->dvfs_possible_from_any_cpu || - cpumask_test_cpu(smp_processor_id(), policy->cpus)) - return true; - - return false; + return policy->dvfs_possible_from_any_cpu || + cpumask_test_cpu(smp_processor_id(), policy->cpus); } /********************************************************************* -- cgit v1.2.3