From a03dacb0316f74400846aaf144d6c73f4217ca08 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Tue, 2 Mar 2021 15:58:21 +0900 Subject: PM / devfreq: Add cpu based scaling support to passive governor Many CPU architectures have caches that can scale independent of the CPUs. Frequency scaling of the caches is necessary to make sure that the cache is not a performance bottleneck that leads to poor performance and power. The same idea applies for RAM/DDR. To achieve this, this patch adds support for cpu based scaling to the passive governor. This is accomplished by taking the current frequency of each CPU frequency domain and then adjust the frequency of the cache (or any devfreq device) based on the frequency of the CPUs. It listens to CPU frequency transition notifiers to keep itself up to date on the current CPU frequency. To decide the frequency of the device, the governor does one of the following: * Derives the optimal devfreq device opp from required-opps property of the parent cpu opp_table. * Scales the device frequency in proportion to the CPU frequency. So, if the CPUs are running at their max frequency, the device runs at its max frequency. If the CPUs are running at their min frequency, the device runs at its min frequency. It is interpolated for frequencies in between. Tested-by: Chen-Yu Tsai Tested-by: Johnson Wang Signed-off-by: Saravana Kannan [Sibi: Integrated cpu-freqmap governor into passive_governor] Signed-off-by: Sibi Sankar [Chanwoo: Fix conflict with latest code and cleanup code] Signed-off-by: Chanwoo Choi --- include/linux/devfreq.h | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'include/linux/devfreq.h') diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index 142474b4af96..b1e4a6f796ce 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -38,6 +38,7 @@ enum devfreq_timer { struct devfreq; struct devfreq_governor; +struct devfreq_cpu_data; struct thermal_cooling_device; /** @@ -288,6 +289,11 @@ struct devfreq_simple_ondemand_data { #endif #if IS_ENABLED(CONFIG_DEVFREQ_GOV_PASSIVE) +enum devfreq_parent_dev_type { + DEVFREQ_PARENT_DEV, + CPUFREQ_PARENT_DEV, +}; + /** * struct devfreq_passive_data - ``void *data`` fed to struct devfreq * and devfreq_add_device @@ -299,8 +305,11 @@ struct devfreq_simple_ondemand_data { * using governors except for passive governor. * If the devfreq device has the specific method to decide * the next frequency, should use this callback. - * @this: the devfreq instance of own device. - * @nb: the notifier block for DEVFREQ_TRANSITION_NOTIFIER list + * @parent_type: the parent type of the device. + * @this: the devfreq instance of own device. + * @nb: the notifier block for DEVFREQ_TRANSITION_NOTIFIER or + * CPUFREQ_TRANSITION_NOTIFIER list. + * @parent_cpu_data: the state min/max/current frequency of all online cpu's. * * The devfreq_passive_data have to set the devfreq instance of parent * device with governors except for the passive governor. But, don't need to @@ -314,9 +323,13 @@ struct devfreq_passive_data { /* Optional callback to decide the next frequency of passvice device */ int (*get_target_freq)(struct devfreq *this, unsigned long *freq); + /* Should set the type of parent device */ + enum devfreq_parent_dev_type parent_type; + /* For passive governor's internal use. Don't need to set them */ struct devfreq *this; struct notifier_block nb; + struct devfreq_cpu_data *parent_cpu_data[NR_CPUS]; }; #endif -- cgit v1.2.3 From 26984d9d581e5049bd75091d2e789b9cc3ea12e0 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Wed, 27 Apr 2022 03:49:19 +0900 Subject: PM / devfreq: passive: Keep cpufreq_policy for possible cpus The passive governor requires the cpu data to get the next target frequency of devfreq device if depending on cpu. In order to reduce the unnecessary memory data, keep cpufreq_policy data for possible cpus instead of NR_CPU. Tested-by: Chen-Yu Tsai Tested-by: Johnson Wang Signed-off-by: Chanwoo Choi --- drivers/devfreq/governor.h | 3 ++ drivers/devfreq/governor_passive.c | 75 ++++++++++++++++++++++++++++++-------- include/linux/devfreq.h | 4 +- 3 files changed, 64 insertions(+), 18 deletions(-) (limited to 'include/linux/devfreq.h') diff --git a/drivers/devfreq/governor.h b/drivers/devfreq/governor.h index 335c4a491254..0adfebc0467a 100644 --- a/drivers/devfreq/governor.h +++ b/drivers/devfreq/governor.h @@ -49,6 +49,7 @@ /** * struct devfreq_cpu_data - Hold the per-cpu data + * @node: list node * @dev: reference to cpu device. * @first_cpu: the cpumask of the first cpu of a policy. * @opp_table: reference to cpu opp table. @@ -60,6 +61,8 @@ * This is auto-populated by the governor. */ struct devfreq_cpu_data { + struct list_head node; + struct device *dev; unsigned int first_cpu; diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c index ffcce613a48c..7306e943a234 100644 --- a/drivers/devfreq/governor_passive.c +++ b/drivers/devfreq/governor_passive.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0-only + // SPDX-License-Identifier: GPL-2.0-only /* * linux/drivers/devfreq/governor_passive.c * @@ -18,6 +18,22 @@ #define HZ_PER_KHZ 1000 +static struct devfreq_cpu_data * +get_parent_cpu_data(struct devfreq_passive_data *p_data, + struct cpufreq_policy *policy) +{ + struct devfreq_cpu_data *parent_cpu_data; + + if (!p_data || !policy) + return NULL; + + list_for_each_entry(parent_cpu_data, &p_data->cpu_data_list, node) + if (parent_cpu_data->first_cpu == cpumask_first(policy->related_cpus)) + return parent_cpu_data; + + return NULL; +} + static unsigned long get_target_freq_by_required_opp(struct device *p_dev, struct opp_table *p_opp_table, struct opp_table *opp_table, @@ -51,14 +67,24 @@ static int get_target_freq_with_cpufreq(struct devfreq *devfreq, struct devfreq_passive_data *p_data = (struct devfreq_passive_data *)devfreq->data; struct devfreq_cpu_data *parent_cpu_data; + struct cpufreq_policy *policy; unsigned long cpu, cpu_cur, cpu_min, cpu_max, cpu_percent; unsigned long dev_min, dev_max; unsigned long freq = 0; + int ret = 0; for_each_online_cpu(cpu) { - parent_cpu_data = p_data->parent_cpu_data[cpu]; - if (!parent_cpu_data || parent_cpu_data->first_cpu != cpu) + policy = cpufreq_cpu_get(cpu); + if (!policy) { + ret = -EINVAL; + continue; + } + + parent_cpu_data = get_parent_cpu_data(p_data, policy); + if (!parent_cpu_data) { + cpufreq_cpu_put(policy); continue; + } /* Get target freq via required opps */ cpu_cur = parent_cpu_data->cur_freq * HZ_PER_KHZ; @@ -67,6 +93,7 @@ static int get_target_freq_with_cpufreq(struct devfreq *devfreq, devfreq->opp_table, &cpu_cur); if (freq) { *target_freq = max(freq, *target_freq); + cpufreq_cpu_put(policy); continue; } @@ -81,9 +108,10 @@ static int get_target_freq_with_cpufreq(struct devfreq *devfreq, freq = dev_min + mult_frac(dev_max - dev_min, cpu_percent, 100); *target_freq = max(freq, *target_freq); + cpufreq_cpu_put(policy); } - return 0; + return ret; } static int get_target_freq_with_devfreq(struct devfreq *devfreq, @@ -168,12 +196,11 @@ static int cpufreq_passive_notifier_call(struct notifier_block *nb, unsigned int cur_freq; int ret; - if (event != CPUFREQ_POSTCHANGE || !freqs || - !p_data->parent_cpu_data[freqs->policy->cpu]) + if (event != CPUFREQ_POSTCHANGE || !freqs) return 0; - parent_cpu_data = p_data->parent_cpu_data[freqs->policy->cpu]; - if (parent_cpu_data->cur_freq == freqs->new) + parent_cpu_data = get_parent_cpu_data(p_data, freqs->policy); + if (!parent_cpu_data || parent_cpu_data->cur_freq == freqs->new) return 0; cur_freq = parent_cpu_data->cur_freq; @@ -196,7 +223,7 @@ static int cpufreq_passive_unregister_notifier(struct devfreq *devfreq) struct devfreq_passive_data *p_data = (struct devfreq_passive_data *)devfreq->data; struct devfreq_cpu_data *parent_cpu_data; - int cpu, ret; + int cpu, ret = 0; if (p_data->nb.notifier_call) { ret = cpufreq_unregister_notifier(&p_data->nb, @@ -206,16 +233,26 @@ static int cpufreq_passive_unregister_notifier(struct devfreq *devfreq) } for_each_possible_cpu(cpu) { - parent_cpu_data = p_data->parent_cpu_data[cpu]; - if (!parent_cpu_data) + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + if (!policy) { + ret = -EINVAL; continue; + } + parent_cpu_data = get_parent_cpu_data(p_data, policy); + if (!parent_cpu_data) { + cpufreq_cpu_put(policy); + continue; + } + + list_del(&parent_cpu_data->node); if (parent_cpu_data->opp_table) dev_pm_opp_put_opp_table(parent_cpu_data->opp_table); kfree(parent_cpu_data); + cpufreq_cpu_put(policy); } - return 0; + return ret; } static int cpufreq_passive_register_notifier(struct devfreq *devfreq) @@ -230,6 +267,9 @@ static int cpufreq_passive_register_notifier(struct devfreq *devfreq) unsigned int cpu; int ret; + p_data->cpu_data_list + = (struct list_head)LIST_HEAD_INIT(p_data->cpu_data_list); + p_data->nb.notifier_call = cpufreq_passive_notifier_call; ret = cpufreq_register_notifier(&p_data->nb, CPUFREQ_TRANSITION_NOTIFIER); if (ret) { @@ -239,15 +279,18 @@ static int cpufreq_passive_register_notifier(struct devfreq *devfreq) } for_each_possible_cpu(cpu) { - if (p_data->parent_cpu_data[cpu]) - continue; - policy = cpufreq_cpu_get(cpu); if (!policy) { ret = -EPROBE_DEFER; goto err; } + parent_cpu_data = get_parent_cpu_data(p_data, policy); + if (parent_cpu_data) { + cpufreq_cpu_put(policy); + continue; + } + parent_cpu_data = kzalloc(sizeof(*parent_cpu_data), GFP_KERNEL); if (!parent_cpu_data) { @@ -276,7 +319,7 @@ static int cpufreq_passive_register_notifier(struct devfreq *devfreq) parent_cpu_data->min_freq = policy->cpuinfo.min_freq; parent_cpu_data->max_freq = policy->cpuinfo.max_freq; - p_data->parent_cpu_data[cpu] = parent_cpu_data; + list_add_tail(&parent_cpu_data->node, &p_data->cpu_data_list); cpufreq_cpu_put(policy); } diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index b1e4a6f796ce..dc10bee75a72 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -309,7 +309,7 @@ enum devfreq_parent_dev_type { * @this: the devfreq instance of own device. * @nb: the notifier block for DEVFREQ_TRANSITION_NOTIFIER or * CPUFREQ_TRANSITION_NOTIFIER list. - * @parent_cpu_data: the state min/max/current frequency of all online cpu's. + * @cpu_data_list: the list of cpu frequency data for all cpufreq_policy. * * The devfreq_passive_data have to set the devfreq instance of parent * device with governors except for the passive governor. But, don't need to @@ -329,7 +329,7 @@ struct devfreq_passive_data { /* For passive governor's internal use. Don't need to set them */ struct devfreq *this; struct notifier_block nb; - struct devfreq_cpu_data *parent_cpu_data[NR_CPUS]; + struct list_head cpu_data_list; }; #endif -- cgit v1.2.3 From b5d281f6c16dd432b618bdfd36ddba1a58d5b603 Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Mon, 20 Jun 2022 00:03:51 +0200 Subject: PM / devfreq: Rework freq_table to be local to devfreq struct On a devfreq PROBE_DEFER, the freq_table in the driver profile struct, is never reset and may be leaved in an undefined state. This comes from the fact that we store the freq_table in the driver profile struct that is commonly defined as static and not reset on PROBE_DEFER. We currently skip the reinit of the freq_table if we found it's already defined since a driver may declare his own freq_table. This logic is flawed in the case devfreq core generate a freq_table, set it in the profile struct and then PROBE_DEFER, freeing the freq_table. In this case devfreq will found a NOT NULL freq_table that has been freed, skip the freq_table generation and probe the driver based on the wrong table. To fix this and correctly handle PROBE_DEFER, use a local freq_table and max_state in the devfreq struct and never modify the freq_table present in the profile struct if it does provide it. Fixes: 0ec09ac2cebe ("PM / devfreq: Set the freq_table of devfreq device") Cc: stable@vger.kernel.org Signed-off-by: Christian Marangi Signed-off-by: Chanwoo Choi --- drivers/devfreq/devfreq.c | 71 ++++++++++++++++++-------------------- drivers/devfreq/governor_passive.c | 14 ++++---- include/linux/devfreq.h | 5 +++ 3 files changed, 46 insertions(+), 44 deletions(-) (limited to 'include/linux/devfreq.h') diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 80a1235ef8fb..9602141bb8ec 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -123,7 +123,7 @@ void devfreq_get_freq_range(struct devfreq *devfreq, unsigned long *min_freq, unsigned long *max_freq) { - unsigned long *freq_table = devfreq->profile->freq_table; + unsigned long *freq_table = devfreq->freq_table; s32 qos_min_freq, qos_max_freq; lockdep_assert_held(&devfreq->lock); @@ -133,11 +133,11 @@ void devfreq_get_freq_range(struct devfreq *devfreq, * The devfreq drivers can initialize this in either ascending or * descending order and devfreq core supports both. */ - if (freq_table[0] < freq_table[devfreq->profile->max_state - 1]) { + if (freq_table[0] < freq_table[devfreq->max_state - 1]) { *min_freq = freq_table[0]; - *max_freq = freq_table[devfreq->profile->max_state - 1]; + *max_freq = freq_table[devfreq->max_state - 1]; } else { - *min_freq = freq_table[devfreq->profile->max_state - 1]; + *min_freq = freq_table[devfreq->max_state - 1]; *max_freq = freq_table[0]; } @@ -169,8 +169,8 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq) { int lev; - for (lev = 0; lev < devfreq->profile->max_state; lev++) - if (freq == devfreq->profile->freq_table[lev]) + for (lev = 0; lev < devfreq->max_state; lev++) + if (freq == devfreq->freq_table[lev]) return lev; return -EINVAL; @@ -178,7 +178,6 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq) static int set_freq_table(struct devfreq *devfreq) { - struct devfreq_dev_profile *profile = devfreq->profile; struct dev_pm_opp *opp; unsigned long freq; int i, count; @@ -188,25 +187,22 @@ static int set_freq_table(struct devfreq *devfreq) if (count <= 0) return -EINVAL; - profile->max_state = count; - profile->freq_table = devm_kcalloc(devfreq->dev.parent, - profile->max_state, - sizeof(*profile->freq_table), - GFP_KERNEL); - if (!profile->freq_table) { - profile->max_state = 0; + devfreq->max_state = count; + devfreq->freq_table = devm_kcalloc(devfreq->dev.parent, + devfreq->max_state, + sizeof(*devfreq->freq_table), + GFP_KERNEL); + if (!devfreq->freq_table) return -ENOMEM; - } - for (i = 0, freq = 0; i < profile->max_state; i++, freq++) { + for (i = 0, freq = 0; i < devfreq->max_state; i++, freq++) { opp = dev_pm_opp_find_freq_ceil(devfreq->dev.parent, &freq); if (IS_ERR(opp)) { - devm_kfree(devfreq->dev.parent, profile->freq_table); - profile->max_state = 0; + devm_kfree(devfreq->dev.parent, devfreq->freq_table); return PTR_ERR(opp); } dev_pm_opp_put(opp); - profile->freq_table[i] = freq; + devfreq->freq_table[i] = freq; } return 0; @@ -246,7 +242,7 @@ int devfreq_update_status(struct devfreq *devfreq, unsigned long freq) if (lev != prev_lev) { devfreq->stats.trans_table[ - (prev_lev * devfreq->profile->max_state) + lev]++; + (prev_lev * devfreq->max_state) + lev]++; devfreq->stats.total_trans++; } @@ -835,6 +831,9 @@ struct devfreq *devfreq_add_device(struct device *dev, if (err < 0) goto err_dev; mutex_lock(&devfreq->lock); + } else { + devfreq->freq_table = devfreq->profile->freq_table; + devfreq->max_state = devfreq->profile->max_state; } devfreq->scaling_min_freq = find_available_min_freq(devfreq); @@ -870,8 +869,8 @@ struct devfreq *devfreq_add_device(struct device *dev, devfreq->stats.trans_table = devm_kzalloc(&devfreq->dev, array3_size(sizeof(unsigned int), - devfreq->profile->max_state, - devfreq->profile->max_state), + devfreq->max_state, + devfreq->max_state), GFP_KERNEL); if (!devfreq->stats.trans_table) { mutex_unlock(&devfreq->lock); @@ -880,7 +879,7 @@ struct devfreq *devfreq_add_device(struct device *dev, } devfreq->stats.time_in_state = devm_kcalloc(&devfreq->dev, - devfreq->profile->max_state, + devfreq->max_state, sizeof(*devfreq->stats.time_in_state), GFP_KERNEL); if (!devfreq->stats.time_in_state) { @@ -1666,9 +1665,9 @@ static ssize_t available_frequencies_show(struct device *d, mutex_lock(&df->lock); - for (i = 0; i < df->profile->max_state; i++) + for (i = 0; i < df->max_state; i++) count += scnprintf(&buf[count], (PAGE_SIZE - count - 2), - "%lu ", df->profile->freq_table[i]); + "%lu ", df->freq_table[i]); mutex_unlock(&df->lock); /* Truncate the trailing space */ @@ -1691,7 +1690,7 @@ static ssize_t trans_stat_show(struct device *dev, if (!df->profile) return -EINVAL; - max_state = df->profile->max_state; + max_state = df->max_state; if (max_state == 0) return sprintf(buf, "Not Supported.\n"); @@ -1708,19 +1707,17 @@ static ssize_t trans_stat_show(struct device *dev, len += sprintf(buf + len, " :"); for (i = 0; i < max_state; i++) len += sprintf(buf + len, "%10lu", - df->profile->freq_table[i]); + df->freq_table[i]); len += sprintf(buf + len, " time(ms)\n"); for (i = 0; i < max_state; i++) { - if (df->profile->freq_table[i] - == df->previous_freq) { + if (df->freq_table[i] == df->previous_freq) len += sprintf(buf + len, "*"); - } else { + else len += sprintf(buf + len, " "); - } - len += sprintf(buf + len, "%10lu:", - df->profile->freq_table[i]); + + len += sprintf(buf + len, "%10lu:", df->freq_table[i]); for (j = 0; j < max_state; j++) len += sprintf(buf + len, "%10u", df->stats.trans_table[(i * max_state) + j]); @@ -1744,7 +1741,7 @@ static ssize_t trans_stat_store(struct device *dev, if (!df->profile) return -EINVAL; - if (df->profile->max_state == 0) + if (df->max_state == 0) return count; err = kstrtoint(buf, 10, &value); @@ -1752,11 +1749,11 @@ static ssize_t trans_stat_store(struct device *dev, return -EINVAL; mutex_lock(&df->lock); - memset(df->stats.time_in_state, 0, (df->profile->max_state * + memset(df->stats.time_in_state, 0, (df->max_state * sizeof(*df->stats.time_in_state))); memset(df->stats.trans_table, 0, array3_size(sizeof(unsigned int), - df->profile->max_state, - df->profile->max_state)); + df->max_state, + df->max_state)); df->stats.total_trans = 0; df->stats.last_update = get_jiffies_64(); mutex_unlock(&df->lock); diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c index 69e06725d92b..406ef79c0c46 100644 --- a/drivers/devfreq/governor_passive.c +++ b/drivers/devfreq/governor_passive.c @@ -144,18 +144,18 @@ static int get_target_freq_with_devfreq(struct devfreq *devfreq, goto out; /* Use interpolation if required opps is not available */ - for (i = 0; i < parent_devfreq->profile->max_state; i++) - if (parent_devfreq->profile->freq_table[i] == *freq) + for (i = 0; i < parent_devfreq->max_state; i++) + if (parent_devfreq->freq_table[i] == *freq) break; - if (i == parent_devfreq->profile->max_state) + if (i == parent_devfreq->max_state) return -EINVAL; - if (i < devfreq->profile->max_state) { - child_freq = devfreq->profile->freq_table[i]; + if (i < devfreq->max_state) { + child_freq = devfreq->freq_table[i]; } else { - count = devfreq->profile->max_state; - child_freq = devfreq->profile->freq_table[count - 1]; + count = devfreq->max_state; + child_freq = devfreq->freq_table[count - 1]; } out: diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index dc10bee75a72..34aab4dd336c 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -148,6 +148,8 @@ struct devfreq_stats { * reevaluate operable frequencies. Devfreq users may use * devfreq.nb to the corresponding register notifier call chain. * @work: delayed work for load monitoring. + * @freq_table: current frequency table used by the devfreq driver. + * @max_state: count of entry present in the frequency table. * @previous_freq: previously configured frequency value. * @last_status: devfreq user device info, performance statistics * @data: Private data of the governor. The devfreq framework does not @@ -185,6 +187,9 @@ struct devfreq { struct notifier_block nb; struct delayed_work work; + unsigned long *freq_table; + unsigned int max_state; + unsigned long previous_freq; struct devfreq_dev_status last_status; -- cgit v1.2.3