From 2a9a86d5c81389cd9afe6a4fea42c585733cd705 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Mon, 30 Oct 2017 09:10:46 +0200 Subject: PM / QoS: Fix default runtime_pm device resume latency The recent change to the PM QoS framework to introduce a proper no constraint value overlooked to handle the devices which don't implement PM QoS OPS. Runtime PM is one of the more severely impacted subsystems, failing every attempt to runtime suspend a device. This leads into some nasty second level issues like probe failures and increased power consumption among other things. Fix this by adding a proper return value for devices that don't implement PM QoS. Fixes: 0cc2b4e5a020 (PM / QoS: Fix device resume latency PM QoS) Signed-off-by: Tero Kristo Cc: All applicable Signed-off-by: Rafael J. Wysocki --- include/linux/pm_qos.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 6737a8c9e8c6..d68b0569a5eb 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -175,7 +175,8 @@ static inline s32 dev_pm_qos_requested_flags(struct device *dev) static inline s32 dev_pm_qos_raw_read_value(struct device *dev) { return IS_ERR_OR_NULL(dev->power.qos) ? - 0 : pm_qos_read_value(&dev->power.qos->resume_latency); + PM_QOS_RESUME_LATENCY_NO_CONSTRAINT : + pm_qos_read_value(&dev->power.qos->resume_latency); } #else static inline enum pm_qos_flags_status __dev_pm_qos_flags(struct device *dev, -- cgit v1.2.3 From 5ba257249e9c8cb2c670e22b0f02d1806f349102 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 31 Oct 2017 18:24:38 +0100 Subject: Revert "PM / QoS: Fix default runtime_pm device resume latency" This reverts commit 2a9a86d5c813 (PM / QoS: Fix default runtime_pm device resume latency) as the commit it depends on is going to be reverted. Signed-off-by: Rafael J. Wysocki --- include/linux/pm_qos.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index d68b0569a5eb..6737a8c9e8c6 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -175,8 +175,7 @@ static inline s32 dev_pm_qos_requested_flags(struct device *dev) static inline s32 dev_pm_qos_raw_read_value(struct device *dev) { return IS_ERR_OR_NULL(dev->power.qos) ? - PM_QOS_RESUME_LATENCY_NO_CONSTRAINT : - pm_qos_read_value(&dev->power.qos->resume_latency); + 0 : pm_qos_read_value(&dev->power.qos->resume_latency); } #else static inline enum pm_qos_flags_status __dev_pm_qos_flags(struct device *dev, -- cgit v1.2.3 From d5919dcc349d2a16d805ef8096d36e4f519e42ae Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 31 Oct 2017 18:26:15 +0100 Subject: Revert "PM / QoS: Fix device resume latency PM QoS" This reverts commit 0cc2b4e5a020 (PM / QoS: Fix device resume latency PM QoS) as it introduced regressions on multiple systems and the fix-up in commit 2a9a86d5c813 (PM / QoS: Fix default runtime_pm device resume latency) does not address all of them. The original problem that commit 0cc2b4e5a020 was attempting to fix will be addressed later. Fixes: 0cc2b4e5a020 (PM / QoS: Fix device resume latency PM QoS) Reported-by: Geert Uytterhoeven Signed-off-by: Rafael J. Wysocki --- Documentation/ABI/testing/sysfs-devices-power | 4 +- drivers/base/cpu.c | 3 +- drivers/base/power/domain_governor.c | 53 ++++++++++++--------------- drivers/base/power/qos.c | 2 +- drivers/base/power/runtime.c | 2 +- drivers/base/power/sysfs.c | 25 ++----------- drivers/cpuidle/governors/menu.c | 4 +- include/linux/pm_qos.h | 5 +-- 8 files changed, 35 insertions(+), 63 deletions(-) (limited to 'include') diff --git a/Documentation/ABI/testing/sysfs-devices-power b/Documentation/ABI/testing/sysfs-devices-power index 5cbb6f038615..676fdf5f2a99 100644 --- a/Documentation/ABI/testing/sysfs-devices-power +++ b/Documentation/ABI/testing/sysfs-devices-power @@ -211,9 +211,7 @@ Description: device, after it has been suspended at run time, from a resume request to the moment the device will be ready to process I/O, in microseconds. If it is equal to 0, however, this means that - the PM QoS resume latency may be arbitrary and the special value - "n/a" means that user space cannot accept any resume latency at - all for the given device. + the PM QoS resume latency may be arbitrary. Not all drivers support this attribute. If it isn't supported, it is not present. diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 227bac5f1191..321cd7b4d817 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -377,8 +377,7 @@ int register_cpu(struct cpu *cpu, int num) per_cpu(cpu_sys_devices, num) = &cpu->dev; register_cpu_under_node(num, cpu_to_node(num)); - dev_pm_qos_expose_latency_limit(&cpu->dev, - PM_QOS_RESUME_LATENCY_NO_CONSTRAINT); + dev_pm_qos_expose_latency_limit(&cpu->dev, 0); return 0; } diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c index 51751cc8c9e6..281f949c5ffe 100644 --- a/drivers/base/power/domain_governor.c +++ b/drivers/base/power/domain_governor.c @@ -14,20 +14,23 @@ static int dev_update_qos_constraint(struct device *dev, void *data) { s64 *constraint_ns_p = data; - s64 constraint_ns = -1; + s32 constraint_ns = -1; if (dev->power.subsys_data && dev->power.subsys_data->domain_data) constraint_ns = dev_gpd_data(dev)->td.effective_constraint_ns; - if (constraint_ns < 0) + if (constraint_ns < 0) { constraint_ns = dev_pm_qos_read_value(dev); - - if (constraint_ns == PM_QOS_RESUME_LATENCY_NO_CONSTRAINT) + constraint_ns *= NSEC_PER_USEC; + } + if (constraint_ns == 0) return 0; - constraint_ns *= NSEC_PER_USEC; - - if (constraint_ns < *constraint_ns_p || *constraint_ns_p < 0) + /* + * constraint_ns cannot be negative here, because the device has been + * suspended. + */ + if (constraint_ns < *constraint_ns_p || *constraint_ns_p == 0) *constraint_ns_p = constraint_ns; return 0; @@ -60,14 +63,10 @@ static bool default_suspend_ok(struct device *dev) spin_unlock_irqrestore(&dev->power.lock, flags); - if (constraint_ns == 0) + if (constraint_ns < 0) return false; - if (constraint_ns == PM_QOS_RESUME_LATENCY_NO_CONSTRAINT) - constraint_ns = -1; - else - constraint_ns *= NSEC_PER_USEC; - + constraint_ns *= NSEC_PER_USEC; /* * We can walk the children without any additional locking, because * they all have been suspended at this point and their @@ -77,19 +76,14 @@ static bool default_suspend_ok(struct device *dev) device_for_each_child(dev, &constraint_ns, dev_update_qos_constraint); - if (constraint_ns < 0) { - /* The children have no constraints. */ - td->effective_constraint_ns = PM_QOS_RESUME_LATENCY_NO_CONSTRAINT; - td->cached_suspend_ok = true; - } else { - constraint_ns -= td->suspend_latency_ns + td->resume_latency_ns; - if (constraint_ns > 0) { - td->effective_constraint_ns = constraint_ns; - td->cached_suspend_ok = true; - } else { - td->effective_constraint_ns = 0; - } + if (constraint_ns > 0) { + constraint_ns -= td->suspend_latency_ns + + td->resume_latency_ns; + if (constraint_ns == 0) + return false; } + td->effective_constraint_ns = constraint_ns; + td->cached_suspend_ok = constraint_ns >= 0; /* * The children have been suspended already, so we don't need to take @@ -151,14 +145,13 @@ static bool __default_power_down_ok(struct dev_pm_domain *pd, td = &to_gpd_data(pdd)->td; constraint_ns = td->effective_constraint_ns; /* default_suspend_ok() need not be called before us. */ - if (constraint_ns < 0) + if (constraint_ns < 0) { constraint_ns = dev_pm_qos_read_value(pdd->dev); - - if (constraint_ns == PM_QOS_RESUME_LATENCY_NO_CONSTRAINT) + constraint_ns *= NSEC_PER_USEC; + } + if (constraint_ns == 0) continue; - constraint_ns *= NSEC_PER_USEC; - /* * constraint_ns cannot be negative here, because the device has * been suspended. diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c index 7d29286d9313..277d43a83f53 100644 --- a/drivers/base/power/qos.c +++ b/drivers/base/power/qos.c @@ -189,7 +189,7 @@ static int dev_pm_qos_constraints_allocate(struct device *dev) plist_head_init(&c->list); c->target_value = PM_QOS_RESUME_LATENCY_DEFAULT_VALUE; c->default_value = PM_QOS_RESUME_LATENCY_DEFAULT_VALUE; - c->no_constraint_value = PM_QOS_RESUME_LATENCY_NO_CONSTRAINT; + c->no_constraint_value = PM_QOS_RESUME_LATENCY_DEFAULT_VALUE; c->type = PM_QOS_MIN; c->notifiers = n; diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 13e015905543..7bcf80fa9ada 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -253,7 +253,7 @@ static int rpm_check_suspend_allowed(struct device *dev) || (dev->power.request_pending && dev->power.request == RPM_REQ_RESUME)) retval = -EAGAIN; - else if (__dev_pm_qos_read_value(dev) == 0) + else if (__dev_pm_qos_read_value(dev) < 0) retval = -EPERM; else if (dev->power.runtime_status == RPM_SUSPENDED) retval = 1; diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index 632077f05c57..156ab57bca77 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -218,14 +218,7 @@ static ssize_t pm_qos_resume_latency_show(struct device *dev, struct device_attribute *attr, char *buf) { - s32 value = dev_pm_qos_requested_resume_latency(dev); - - if (value == 0) - return sprintf(buf, "n/a\n"); - else if (value == PM_QOS_RESUME_LATENCY_NO_CONSTRAINT) - value = 0; - - return sprintf(buf, "%d\n", value); + return sprintf(buf, "%d\n", dev_pm_qos_requested_resume_latency(dev)); } static ssize_t pm_qos_resume_latency_store(struct device *dev, @@ -235,21 +228,11 @@ static ssize_t pm_qos_resume_latency_store(struct device *dev, s32 value; int ret; - if (!kstrtos32(buf, 0, &value)) { - /* - * Prevent users from writing negative or "no constraint" values - * directly. - */ - if (value < 0 || value == PM_QOS_RESUME_LATENCY_NO_CONSTRAINT) - return -EINVAL; + if (kstrtos32(buf, 0, &value)) + return -EINVAL; - if (value == 0) - value = PM_QOS_RESUME_LATENCY_NO_CONSTRAINT; - } else if (!strcmp(buf, "n/a") || !strcmp(buf, "n/a\n")) { - value = 0; - } else { + if (value < 0) return -EINVAL; - } ret = dev_pm_qos_update_request(dev->power.qos->resume_latency_req, value); diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index aa390404e85f..48eaf2879228 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -298,8 +298,8 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) data->needs_update = 0; } - if (resume_latency < latency_req && - resume_latency != PM_QOS_RESUME_LATENCY_NO_CONSTRAINT) + /* resume_latency is 0 means no restriction */ + if (resume_latency && resume_latency < latency_req) latency_req = resume_latency; /* Special case when user has set very strict latency requirement */ diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 6737a8c9e8c6..032b55909145 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -27,17 +27,16 @@ enum pm_qos_flags_status { PM_QOS_FLAGS_ALL, }; -#define PM_QOS_DEFAULT_VALUE (-1) -#define PM_QOS_LATENCY_ANY S32_MAX +#define PM_QOS_DEFAULT_VALUE -1 #define PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC) #define PM_QOS_NETWORK_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC) #define PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE 0 #define PM_QOS_MEMORY_BANDWIDTH_DEFAULT_VALUE 0 #define PM_QOS_RESUME_LATENCY_DEFAULT_VALUE 0 -#define PM_QOS_RESUME_LATENCY_NO_CONSTRAINT PM_QOS_LATENCY_ANY #define PM_QOS_LATENCY_TOLERANCE_DEFAULT_VALUE 0 #define PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT (-1) +#define PM_QOS_LATENCY_ANY ((s32)(~(__u32)0 >> 1)) #define PM_QOS_FLAG_NO_POWER_OFF (1 << 0) #define PM_QOS_FLAG_REMOTE_WAKEUP (1 << 1) -- cgit v1.2.3 From 04686ef299db5ff299bfc5a4504b189e46842078 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 31 Oct 2017 19:17:31 -0700 Subject: bpf: remove SK_REDIRECT from UAPI Now that SK_REDIRECT is no longer a valid return code. Remove it from the UAPI completely. Then do a namespace remapping internal to sockmap so SK_REDIRECT is no longer externally visible. Patchs primary change is to do a namechange from SK_REDIRECT to __SK_REDIRECT Reported-by: Alexei Starovoitov Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 1 - kernel/bpf/sockmap.c | 16 ++++++++++++---- tools/include/uapi/linux/bpf.h | 3 +-- 3 files changed, 13 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0d7948ce2128..7bf4c750dd3a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -788,7 +788,6 @@ struct xdp_md { enum sk_action { SK_DROP = 0, SK_PASS, - SK_REDIRECT, }; #define BPF_TAG_SIZE 8 diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 66f00a2b27f4..dbd7b322a86b 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -101,13 +101,19 @@ static inline void bpf_compute_data_end_sk_skb(struct sk_buff *skb) TCP_SKB_CB(skb)->bpf.data_end = skb->data + skb_headlen(skb); } +enum __sk_action { + __SK_DROP = 0, + __SK_PASS, + __SK_REDIRECT, +}; + static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb) { struct bpf_prog *prog = READ_ONCE(psock->bpf_verdict); int rc; if (unlikely(!prog)) - return SK_DROP; + return __SK_DROP; skb_orphan(skb); /* We need to ensure that BPF metadata for maps is also cleared @@ -122,8 +128,10 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb) preempt_enable(); skb->sk = NULL; + /* Moving return codes from UAPI namespace into internal namespace */ return rc == SK_PASS ? - (TCP_SKB_CB(skb)->bpf.map ? SK_REDIRECT : SK_PASS) : SK_DROP; + (TCP_SKB_CB(skb)->bpf.map ? __SK_REDIRECT : __SK_PASS) : + __SK_DROP; } static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb) @@ -133,7 +141,7 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb) rc = smap_verdict_func(psock, skb); switch (rc) { - case SK_REDIRECT: + case __SK_REDIRECT: sk = do_sk_redirect_map(skb); if (likely(sk)) { struct smap_psock *peer = smap_psock_sk(sk); @@ -149,7 +157,7 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb) } } /* Fall through and free skb otherwise */ - case SK_DROP: + case __SK_DROP: default: kfree_skb(skb); } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index c174971afbe6..01cc7ba39924 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -576,7 +576,7 @@ union bpf_attr { * @map: pointer to sockmap * @key: key to lookup sock in map * @flags: reserved for future use - * Return: SK_REDIRECT + * Return: SK_PASS * * int bpf_sock_map_update(skops, map, key, flags) * @skops: pointer to bpf_sock_ops @@ -789,7 +789,6 @@ struct xdp_md { enum sk_action { SK_DROP = 0, SK_PASS, - SK_REDIRECT, }; #define BPF_TAG_SIZE 8 -- cgit v1.2.3 From 2b7cda9c35d3b940eb9ce74b30bbd5eb30db493d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 30 Oct 2017 23:08:20 -0700 Subject: tcp: fix tcp_mtu_probe() vs highest_sack Based on SNMP values provided by Roman, Yuchung made the observation that some crashes in tcp_sacktag_walk() might be caused by MTU probing. Looking at tcp_mtu_probe(), I found that when a new skb was placed in front of the write queue, we were not updating tcp highest sack. If one skb is freed because all its content was copied to the new skb (for MTU probing), then tp->highest_sack could point to a now freed skb. Bad things would then happen, including infinite loops. This patch renames tcp_highest_sack_combine() and uses it from tcp_mtu_probe() to fix the bug. Note that I also removed one test against tp->sacked_out, since we want to replace tp->highest_sack regardless of whatever condition, since keeping a stale pointer to freed skb is a recipe for disaster. Fixes: a47e5a988a57 ("[TCP]: Convert highest_sack to sk_buff to allow direct access") Signed-off-by: Eric Dumazet Reported-by: Alexei Starovoitov Reported-by: Roman Gushchin Reported-by: Oleksandr Natalenko Acked-by: Alexei Starovoitov Acked-by: Neal Cardwell Acked-by: Yuchung Cheng Signed-off-by: David S. Miller --- include/net/tcp.h | 6 +++--- net/ipv4/tcp_output.c | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 33599d17522d..e6d0002a1b0b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1771,12 +1771,12 @@ static inline void tcp_highest_sack_reset(struct sock *sk) tcp_sk(sk)->highest_sack = tcp_write_queue_head(sk); } -/* Called when old skb is about to be deleted (to be combined with new skb) */ -static inline void tcp_highest_sack_combine(struct sock *sk, +/* Called when old skb is about to be deleted and replaced by new skb */ +static inline void tcp_highest_sack_replace(struct sock *sk, struct sk_buff *old, struct sk_buff *new) { - if (tcp_sk(sk)->sacked_out && (old == tcp_sk(sk)->highest_sack)) + if (old == tcp_highest_sack(sk)) tcp_sk(sk)->highest_sack = new; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ae60dd3faed0..823003eef3a2 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2062,6 +2062,7 @@ static int tcp_mtu_probe(struct sock *sk) nskb->ip_summed = skb->ip_summed; tcp_insert_write_queue_before(nskb, skb, sk); + tcp_highest_sack_replace(sk, skb, nskb); len = 0; tcp_for_write_queue_from_safe(skb, next, sk) { @@ -2665,7 +2666,7 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) else if (!skb_shift(skb, next_skb, next_skb_size)) return false; } - tcp_highest_sack_combine(sk, next_skb, skb); + tcp_highest_sack_replace(sk, next_skb, skb); tcp_unlink_write_queue(next_skb, sk); -- cgit v1.2.3