path: root/kernel
author    Takashi Iwai <tiwai@suse.de>    2020-08-03 14:41:43 +0200
committer Takashi Iwai <tiwai@suse.de>    2020-08-03 14:41:43 +0200
commit    103f528d3bc35d2b6e726a3fffd879e492d191c2 (patch)
tree      2829604c2386f96e228fac7841e49906f698dfff /kernel
parent    07c9983b567d0ef33aefc063299de95a987e12a8 (diff)
parent    84569f329f7fcb40b7b1860f273b2909dabf2a2b (diff)
Merge tag 'asoc-v5.9' of https://git.kernel.org/pub/scm/linux/kernel/git/broonie/sound into for-linus
ASoC: Updates for v5.9

The biggest changes here once again come from Morimoto-san, who has continued his diligent work cleaning up long-standing issues in the APIs; it's particularly nice to see the transition from digital_mute() to mute_stream() finally completed. There's also been a lot of work on the x86 code again, this time with a big focus on cleaning up issues identified by various static tests, and on the Freescale systems. Otherwise the biggest thing has been a lot of driver additions:

- Convert users of digital_mute() to mute_stream().
- Simplify I/O helper functions.
- Add a helper for getting the RTD from a substream.
- Many, many fixes and cleanups to the x86 code.
- New drivers for Freescale MQS and i.MX6sx, Intel KeemBay I2S, Maxim MAX98360A and MAX98373 SoundWire, several MediaTek boards, NVIDIA Tegra 186 and 210, Realtek RL6231, Samsung Midas and Aries boards (some of the first phones I worked on!) and TI J721e EVM.
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/btf.c4
-rw-r--r--kernel/bpf/cgroup.c53
-rw-r--r--kernel/bpf/devmap.c10
-rw-r--r--kernel/bpf/net_namespace.c194
-rw-r--r--kernel/bpf/reuseport_array.c14
-rw-r--r--kernel/bpf/ringbuf.c18
-rw-r--r--kernel/bpf/syscall.c45
-rw-r--r--kernel/bpf/verifier.c13
-rw-r--r--kernel/cgroup/cgroup.c31
-rw-r--r--kernel/debug/debug_core.c10
-rw-r--r--kernel/debug/gdbstub.c19
-rw-r--r--kernel/debug/kdb/kdb_io.c72
-rw-r--r--kernel/debug/kdb/kdb_main.c3
-rw-r--r--kernel/debug/kdb/kdb_support.c7
-rw-r--r--kernel/dma/Kconfig11
-rw-r--r--kernel/dma/direct.c67
-rw-r--r--kernel/dma/mapping.c10
-rw-r--r--kernel/dma/pool.c94
-rw-r--r--kernel/dma/remap.c5
-rw-r--r--kernel/events/uprobes.c2
-rw-r--r--kernel/fork.c2
-rw-r--r--kernel/irq/manage.c37
-rw-r--r--kernel/kallsyms.c17
-rw-r--r--kernel/kexec_file.c34
-rw-r--r--kernel/kprobes.c65
-rw-r--r--kernel/kthread.c2
-rw-r--r--kernel/module.c55
-rw-r--r--kernel/nsproxy.c2
-rw-r--r--kernel/padata.c4
-rw-r--r--kernel/printk/printk.c10
-rw-r--r--kernel/rcu/rcuperf.c2
-rw-r--r--kernel/rcu/tree.c32
-rw-r--r--kernel/sched/core.c118
-rw-r--r--kernel/sched/deadline.c1
-rw-r--r--kernel/sched/fair.c17
-rw-r--r--kernel/sched/idle.c15
-rw-r--r--kernel/sched/sched.h2
-rw-r--r--kernel/signal.c10
-rw-r--r--kernel/smp.c18
-rw-r--r--kernel/task_work.c16
-rw-r--r--kernel/time/timer.c21
-rw-r--r--kernel/trace/blktrace.c30
-rw-r--r--kernel/trace/bpf_trace.c10
-rw-r--r--kernel/trace/ftrace.c12
-rw-r--r--kernel/trace/ring_buffer.c2
-rw-r--r--kernel/trace/trace.c3
-rw-r--r--kernel/trace/trace.h3
-rw-r--r--kernel/trace/trace_boot.c10
-rw-r--r--kernel/trace/trace_entries.h14
-rw-r--r--kernel/trace/trace_events_trigger.c21
-rw-r--r--kernel/trace/trace_export.c16
-rw-r--r--kernel/trace/trace_functions.c2
-rw-r--r--kernel/trace/trace_kprobe.c6
-rw-r--r--kernel/trace/trace_probe.c4
-rw-r--r--kernel/trace/trace_probe.h2
-rw-r--r--kernel/workqueue.c10
56 files changed, 858 insertions, 449 deletions
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 58c9af1d4808..9a1a98dd9e97 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3746,7 +3746,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
return false;
t = btf_type_skip_modifiers(btf, t->type, NULL);
- if (!btf_type_is_int(t)) {
+ if (!btf_type_is_small_int(t)) {
bpf_log(log,
"ret type %s not allowed for fmod_ret\n",
btf_kind_str[BTF_INFO_KIND(t->info)]);
@@ -3768,7 +3768,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
/* skip modifiers */
while (btf_type_is_modifier(t))
t = btf_type_by_id(btf, t->type);
- if (btf_type_is_int(t) || btf_type_is_enum(t))
+ if (btf_type_is_small_int(t) || btf_type_is_enum(t))
/* accessing a scalar */
return true;
if (!btf_type_is_ptr(t)) {
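BTF-typed accesses are performed with single BPF register loads, so integer types wider than 8 bytes (for example __int128) have to be rejected both as fmod_ret return values and as scalar context fields. A minimal sketch of such a predicate, assuming the btf_type layout from <linux/btf.h> (an illustrative reconstruction, not the verbatim helper):

        /* Illustrative reconstruction: accept only integer types that
         * fit in a single 64-bit register. */
        static bool btf_type_is_small_int(const struct btf_type *t)
        {
                return btf_type_is_int(t) && t->size <= sizeof(u64);
        }
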
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 4d76f16524cc..ac53102e244a 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -1276,16 +1276,23 @@ static bool __cgroup_bpf_prog_array_is_empty(struct cgroup *cgrp,
static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int max_optlen)
{
- if (unlikely(max_optlen > PAGE_SIZE) || max_optlen < 0)
+ if (unlikely(max_optlen < 0))
return -EINVAL;
+ if (unlikely(max_optlen > PAGE_SIZE)) {
+ /* We don't expose optvals that are greater than PAGE_SIZE
+ * to the BPF program.
+ */
+ max_optlen = PAGE_SIZE;
+ }
+
ctx->optval = kzalloc(max_optlen, GFP_USER);
if (!ctx->optval)
return -ENOMEM;
ctx->optval_end = ctx->optval + max_optlen;
- return 0;
+ return max_optlen;
}
static void sockopt_free_buf(struct bpf_sockopt_kern *ctx)
@@ -1319,13 +1326,13 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
*/
max_optlen = max_t(int, 16, *optlen);
- ret = sockopt_alloc_buf(&ctx, max_optlen);
- if (ret)
- return ret;
+ max_optlen = sockopt_alloc_buf(&ctx, max_optlen);
+ if (max_optlen < 0)
+ return max_optlen;
ctx.optlen = *optlen;
- if (copy_from_user(ctx.optval, optval, *optlen) != 0) {
+ if (copy_from_user(ctx.optval, optval, min(*optlen, max_optlen)) != 0) {
ret = -EFAULT;
goto out;
}
@@ -1353,8 +1360,14 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
/* export any potential modifications */
*level = ctx.level;
*optname = ctx.optname;
- *optlen = ctx.optlen;
- *kernel_optval = ctx.optval;
+
+ /* optlen == 0 from BPF indicates that we should
+ * use original userspace data.
+ */
+ if (ctx.optlen != 0) {
+ *optlen = ctx.optlen;
+ *kernel_optval = ctx.optval;
+ }
}
out:
@@ -1385,12 +1398,12 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
__cgroup_bpf_prog_array_is_empty(cgrp, BPF_CGROUP_GETSOCKOPT))
return retval;
- ret = sockopt_alloc_buf(&ctx, max_optlen);
- if (ret)
- return ret;
-
ctx.optlen = max_optlen;
+ max_optlen = sockopt_alloc_buf(&ctx, max_optlen);
+ if (max_optlen < 0)
+ return max_optlen;
+
if (!retval) {
/* If kernel getsockopt finished successfully,
* copy whatever was returned to the user back
@@ -1404,10 +1417,8 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
goto out;
}
- if (ctx.optlen > max_optlen)
- ctx.optlen = max_optlen;
-
- if (copy_from_user(ctx.optval, optval, ctx.optlen) != 0) {
+ if (copy_from_user(ctx.optval, optval,
+ min(ctx.optlen, max_optlen)) != 0) {
ret = -EFAULT;
goto out;
}
@@ -1436,10 +1447,12 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
goto out;
}
- if (copy_to_user(optval, ctx.optval, ctx.optlen) ||
- put_user(ctx.optlen, optlen)) {
- ret = -EFAULT;
- goto out;
+ if (ctx.optlen != 0) {
+ if (copy_to_user(optval, ctx.optval, ctx.optlen) ||
+ put_user(ctx.optlen, optlen)) {
+ ret = -EFAULT;
+ goto out;
+ }
}
ret = ctx.retval;
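The contract changes in two ways here: sockopt_alloc_buf() now clamps the buffer to PAGE_SIZE and returns the usable capacity instead of 0, and every copy is bounded by min() of the caller's length and that capacity, with optlen == 0 from BPF meaning "keep the original userspace data". A self-contained sketch of the clamp-and-report-capacity contract in plain C (function and constant names are illustrative, not the kernel's):

        #include <stdio.h>
        #include <stdlib.h>

        #define PAGE_SIZE 4096

        /* Sketch: allocate at most one page and report the usable
         * capacity.  Returns the (possibly clamped) capacity, or -1. */
        static int alloc_optval(char **buf, int max_optlen)
        {
                if (max_optlen < 0)
                        return -1;
                if (max_optlen > PAGE_SIZE)
                        max_optlen = PAGE_SIZE; /* never expose more than a page */
                *buf = calloc(1, max_optlen);
                return *buf ? max_optlen : -1;
        }

        int main(void)
        {
                char *buf;
                int optlen = 8192;              /* caller-supplied length */
                int cap = alloc_optval(&buf, optlen);

                if (cap < 0)
                        return 1;
                /* Copy only what fits: min(optlen, cap), as in the patch. */
                printf("copying %d of %d bytes\n",
                       optlen < cap ? optlen : cap, optlen);
                free(buf);
                return 0;
        }
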
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 0cbb72cdaf63..5fdbc776a760 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -86,12 +86,13 @@ static DEFINE_PER_CPU(struct list_head, dev_flush_list);
static DEFINE_SPINLOCK(dev_map_lock);
static LIST_HEAD(dev_map_list);
-static struct hlist_head *dev_map_create_hash(unsigned int entries)
+static struct hlist_head *dev_map_create_hash(unsigned int entries,
+ int numa_node)
{
int i;
struct hlist_head *hash;
- hash = kmalloc_array(entries, sizeof(*hash), GFP_KERNEL);
+ hash = bpf_map_area_alloc(entries * sizeof(*hash), numa_node);
if (hash != NULL)
for (i = 0; i < entries; i++)
INIT_HLIST_HEAD(&hash[i]);
@@ -145,7 +146,8 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
return -EINVAL;
if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
- dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets);
+ dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets,
+ dtab->map.numa_node);
if (!dtab->dev_index_head)
goto free_charge;
@@ -232,7 +234,7 @@ static void dev_map_free(struct bpf_map *map)
}
}
- kfree(dtab->dev_index_head);
+ bpf_map_area_free(dtab->dev_index_head);
} else {
for (i = 0; i < dtab->map.max_entries; i++) {
struct bpf_dtab_netdev *dev;
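Switching from kmalloc_array() to bpf_map_area_alloc() makes the bucket array honor the map's NUMA node and lets large allocations fall back to vmalloc, which is why the free path has to change in step. A hedged kernel-style sketch (alloc_buckets is a hypothetical name; the two bpf_map_area_* helpers are the real pairing):

        /* Sketch: NUMA-aware bucket allocation paired with its free.
         * bpf_map_area_alloc() may fall back to vmalloc for large sizes,
         * so release with bpf_map_area_free(), never kfree(). */
        static struct hlist_head *alloc_buckets(unsigned int entries, int numa_node)
        {
                struct hlist_head *hash;
                unsigned int i;

                hash = bpf_map_area_alloc(entries * sizeof(*hash), numa_node);
                if (hash)
                        for (i = 0; i < entries; i++)
                                INIT_HLIST_HEAD(&hash[i]);
                return hash;
        }
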
diff --git a/kernel/bpf/net_namespace.c b/kernel/bpf/net_namespace.c
index 78cf061f8179..310241ca7991 100644
--- a/kernel/bpf/net_namespace.c
+++ b/kernel/bpf/net_namespace.c
@@ -19,18 +19,21 @@ struct bpf_netns_link {
* with netns_bpf_mutex held.
*/
struct net *net;
+ struct list_head node; /* node in list of links attached to net */
};
/* Protects updates to netns_bpf */
DEFINE_MUTEX(netns_bpf_mutex);
/* Must be called with netns_bpf_mutex held. */
-static void __net_exit bpf_netns_link_auto_detach(struct bpf_link *link)
+static void netns_bpf_run_array_detach(struct net *net,
+ enum netns_bpf_attach_type type)
{
- struct bpf_netns_link *net_link =
- container_of(link, struct bpf_netns_link, link);
+ struct bpf_prog_array *run_array;
- net_link->net = NULL;
+ run_array = rcu_replace_pointer(net->bpf.run_array[type], NULL,
+ lockdep_is_held(&netns_bpf_mutex));
+ bpf_prog_array_free(run_array);
}
static void bpf_netns_link_release(struct bpf_link *link)
@@ -40,22 +43,18 @@ static void bpf_netns_link_release(struct bpf_link *link)
enum netns_bpf_attach_type type = net_link->netns_type;
struct net *net;
- /* Link auto-detached by dying netns. */
- if (!net_link->net)
- return;
-
mutex_lock(&netns_bpf_mutex);
- /* Recheck after potential sleep. We can race with cleanup_net
- * here, but if we see a non-NULL struct net pointer pre_exit
- * has not happened yet and will block on netns_bpf_mutex.
+ /* We can race with cleanup_net, but if we see a non-NULL
+ * struct net pointer, pre_exit has not run yet and wait for
+ * netns_bpf_mutex.
*/
net = net_link->net;
if (!net)
goto out_unlock;
- net->bpf.links[type] = NULL;
- RCU_INIT_POINTER(net->bpf.progs[type], NULL);
+ netns_bpf_run_array_detach(net, type);
+ list_del(&net_link->node);
out_unlock:
mutex_unlock(&netns_bpf_mutex);
@@ -76,6 +75,7 @@ static int bpf_netns_link_update_prog(struct bpf_link *link,
struct bpf_netns_link *net_link =
container_of(link, struct bpf_netns_link, link);
enum netns_bpf_attach_type type = net_link->netns_type;
+ struct bpf_prog_array *run_array;
struct net *net;
int ret = 0;
@@ -93,8 +93,11 @@ static int bpf_netns_link_update_prog(struct bpf_link *link,
goto out_unlock;
}
+ run_array = rcu_dereference_protected(net->bpf.run_array[type],
+ lockdep_is_held(&netns_bpf_mutex));
+ WRITE_ONCE(run_array->items[0].prog, new_prog);
+
old_prog = xchg(&link->prog, new_prog);
- rcu_assign_pointer(net->bpf.progs[type], new_prog);
bpf_prog_put(old_prog);
out_unlock:
@@ -142,14 +145,38 @@ static const struct bpf_link_ops bpf_netns_link_ops = {
.show_fdinfo = bpf_netns_link_show_fdinfo,
};
+/* Must be called with netns_bpf_mutex held. */
+static int __netns_bpf_prog_query(const union bpf_attr *attr,
+ union bpf_attr __user *uattr,
+ struct net *net,
+ enum netns_bpf_attach_type type)
+{
+ __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
+ struct bpf_prog_array *run_array;
+ u32 prog_cnt = 0, flags = 0;
+
+ run_array = rcu_dereference_protected(net->bpf.run_array[type],
+ lockdep_is_held(&netns_bpf_mutex));
+ if (run_array)
+ prog_cnt = bpf_prog_array_length(run_array);
+
+ if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
+ return -EFAULT;
+ if (copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
+ return -EFAULT;
+ if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
+ return 0;
+
+ return bpf_prog_array_copy_to_user(run_array, prog_ids,
+ attr->query.prog_cnt);
+}
+
int netns_bpf_prog_query(const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
- __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
- u32 prog_id, prog_cnt = 0, flags = 0;
enum netns_bpf_attach_type type;
- struct bpf_prog *attached;
struct net *net;
+ int ret;
if (attr->query.query_flags)
return -EINVAL;
@@ -162,36 +189,25 @@ int netns_bpf_prog_query(const union bpf_attr *attr,
if (IS_ERR(net))
return PTR_ERR(net);
- rcu_read_lock();
- attached = rcu_dereference(net->bpf.progs[type]);
- if (attached) {
- prog_cnt = 1;
- prog_id = attached->aux->id;
- }
- rcu_read_unlock();
+ mutex_lock(&netns_bpf_mutex);
+ ret = __netns_bpf_prog_query(attr, uattr, net, type);
+ mutex_unlock(&netns_bpf_mutex);
put_net(net);
-
- if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
- return -EFAULT;
- if (copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
- return -EFAULT;
-
- if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
- return 0;
-
- if (copy_to_user(prog_ids, &prog_id, sizeof(u32)))
- return -EFAULT;
-
- return 0;
+ return ret;
}
int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
+ struct bpf_prog_array *run_array;
enum netns_bpf_attach_type type;
+ struct bpf_prog *attached;
struct net *net;
int ret;
+ if (attr->target_fd || attr->attach_flags || attr->replace_bpf_fd)
+ return -EINVAL;
+
type = to_netns_bpf_attach_type(attr->attach_type);
if (type < 0)
return -EINVAL;
@@ -200,19 +216,47 @@ int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog)
mutex_lock(&netns_bpf_mutex);
/* Attaching prog directly is not compatible with links */
- if (net->bpf.links[type]) {
+ if (!list_empty(&net->bpf.links[type])) {
ret = -EEXIST;
goto out_unlock;
}
switch (type) {
case NETNS_BPF_FLOW_DISSECTOR:
- ret = flow_dissector_bpf_prog_attach(net, prog);
+ ret = flow_dissector_bpf_prog_attach_check(net, prog);
break;
default:
ret = -EINVAL;
break;
}
+ if (ret)
+ goto out_unlock;
+
+ attached = net->bpf.progs[type];
+ if (attached == prog) {
+ /* The same program cannot be attached twice */
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ run_array = rcu_dereference_protected(net->bpf.run_array[type],
+ lockdep_is_held(&netns_bpf_mutex));
+ if (run_array) {
+ WRITE_ONCE(run_array->items[0].prog, prog);
+ } else {
+ run_array = bpf_prog_array_alloc(1, GFP_KERNEL);
+ if (!run_array) {
+ ret = -ENOMEM;
+ goto out_unlock;
+ }
+ run_array->items[0].prog = prog;
+ rcu_assign_pointer(net->bpf.run_array[type], run_array);
+ }
+
+ net->bpf.progs[type] = prog;
+ if (attached)
+ bpf_prog_put(attached);
+
out_unlock:
mutex_unlock(&netns_bpf_mutex);
@@ -221,63 +265,74 @@ out_unlock:
/* Must be called with netns_bpf_mutex held. */
static int __netns_bpf_prog_detach(struct net *net,
- enum netns_bpf_attach_type type)
+ enum netns_bpf_attach_type type,
+ struct bpf_prog *old)
{
struct bpf_prog *attached;
/* Progs attached via links cannot be detached */
- if (net->bpf.links[type])
+ if (!list_empty(&net->bpf.links[type]))
return -EINVAL;
- attached = rcu_dereference_protected(net->bpf.progs[type],
- lockdep_is_held(&netns_bpf_mutex));
- if (!attached)
+ attached = net->bpf.progs[type];
+ if (!attached || attached != old)
return -ENOENT;
- RCU_INIT_POINTER(net->bpf.progs[type], NULL);
+ netns_bpf_run_array_detach(net, type);
+ net->bpf.progs[type] = NULL;
bpf_prog_put(attached);
return 0;
}
-int netns_bpf_prog_detach(const union bpf_attr *attr)
+int netns_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
{
enum netns_bpf_attach_type type;
+ struct bpf_prog *prog;
int ret;
+ if (attr->target_fd)
+ return -EINVAL;
+
type = to_netns_bpf_attach_type(attr->attach_type);
if (type < 0)
return -EINVAL;
+ prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+
mutex_lock(&netns_bpf_mutex);
- ret = __netns_bpf_prog_detach(current->nsproxy->net_ns, type);
+ ret = __netns_bpf_prog_detach(current->nsproxy->net_ns, type, prog);
mutex_unlock(&netns_bpf_mutex);
+ bpf_prog_put(prog);
+
return ret;
}
static int netns_bpf_link_attach(struct net *net, struct bpf_link *link,
enum netns_bpf_attach_type type)
{
- struct bpf_prog *prog;
+ struct bpf_netns_link *net_link =
+ container_of(link, struct bpf_netns_link, link);
+ struct bpf_prog_array *run_array;
int err;
mutex_lock(&netns_bpf_mutex);
/* Allow attaching only one prog or link for now */
- if (net->bpf.links[type]) {
+ if (!list_empty(&net->bpf.links[type])) {
err = -E2BIG;
goto out_unlock;
}
/* Links are not compatible with attaching prog directly */
- prog = rcu_dereference_protected(net->bpf.progs[type],
- lockdep_is_held(&netns_bpf_mutex));
- if (prog) {
+ if (net->bpf.progs[type]) {
err = -EEXIST;
goto out_unlock;
}
switch (type) {
case NETNS_BPF_FLOW_DISSECTOR:
- err = flow_dissector_bpf_prog_attach(net, link->prog);
+ err = flow_dissector_bpf_prog_attach_check(net, link->prog);
break;
default:
err = -EINVAL;
@@ -286,7 +341,15 @@ static int netns_bpf_link_attach(struct net *net, struct bpf_link *link,
if (err)
goto out_unlock;
- net->bpf.links[type] = link;
+ run_array = bpf_prog_array_alloc(1, GFP_KERNEL);
+ if (!run_array) {
+ err = -ENOMEM;
+ goto out_unlock;
+ }
+ run_array->items[0].prog = link->prog;
+ rcu_assign_pointer(net->bpf.run_array[type], run_array);
+
+ list_add_tail(&net_link->node, &net->bpf.links[type]);
out_unlock:
mutex_unlock(&netns_bpf_mutex);
@@ -345,23 +408,34 @@ out_put_net:
return err;
}
+static int __net_init netns_bpf_pernet_init(struct net *net)
+{
+ int type;
+
+ for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++)
+ INIT_LIST_HEAD(&net->bpf.links[type]);
+
+ return 0;
+}
+
static void __net_exit netns_bpf_pernet_pre_exit(struct net *net)
{
enum netns_bpf_attach_type type;
- struct bpf_link *link;
+ struct bpf_netns_link *net_link;
mutex_lock(&netns_bpf_mutex);
for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++) {
- link = net->bpf.links[type];
- if (link)
- bpf_netns_link_auto_detach(link);
- else
- __netns_bpf_prog_detach(net, type);
+ netns_bpf_run_array_detach(net, type);
+ list_for_each_entry(net_link, &net->bpf.links[type], node)
+ net_link->net = NULL; /* auto-detach link */
+ if (net->bpf.progs[type])
+ bpf_prog_put(net->bpf.progs[type]);
}
mutex_unlock(&netns_bpf_mutex);
}
static struct pernet_operations netns_bpf_pernet_ops __net_initdata = {
+ .init = netns_bpf_pernet_init,
.pre_exit = netns_bpf_pernet_pre_exit,
};
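The rework replaces a bare prog pointer with a bpf_prog_array that readers traverse under RCU while writers serialize on netns_bpf_mutex; teardown atomically swaps the published pointer and frees the old copy only once it is unreachable by new readers. A hedged generic sketch of that swap-and-free idiom (struct foo, slot, and lock are placeholders):

        /* Generic form of the swap-and-free idiom used by
         * netns_bpf_run_array_detach() above; assumes 'slot' is a
         * struct foo __rcu * and 'lock' serializes all updaters. */
        struct foo *old;

        old = rcu_replace_pointer(slot, NULL, lockdep_is_held(&lock));
        if (old)
                kfree_rcu(old, rcu);    /* 'foo' must embed struct rcu_head rcu */
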
diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c
index 21cde24386db..cae9d505e04a 100644
--- a/kernel/bpf/reuseport_array.c
+++ b/kernel/bpf/reuseport_array.c
@@ -20,11 +20,14 @@ static struct reuseport_array *reuseport_array(struct bpf_map *map)
/* The caller must hold the reuseport_lock */
void bpf_sk_reuseport_detach(struct sock *sk)
{
- struct sock __rcu **socks;
+ uintptr_t sk_user_data;
write_lock_bh(&sk->sk_callback_lock);
- socks = sk->sk_user_data;
- if (socks) {
+ sk_user_data = (uintptr_t)sk->sk_user_data;
+ if (sk_user_data & SK_USER_DATA_BPF) {
+ struct sock __rcu **socks;
+
+ socks = (void *)(sk_user_data & SK_USER_DATA_PTRMASK);
WRITE_ONCE(sk->sk_user_data, NULL);
/*
* Do not move this NULL assignment outside of
@@ -252,6 +255,7 @@ int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key,
struct sock *free_osk = NULL, *osk, *nsk;
struct sock_reuseport *reuse;
u32 index = *(u32 *)key;
+ uintptr_t sk_user_data;
struct socket *socket;
int err, fd;
@@ -305,7 +309,9 @@ int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key,
if (err)
goto put_file_unlock;
- WRITE_ONCE(nsk->sk_user_data, &array->ptrs[index]);
+ sk_user_data = (uintptr_t)&array->ptrs[index] | SK_USER_DATA_NOCOPY |
+ SK_USER_DATA_BPF;
+ WRITE_ONCE(nsk->sk_user_data, (void *)sk_user_data);
rcu_assign_pointer(array->ptrs[index], nsk);
free_osk = osk;
err = 0;
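The sk_user_data fix relies on pointer alignment: the low bits of an aligned pointer are guaranteed zero, so SK_USER_DATA_NOCOPY and SK_USER_DATA_BPF can ride along in them, and SK_USER_DATA_PTRMASK strips them off again. A self-contained userspace demo of the same tagged-pointer trick (flag names and values here are illustrative only):

        #include <stdio.h>
        #include <stdint.h>

        /* Illustrative flags living in the low bits of an aligned pointer. */
        #define DATA_NOCOPY  1UL
        #define DATA_BPF     2UL
        #define DATA_PTRMASK (~(DATA_NOCOPY | DATA_BPF))

        int main(void)
        {
                static long payload = 42;      /* at least 4-byte aligned */
                uintptr_t tagged = (uintptr_t)&payload | DATA_NOCOPY | DATA_BPF;

                if (tagged & DATA_BPF) {       /* check a flag bit... */
                        long *p = (long *)(tagged & DATA_PTRMASK); /* ...then unmask */
                        printf("payload = %ld\n", *p);
                }
                return 0;
        }
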
diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c
index 180414bb0d3e..0af88bbc1c15 100644
--- a/kernel/bpf/ringbuf.c
+++ b/kernel/bpf/ringbuf.c
@@ -132,15 +132,6 @@ static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node)
{
struct bpf_ringbuf *rb;
- if (!data_sz || !PAGE_ALIGNED(data_sz))
- return ERR_PTR(-EINVAL);
-
-#ifdef CONFIG_64BIT
- /* on 32-bit arch, it's impossible to overflow record's hdr->pgoff */
- if (data_sz > RINGBUF_MAX_DATA_SZ)
- return ERR_PTR(-E2BIG);
-#endif
-
rb = bpf_ringbuf_area_alloc(data_sz, numa_node);
if (!rb)
return ERR_PTR(-ENOMEM);
@@ -166,9 +157,16 @@ static struct bpf_map *ringbuf_map_alloc(union bpf_attr *attr)
return ERR_PTR(-EINVAL);
if (attr->key_size || attr->value_size ||
- attr->max_entries == 0 || !PAGE_ALIGNED(attr->max_entries))
+ !is_power_of_2(attr->max_entries) ||
+ !PAGE_ALIGNED(attr->max_entries))
return ERR_PTR(-EINVAL);
+#ifdef CONFIG_64BIT
+ /* on 32-bit arch, it's impossible to overflow record's hdr->pgoff */
+ if (attr->max_entries > RINGBUF_MAX_DATA_SZ)
+ return ERR_PTR(-E2BIG);
+#endif
+
rb_map = kzalloc(sizeof(*rb_map), GFP_USER);
if (!rb_map)
return ERR_PTR(-ENOMEM);
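Moving the size validation from bpf_ringbuf_alloc() up into ringbuf_map_alloc() also tightens it: max_entries must now be a page-aligned power of two, which lets ring positions wrap with a mask rather than a division. A runnable sketch of the combined check (PAGE_SIZE hardcoded for illustration):

        #include <stdbool.h>
        #include <stdio.h>

        #define PAGE_SIZE 4096UL

        static bool is_power_of_2(unsigned long n)
        {
                return n != 0 && (n & (n - 1)) == 0;
        }

        static bool ringbuf_size_ok(unsigned long max_entries)
        {
                /* Mirrors the patch: power of two *and* page aligned. */
                return is_power_of_2(max_entries) &&
                       (max_entries % PAGE_SIZE) == 0;
        }

        int main(void)
        {
                unsigned long sizes[] = { 4096, 8192, 12288, 0, 2048 };
                for (unsigned i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
                        printf("%lu -> %s\n", sizes[i],
                               ringbuf_size_ok(sizes[i]) ? "ok" : "rejected");
                return 0;
        }
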
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 8da159936bab..0fd80ac81f70 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2121,7 +2121,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
!bpf_capable())
return -EPERM;
- if (is_net_admin_prog_type(type) && !capable(CAP_NET_ADMIN))
+ if (is_net_admin_prog_type(type) && !capable(CAP_NET_ADMIN) && !capable(CAP_SYS_ADMIN))
return -EPERM;
if (is_perfmon_prog_type(type) && !perfmon_capable())
return -EPERM;
@@ -2893,13 +2893,11 @@ static int bpf_prog_detach(const union bpf_attr *attr)
switch (ptype) {
case BPF_PROG_TYPE_SK_MSG:
case BPF_PROG_TYPE_SK_SKB:
- return sock_map_get_from_fd(attr, NULL);
+ return sock_map_prog_detach(attr, ptype);
case BPF_PROG_TYPE_LIRC_MODE2:
return lirc_prog_detach(attr);
case BPF_PROG_TYPE_FLOW_DISSECTOR:
- if (!capable(CAP_NET_ADMIN))
- return -EPERM;
- return netns_bpf_prog_detach(attr);
+ return netns_bpf_prog_detach(attr, ptype);
case BPF_PROG_TYPE_CGROUP_DEVICE:
case BPF_PROG_TYPE_CGROUP_SKB:
case BPF_PROG_TYPE_CGROUP_SOCK:
@@ -3139,7 +3137,8 @@ static const struct bpf_map *bpf_map_from_imm(const struct bpf_prog *prog,
return NULL;
}
-static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog)
+static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog,
+ const struct cred *f_cred)
{
const struct bpf_map *map;
struct bpf_insn *insns;
@@ -3165,7 +3164,7 @@ static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog)
code == (BPF_JMP | BPF_CALL_ARGS)) {
if (code == (BPF_JMP | BPF_CALL_ARGS))
insns[i].code = BPF_JMP | BPF_CALL;
- if (!bpf_dump_raw_ok())
+ if (!bpf_dump_raw_ok(f_cred))
insns[i].imm = 0;
continue;
}
@@ -3221,7 +3220,8 @@ static int set_info_rec_size(struct bpf_prog_info *info)
return 0;
}
-static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
+static int bpf_prog_get_info_by_fd(struct file *file,
+ struct bpf_prog *prog,
const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
@@ -3290,11 +3290,11 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
struct bpf_insn *insns_sanitized;
bool fault;
- if (prog->blinded && !bpf_dump_raw_ok()) {
+ if (prog->blinded && !bpf_dump_raw_ok(file->f_cred)) {
info.xlated_prog_insns = 0;
goto done;
}
- insns_sanitized = bpf_insn_prepare_dump(prog);
+ insns_sanitized = bpf_insn_prepare_dump(prog, file->f_cred);
if (!insns_sanitized)
return -ENOMEM;
uinsns = u64_to_user_ptr(info.xlated_prog_insns);
@@ -3328,7 +3328,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
}
if (info.jited_prog_len && ulen) {
- if (bpf_dump_raw_ok()) {
+ if (bpf_dump_raw_ok(file->f_cred)) {
uinsns = u64_to_user_ptr(info.jited_prog_insns);
ulen = min_t(u32, info.jited_prog_len, ulen);
@@ -3363,7 +3363,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
ulen = info.nr_jited_ksyms;
info.nr_jited_ksyms = prog->aux->func_cnt ? : 1;
if (ulen) {
- if (bpf_dump_raw_ok()) {
+ if (bpf_dump_raw_ok(file->f_cred)) {
unsigned long ksym_addr;
u64 __user *user_ksyms;
u32 i;
@@ -3394,7 +3394,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
ulen = info.nr_jited_func_lens;
info.nr_jited_func_lens = prog->aux->func_cnt ? : 1;
if (ulen) {
- if (bpf_dump_raw_ok()) {
+ if (bpf_dump_raw_ok(file->f_cred)) {
u32 __user *user_lens;
u32 func_len, i;
@@ -3451,7 +3451,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
else
info.nr_jited_line_info = 0;
if (info.nr_jited_line_info && ulen) {
- if (bpf_dump_raw_ok()) {
+ if (bpf_dump_raw_ok(file->f_cred)) {
__u64 __user *user_linfo;
u32 i;
@@ -3497,7 +3497,8 @@ done:
return 0;
}
-static int bpf_map_get_info_by_fd(struct bpf_map *map,
+static int bpf_map_get_info_by_fd(struct file *file,
+ struct bpf_map *map,
const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
@@ -3540,7 +3541,8 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map,
return 0;
}
-static int bpf_btf_get_info_by_fd(struct btf *btf,
+static int bpf_btf_get_info_by_fd(struct file *file,
+ struct btf *btf,
const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
@@ -3555,7 +3557,8 @@ static int bpf_btf_get_info_by_fd(struct btf *btf,
return btf_get_info_by_fd(btf, attr, uattr);
}
-static int bpf_link_get_info_by_fd(struct bpf_link *link,
+static int bpf_link_get_info_by_fd(struct file *file,
+ struct bpf_link *link,
const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
@@ -3608,15 +3611,15 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
return -EBADFD;
if (f.file->f_op == &bpf_prog_fops)
- err = bpf_prog_get_info_by_fd(f.file->private_data, attr,
+ err = bpf_prog_get_info_by_fd(f.file, f.file->private_data, attr,
uattr);
else if (f.file->f_op == &bpf_map_fops)
- err = bpf_map_get_info_by_fd(f.file->private_data, attr,
+ err = bpf_map_get_info_by_fd(f.file, f.file->private_data, attr,
uattr);
else if (f.file->f_op == &btf_fops)
- err = bpf_btf_get_info_by_fd(f.file->private_data, attr, uattr);
+ err = bpf_btf_get_info_by_fd(f.file, f.file->private_data, attr, uattr);
else if (f.file->f_op == &bpf_link_fops)
- err = bpf_link_get_info_by_fd(f.file->private_data,
+ err = bpf_link_get_info_by_fd(f.file, f.file->private_data,
attr, uattr);
else
err = -EINVAL;
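All the dump paths now gate on the credentials of whoever opened the fd (file->f_cred) rather than on current, so passing an fd to a more privileged process no longer upgrades what it reveals. A hedged sketch of such a cred-based gate (can_see_raw_insns is a hypothetical name; the bpf_dump_raw_ok() calls above take the same shape):

        /* Sketch: decide visibility from the credentials captured at
         * open() time, not from whoever happens to call in later. */
        static bool can_see_raw_insns(const struct file *file)
        {
                return kallsyms_show_value(file->f_cred);
        }
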
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 34cde841ab68..94cead5a43e5 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -399,8 +399,7 @@ static bool reg_type_not_null(enum bpf_reg_type type)
return type == PTR_TO_SOCKET ||
type == PTR_TO_TCP_SOCK ||
type == PTR_TO_MAP_VALUE ||
- type == PTR_TO_SOCK_COMMON ||
- type == PTR_TO_BTF_ID;
+ type == PTR_TO_SOCK_COMMON;
}
static bool reg_type_may_be_null(enum bpf_reg_type type)
@@ -9801,7 +9800,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
int i, j, subprog_start, subprog_end = 0, len, subprog;
struct bpf_insn *insn;
void *old_bpf_func;
- int err;
+ int err, num_exentries;
if (env->subprog_cnt <= 1)
return 0;
@@ -9876,6 +9875,14 @@ static int jit_subprogs(struct bpf_verifier_env *env)
func[i]->aux->nr_linfo = prog->aux->nr_linfo;
func[i]->aux->jited_linfo = prog->aux->jited_linfo;
func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
+ num_exentries = 0;
+ insn = func[i]->insnsi;
+ for (j = 0; j < func[i]->len; j++, insn++) {
+ if (BPF_CLASS(insn->code) == BPF_LDX &&
+ BPF_MODE(insn->code) == BPF_PROBE_MEM)
+ num_exentries++;
+ }
+ func[i]->aux->num_exentries = num_exentries;
func[i] = bpf_int_jit_compile(func[i]);
if (!func[i]->jited) {
err = -ENOTSUPP;
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 1ea181a58465..dd247747ec14 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -6439,18 +6439,8 @@ void cgroup_sk_alloc_disable(void)
void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
{
- if (cgroup_sk_alloc_disabled)
- return;
-
- /* Socket clone path */
- if (skcd->val) {
- /*
- * We might be cloning a socket which is left in an empty
- * cgroup and the cgroup might have already been rmdir'd.
- * Don't use cgroup_get_live().
- */
- cgroup_get(sock_cgroup_ptr(skcd));
- cgroup_bpf_get(sock_cgroup_ptr(skcd));
+ if (cgroup_sk_alloc_disabled) {
+ skcd->no_refcnt = 1;
return;
}
@@ -6475,10 +6465,27 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
rcu_read_unlock();
}
+void cgroup_sk_clone(struct sock_cgroup_data *skcd)
+{
+ if (skcd->val) {
+ if (skcd->no_refcnt)
+ return;
+ /*
+ * We might be cloning a socket which is left in an empty
+ * cgroup and the cgroup might have already been rmdir'd.
+ * Don't use cgroup_get_live().
+ */
+ cgroup_get(sock_cgroup_ptr(skcd));
+ cgroup_bpf_get(sock_cgroup_ptr(skcd));
+ }
+}
+
void cgroup_sk_free(struct sock_cgroup_data *skcd)
{
struct cgroup *cgrp = sock_cgroup_ptr(skcd);
+ if (skcd->no_refcnt)
+ return;
cgroup_bpf_put(cgrp);
cgroup_put(cgrp);
}
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index ccc0f98abdd4..9e5934780f41 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -169,18 +169,18 @@ int __weak kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
{
int err;
- err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr,
+ err = copy_from_kernel_nofault(bpt->saved_instr, (char *)bpt->bpt_addr,
BREAK_INSTR_SIZE);
if (err)
return err;
- err = probe_kernel_write((char *)bpt->bpt_addr,
+ err = copy_to_kernel_nofault((char *)bpt->bpt_addr,
arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE);
return err;
}
int __weak kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
{
- return probe_kernel_write((char *)bpt->bpt_addr,
+ return copy_to_kernel_nofault((char *)bpt->bpt_addr,
(char *)bpt->saved_instr, BREAK_INSTR_SIZE);
}
@@ -587,6 +587,7 @@ static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs,
arch_kgdb_ops.disable_hw_break(regs);
acquirelock:
+ rcu_read_lock();
/*
* Interrupts will be restored by the 'trap return' code, except when
* single stepping.
@@ -646,6 +647,7 @@ return_normal:
atomic_dec(&slaves_in_kgdb);
dbg_touch_watchdogs();
local_irq_restore(flags);
+ rcu_read_unlock();
return 0;
}
cpu_relax();
@@ -664,6 +666,7 @@ return_normal:
raw_spin_unlock(&dbg_master_lock);
dbg_touch_watchdogs();
local_irq_restore(flags);
+ rcu_read_unlock();
goto acquirelock;
}
@@ -787,6 +790,7 @@ kgdb_restore:
raw_spin_unlock(&dbg_master_lock);
dbg_touch_watchdogs();
local_irq_restore(flags);
+ rcu_read_unlock();
return kgdb_info[cpu].ret_state;
}
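probe_kernel_read()/probe_kernel_write() were renamed tree-wide to copy_from_kernel_nofault()/copy_to_kernel_nofault(); the semantics are unchanged: 0 on success, a negative errno if the range cannot be accessed, and no fault is ever taken. A short usage sketch:

        /* Sketch: peek at a possibly-invalid kernel address without
         * risking a fault.  Fills 'val' and returns 0 on success. */
        unsigned long val;

        if (copy_from_kernel_nofault(&val, (void *)addr, sizeof(val)))
                pr_warn("address %px not readable\n", (void *)addr);
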
diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c
index 4b280fc7dd67..a790026e42d0 100644
--- a/kernel/debug/gdbstub.c
+++ b/kernel/debug/gdbstub.c
@@ -247,7 +247,7 @@ char *kgdb_mem2hex(char *mem, char *buf, int count)
*/
tmp = buf + count;
- err = probe_kernel_read(tmp, mem, count);
+ err = copy_from_kernel_nofault(tmp, mem, count);
if (err)
return NULL;
while (count > 0) {
@@ -283,7 +283,7 @@ int kgdb_hex2mem(char *buf, char *mem, int count)
*tmp_raw |= hex_to_bin(*tmp_hex--) << 4;
}
- return probe_kernel_write(mem, tmp_raw, count);
+ return copy_to_kernel_nofault(mem, tmp_raw, count);
}
/*
@@ -335,7 +335,7 @@ static int kgdb_ebin2mem(char *buf, char *mem, int count)
size++;
}
- return probe_kernel_write(mem, c, size);
+ return copy_to_kernel_nofault(mem, c, size);
}
#if DBG_MAX_REG_NUM > 0
@@ -792,6 +792,19 @@ static void gdb_cmd_query(struct kgdb_state *ks)
}
break;
#endif
+#ifdef CONFIG_HAVE_ARCH_KGDB_QXFER_PKT
+ case 'S':
+ if (!strncmp(remcom_in_buffer, "qSupported:", 11))
+ strcpy(remcom_out_buffer, kgdb_arch_gdb_stub_feature);
+ break;
+ case 'X':
+ if (!strncmp(remcom_in_buffer, "qXfer:", 6))
+ kgdb_arch_handle_qxfer_pkt(remcom_in_buffer,
+ remcom_out_buffer);
+ break;
+#endif
+ default:
+ break;
}
}
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index 924bc9298a42..683a799618ad 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -542,6 +542,44 @@ static int kdb_search_string(char *searched, char *searchfor)
return 0;
}
+static void kdb_msg_write(const char *msg, int msg_len)
+{
+ struct console *c;
+
+ if (msg_len == 0)
+ return;
+
+ if (dbg_io_ops) {
+ const char *cp = msg;
+ int len = msg_len;
+
+ while (len--) {
+ dbg_io_ops->write_char(*cp);
+ cp++;
+ }
+ }
+
+ for_each_console(c) {
+ if (!(c->flags & CON_ENABLED))
+ continue;
+ if (c == dbg_io_ops->cons)
+ continue;
+ /*
+ * Set oops_in_progress to encourage the console drivers to
+ * disregard their internal spin locks: in the current calling
+ * context the risk of deadlock is a bigger problem than risks
+ * due to re-entering the console driver. We operate directly on
+ * oops_in_progress rather than using bust_spinlocks() because
+ * the calls bust_spinlocks() makes on exit are not appropriate
+ * for this calling context.
+ */
+ ++oops_in_progress;
+ c->write(c, msg, msg_len);
+ --oops_in_progress;
+ touch_nmi_watchdog();
+ }
+}
+
int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap)
{
int diag;
@@ -553,7 +591,6 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap)
int this_cpu, old_cpu;
char *cp, *cp2, *cphold = NULL, replaced_byte = ' ';
char *moreprompt = "more> ";
- struct console *c;
unsigned long uninitialized_var(flags);
/* Serialize kdb_printf if multiple cpus try to write at once.
@@ -687,22 +724,11 @@ kdb_printit:
*/
retlen = strlen(kdb_buffer);
cp = (char *) printk_skip_headers(kdb_buffer);
- if (!dbg_kdb_mode && kgdb_connected) {
+ if (!dbg_kdb_mode && kgdb_connected)
gdbstub_msg_write(cp, retlen - (cp - kdb_buffer));
- } else {
- if (dbg_io_ops && !dbg_io_ops->is_console) {
- len = retlen - (cp - kdb_buffer);
- cp2 = cp;
- while (len--) {
- dbg_io_ops->write_char(*cp2);
- cp2++;
- }
- }
- for_each_console(c) {
- c->write(c, cp, retlen - (cp - kdb_buffer));
- touch_nmi_watchdog();
- }
- }
+ else
+ kdb_msg_write(cp, retlen - (cp - kdb_buffer));
+
if (logging) {
saved_loglevel = console_loglevel;
console_loglevel = CONSOLE_LOGLEVEL_SILENT;
@@ -751,19 +777,7 @@ kdb_printit:
moreprompt = "more> ";
kdb_input_flush();
-
- if (dbg_io_ops && !dbg_io_ops->is_console) {
- len = strlen(moreprompt);
- cp = moreprompt;
- while (len--) {
- dbg_io_ops->write_char(*cp);
- cp++;
- }
- }
- for_each_console(c) {
- c->write(c, moreprompt, strlen(moreprompt));
- touch_nmi_watchdog();
- }
+ kdb_msg_write(moreprompt, strlen(moreprompt));
if (logging)
printk("%s", moreprompt);
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index ec190569f690..5c7949061671 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -2326,7 +2326,8 @@ void kdb_ps1(const struct task_struct *p)
int cpu;
unsigned long tmp;
- if (!p || probe_kernel_read(&tmp, (char *)p, sizeof(unsigned long)))
+ if (!p ||
+ copy_from_kernel_nofault(&tmp, (char *)p, sizeof(unsigned long)))
return;
cpu = kdb_process_cpu(p);
diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c
index b8e6306e7e13..004c5b6c87f8 100644
--- a/kernel/debug/kdb/kdb_support.c
+++ b/kernel/debug/kdb/kdb_support.c
@@ -325,7 +325,7 @@ char *kdb_strdup(const char *str, gfp_t type)
*/
int kdb_getarea_size(void *res, unsigned long addr, size_t size)
{
- int ret = probe_kernel_read((char *)res, (char *)addr, size);
+ int ret = copy_from_kernel_nofault((char *)res, (char *)addr, size);
if (ret) {
if (!KDB_STATE(SUPPRESS)) {
kdb_printf("kdb_getarea: Bad address 0x%lx\n", addr);
@@ -350,7 +350,7 @@ int kdb_getarea_size(void *res, unsigned long addr, size_t size)
*/
int kdb_putarea_size(unsigned long addr, void *res, size_t size)
{
- int ret = probe_kernel_read((char *)addr, (char *)res, size);
+ int ret = copy_from_kernel_nofault((char *)addr, (char *)res, size);
if (ret) {
if (!KDB_STATE(SUPPRESS)) {
kdb_printf("kdb_putarea: Bad address 0x%lx\n", addr);
@@ -624,7 +624,8 @@ char kdb_task_state_char (const struct task_struct *p)
char state;
unsigned long tmp;
- if (!p || probe_kernel_read(&tmp, (char *)p, sizeof(unsigned long)))
+ if (!p ||
+ copy_from_kernel_nofault(&tmp, (char *)p, sizeof(unsigned long)))
return 'E';
cpu = kdb_process_cpu(p);
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index d006668c0027..1da3f44f2565 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -71,20 +71,21 @@ config SWIOTLB
# in the pagetables
#
config DMA_NONCOHERENT_MMAP
+ default y if !MMU
bool
-config DMA_REMAP
- depends on MMU
+config DMA_COHERENT_POOL
select GENERIC_ALLOCATOR
- select DMA_NONCOHERENT_MMAP
bool
-config DMA_COHERENT_POOL
+config DMA_REMAP
bool
- select DMA_REMAP
+ depends on MMU
+ select DMA_NONCOHERENT_MMAP
config DMA_DIRECT_REMAP
bool
+ select DMA_REMAP
select DMA_COHERENT_POOL
config DMA_CMA
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 0a4881e59aa7..67f060b86a73 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -70,7 +70,7 @@ gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
return 0;
}
-static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
+bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
{
return phys_to_dma_direct(dev, phys) + size - 1 <=
min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit);
@@ -109,14 +109,15 @@ static inline bool dma_should_free_from_pool(struct device *dev,
return false;
}
-struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
+static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
gfp_t gfp, unsigned long attrs)
{
- size_t alloc_size = PAGE_ALIGN(size);
int node = dev_to_node(dev);
struct page *page = NULL;
u64 phys_limit;
+ WARN_ON_ONCE(!PAGE_ALIGNED(size));
+
if (attrs & DMA_ATTR_NO_WARN)
gfp |= __GFP_NOWARN;
@@ -124,14 +125,14 @@ struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
gfp &= ~__GFP_ZERO;
gfp |= dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
&phys_limit);
- page = dma_alloc_contiguous(dev, alloc_size, gfp);
+ page = dma_alloc_contiguous(dev, size, gfp);
if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
- dma_free_contiguous(dev, page, alloc_size);
+ dma_free_contiguous(dev, page, size);
page = NULL;
}
again:
if (!page)
- page = alloc_pages_node(node, gfp, get_order(alloc_size));
+ page = alloc_pages_node(node, gfp, get_order(size));
if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
dma_free_contiguous(dev, page, size);
page = NULL;
@@ -157,9 +158,12 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
{
struct page *page;
void *ret;
+ int err;
+
+ size = PAGE_ALIGN(size);
if (dma_should_alloc_from_pool(dev, gfp, attrs)) {
- ret = dma_alloc_from_pool(dev, PAGE_ALIGN(size), &page, gfp);
+ ret = dma_alloc_from_pool(dev, size, &page, gfp);
if (!ret)
return NULL;
goto done;
@@ -183,14 +187,20 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
dma_alloc_need_uncached(dev, attrs)) ||
(IS_ENABLED(CONFIG_DMA_REMAP) && PageHighMem(page))) {
/* remove any dirty cache lines on the kernel alias */
- arch_dma_prep_coherent(page, PAGE_ALIGN(size));
+ arch_dma_prep_coherent(page, size);
/* create a coherent mapping */
- ret = dma_common_contiguous_remap(page, PAGE_ALIGN(size),
+ ret = dma_common_contiguous_remap(page, size,
dma_pgprot(dev, PAGE_KERNEL, attrs),
__builtin_return_address(0));
if (!ret)
goto out_free_pages;
+ if (force_dma_unencrypted(dev)) {
+ err = set_memory_decrypted((unsigned long)ret,
+ 1 << get_order(size));
+ if (err)
+ goto out_free_pages;
+ }
memset(ret, 0, size);
goto done;
}
@@ -207,8 +217,12 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
}
ret = page_address(page);
- if (force_dma_unencrypted(dev))
- set_memory_decrypted((unsigned long)ret, 1 << get_order(size));
+ if (force_dma_unencrypted(dev)) {
+ err = set_memory_decrypted((unsigned long)ret,
+ 1 << get_order(size));
+ if (err)
+ goto out_free_pages;
+ }
memset(ret, 0, size);
@@ -217,7 +231,7 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
arch_dma_prep_coherent(page, size);
ret = arch_dma_set_uncached(ret, size);
if (IS_ERR(ret))
- goto out_free_pages;
+ goto out_encrypt_pages;
}
done:
if (force_dma_unencrypted(dev))
@@ -225,6 +239,15 @@ done:
else
*dma_handle = phys_to_dma(dev, page_to_phys(page));
return ret;
+
+out_encrypt_pages:
+ if (force_dma_unencrypted(dev)) {
+ err = set_memory_encrypted((unsigned long)page_address(page),
+ 1 << get_order(size));
+ /* If memory cannot be re-encrypted, it must be leaked */
+ if (err)
+ return NULL;
+ }
out_free_pages:
dma_free_contiguous(dev, page, size);
return NULL;
@@ -459,7 +482,6 @@ int dma_direct_get_sgtable(struct device *dev, struct sg_table *sgt,
return ret;
}
-#ifdef CONFIG_MMU
bool dma_direct_can_mmap(struct device *dev)
{
return dev_is_dma_coherent(dev) ||
@@ -485,19 +507,6 @@ int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma,
return remap_pfn_range(vma, vma->vm_start, pfn + vma->vm_pgoff,
user_count << PAGE_SHIFT, vma->vm_page_prot);
}
-#else /* CONFIG_MMU */
-bool dma_direct_can_mmap(struct device *dev)
-{
- return false;
-}
-
-int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma,
- void *cpu_addr, dma_addr_t dma_addr, size_t size,
- unsigned long attrs)
-{
- return -ENXIO;
-}
-#endif /* CONFIG_MMU */
int dma_direct_supported(struct device *dev, u64 mask)
{
@@ -530,3 +539,9 @@ size_t dma_direct_max_mapping_size(struct device *dev)
return swiotlb_max_mapping_size(dev);
return SIZE_MAX;
}
+
+bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr)
+{
+ return !dev_is_dma_coherent(dev) ||
+ is_swiotlb_buffer(dma_to_phys(dev, dma_addr));
+}
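The new out_encrypt_pages label fixes the unwind ordering for force_dma_unencrypted() devices: pages that were successfully decrypted must be re-encrypted before being handed back to the allocator, and if re-encryption fails the memory is deliberately leaked rather than freed in a mixed state. Compressed, the error path from the hunk above has this shape (a sketch, not new code):

        out_encrypt_pages:
                if (force_dma_unencrypted(dev) &&
                    set_memory_encrypted((unsigned long)page_address(page),
                                         1 << get_order(size)))
                        return NULL;    /* cannot re-encrypt: leak, don't free */
        out_free_pages:
                dma_free_contiguous(dev, page, size);
                return NULL;
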
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index 98e3d873792e..a8c18c9a796f 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -397,6 +397,16 @@ size_t dma_max_mapping_size(struct device *dev)
}
EXPORT_SYMBOL_GPL(dma_max_mapping_size);
+bool dma_need_sync(struct device *dev, dma_addr_t dma_addr)
+{
+ const struct dma_map_ops *ops = get_dma_ops(dev);
+
+ if (dma_is_direct(ops))
+ return dma_direct_need_sync(dev, dma_addr);
+ return ops->sync_single_for_cpu || ops->sync_single_for_device;
+}
+EXPORT_SYMBOL_GPL(dma_need_sync);
+
unsigned long dma_get_merge_boundary(struct device *dev)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
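dma_need_sync() lets a driver ask once whether dma_sync_*() calls are ever needed for a given mapping (they are not when the device is coherent and no swiotlb bouncing is involved) and then skip them on the hot path. A hedged sketch of the intended use:

        /* Sketch: query once at mapping time, elide per-buffer syncs. */
        bool need_sync = dma_need_sync(dev, dma_addr);

        if (need_sync)
                dma_sync_single_for_cpu(dev, dma_addr, len, DMA_FROM_DEVICE);
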
diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c
index 35bb51c31fff..6bc74a2d5127 100644
--- a/kernel/dma/pool.c
+++ b/kernel/dma/pool.c
@@ -6,7 +6,6 @@
#include <linux/debugfs.h>
#include <linux/dma-direct.h>
#include <linux/dma-noncoherent.h>
-#include <linux/dma-contiguous.h>
#include <linux/init.h>
#include <linux/genalloc.h>
#include <linux/set_memory.h>
@@ -69,12 +68,7 @@ static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size,
do {
pool_size = 1 << (PAGE_SHIFT + order);
-
- if (dev_get_cma_area(NULL))
- page = dma_alloc_from_contiguous(NULL, 1 << order,
- order, false);
- else
- page = alloc_pages(gfp, order);
+ page = alloc_pages(gfp, order);
} while (!page && order-- > 0);
if (!page)
goto out;
@@ -118,8 +112,7 @@ remove_mapping:
dma_common_free_remap(addr, pool_size);
#endif
free_page: __maybe_unused
- if (!dma_release_from_contiguous(NULL, page, 1 << order))
- __free_pages(page, order);
+ __free_pages(page, order);
out:
return ret;
}
@@ -175,10 +168,9 @@ static int __init dma_atomic_pool_init(void)
* sizes to 128KB per 1GB of memory, min 128KB, max MAX_ORDER-1.
*/
if (!atomic_pool_size) {
- atomic_pool_size = max(totalram_pages() >> PAGE_SHIFT, 1UL) *
- SZ_128K;
- atomic_pool_size = min_t(size_t, atomic_pool_size,
- 1 << (PAGE_SHIFT + MAX_ORDER-1));
+ unsigned long pages = totalram_pages() / (SZ_1G / SZ_128K);
+ pages = min_t(unsigned long, pages, MAX_ORDER_NR_PAGES);
+ atomic_pool_size = max_t(size_t, pages << PAGE_SHIFT, SZ_128K);
}
INIT_WORK(&atomic_pool_work, atomic_pool_work_fn);
@@ -204,7 +196,7 @@ static int __init dma_atomic_pool_init(void)
}
postcore_initcall(dma_atomic_pool_init);
-static inline struct gen_pool *dev_to_pool(struct device *dev)
+static inline struct gen_pool *dma_guess_pool_from_device(struct device *dev)
{
u64 phys_mask;
gfp_t gfp;
@@ -218,47 +210,79 @@ static inline struct gen_pool *dev_to_pool(struct device *dev)
return atomic_pool_kernel;
}
-static bool dma_in_atomic_pool(struct device *dev, void *start, size_t size)
+static inline struct gen_pool *dma_get_safer_pool(struct gen_pool *bad_pool)
+{
+ if (bad_pool == atomic_pool_kernel)
+ return atomic_pool_dma32 ? : atomic_pool_dma;
+
+ if (bad_pool == atomic_pool_dma32)
+ return atomic_pool_dma;
+
+ return NULL;
+}
+
+static inline struct gen_pool *dma_guess_pool(struct device *dev,
+ struct gen_pool *bad_pool)
{
- struct gen_pool *pool = dev_to_pool(dev);
+ if (bad_pool)
+ return dma_get_safer_pool(bad_pool);
- if (unlikely(!pool))
- return false;
- return gen_pool_has_addr(pool, (unsigned long)start, size);
+ return dma_guess_pool_from_device(dev);
}
void *dma_alloc_from_pool(struct device *dev, size_t size,
struct page **ret_page, gfp_t flags)
{
- struct gen_pool *pool = dev_to_pool(dev);
- unsigned long val;
+ struct gen_pool *pool = NULL;
+ unsigned long val = 0;
void *ptr = NULL;
-
- if (!pool) {
- WARN(1, "%pGg atomic pool not initialised!\n", &flags);
- return NULL;
+ phys_addr_t phys;
+
+ while (1) {
+ pool = dma_guess_pool(dev, pool);
+ if (!pool) {
+ WARN(1, "Failed to get suitable pool for %s\n",
+ dev_name(dev));
+ break;
+ }
+
+ val = gen_pool_alloc(pool, size);
+ if (!val)
+ continue;
+
+ phys = gen_pool_virt_to_phys(pool, val);
+ if (dma_coherent_ok(dev, phys, size))
+ break;
+
+ gen_pool_free(pool, val, size);
+ val = 0;
}
- val = gen_pool_alloc(pool, size);
- if (val) {
- phys_addr_t phys = gen_pool_virt_to_phys(pool, val);
+ if (val) {
*ret_page = pfn_to_page(__phys_to_pfn(phys));
ptr = (void *)val;
memset(ptr, 0, size);
+
+ if (gen_pool_avail(pool) < atomic_pool_size)
+ schedule_work(&atomic_pool_work);
}
- if (gen_pool_avail(pool) < atomic_pool_size)
- schedule_work(&atomic_pool_work);
return ptr;
}
bool dma_free_from_pool(struct device *dev, void *start, size_t size)
{
- struct gen_pool *pool = dev_to_pool(dev);
+ struct gen_pool *pool = NULL;
+
+ while (1) {
+ pool = dma_guess_pool(dev, pool);
+ if (!pool)
+ return false;
- if (!dma_in_atomic_pool(dev, start, size))
- return false;
- gen_pool_free(pool, (unsigned long)start, size);
- return true;
+ if (gen_pool_has_addr(pool, (unsigned long)start, size)) {
+ gen_pool_free(pool, (unsigned long)start, size);
+ return true;
+ }
+ }
}
diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
index e739a6eea6e7..78b23f089cf1 100644
--- a/kernel/dma/remap.c
+++ b/kernel/dma/remap.c
@@ -24,7 +24,8 @@ void *dma_common_pages_remap(struct page **pages, size_t size,
{
void *vaddr;
- vaddr = vmap(pages, size >> PAGE_SHIFT, VM_DMA_COHERENT, prot);
+ vaddr = vmap(pages, PAGE_ALIGN(size) >> PAGE_SHIFT,
+ VM_DMA_COHERENT, prot);
if (vaddr)
find_vm_area(vaddr)->pages = pages;
return vaddr;
@@ -37,7 +38,7 @@ void *dma_common_pages_remap(struct page **pages, size_t size,
void *dma_common_contiguous_remap(struct page *page, size_t size,
pgprot_t prot, const void *caller)
{
- int count = size >> PAGE_SHIFT;
+ int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
struct page **pages;
void *vaddr;
int i;
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index bb0862873dba..5f8b0c52fd2e 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -2199,7 +2199,7 @@ static void handle_swbp(struct pt_regs *regs)
if (!uprobe) {
if (is_swbp > 0) {
/* No matching uprobe; signal SIGTRAP. */
- send_sig(SIGTRAP, current, 0);
+ force_sig(SIGTRAP);
} else {
/*
* Either we raced with uprobe_unregister() or we can't
diff --git a/kernel/fork.c b/kernel/fork.c
index 142b23645d82..efc5493203ae 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1977,7 +1977,7 @@ static __latent_entropy struct task_struct *copy_process(
* to stop root fork bombs.
*/
retval = -EAGAIN;
- if (nr_threads >= max_threads)
+ if (data_race(nr_threads >= max_threads))
goto bad_fork_cleanup_count;
delayacct_tsk_init(p); /* Must remain after dup_task_struct() */
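The data_race() annotation marks this lockless read of nr_threads as intentional so KCSAN does not report it; the limit check is heuristic and tolerates a stale value. In sketch form:

        /* data_race() (from <linux/compiler.h>) evaluates its argument
         * while telling KCSAN the unsynchronized access is intentional.
         * Suitable only where a stale or torn value is harmless, as
         * with this fork-bomb limit. */
        if (data_race(nr_threads >= max_threads))
                goto bad_fork_cleanup_count;
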
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 761911168438..2a9fec53e159 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -195,9 +195,9 @@ void irq_set_thread_affinity(struct irq_desc *desc)
set_bit(IRQTF_AFFINITY, &action->thread_flags);
}
+#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
static void irq_validate_effective_affinity(struct irq_data *data)
{
-#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
const struct cpumask *m = irq_data_get_effective_affinity_mask(data);
struct irq_chip *chip = irq_data_get_irq_chip(data);
@@ -205,9 +205,19 @@ static void irq_validate_effective_affinity(struct irq_data *data)
return;
pr_warn_once("irq_chip %s did not update eff. affinity mask of irq %u\n",
chip->name, data->irq);
-#endif
}
+static inline void irq_init_effective_affinity(struct irq_data *data,
+ const struct cpumask *mask)
+{
+ cpumask_copy(irq_data_get_effective_affinity_mask(data), mask);
+}
+#else
+static inline void irq_validate_effective_affinity(struct irq_data *data) { }
+static inline void irq_init_effective_affinity(struct irq_data *data,
+ const struct cpumask *mask) { }
+#endif
+
int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
bool force)
{
@@ -304,6 +314,26 @@ static int irq_try_set_affinity(struct irq_data *data,
return ret;
}
+static bool irq_set_affinity_deactivated(struct irq_data *data,
+ const struct cpumask *mask, bool force)
+{
+ struct irq_desc *desc = irq_data_to_desc(data);
+
+ /*
+ * If the interrupt is not yet activated, just store the affinity
+ * mask and do not call the chip driver at all. On activation the
+ * driver has to make sure anyway that the interrupt is in a
+ * useable state so startup works.
+ */
+ if (!IS_ENABLED(CONFIG_IRQ_DOMAIN_HIERARCHY) || irqd_is_activated(data))
+ return false;
+
+ cpumask_copy(desc->irq_common_data.affinity, mask);
+ irq_init_effective_affinity(data, mask);
+ irqd_set(data, IRQD_AFFINITY_SET);
+ return true;
+}
+
int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask,
bool force)
{
@@ -314,6 +344,9 @@ int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask,
if (!chip || !chip->irq_set_affinity)
return -EINVAL;
+ if (irq_set_affinity_deactivated(data, mask, force))
+ return 0;
+
if (irq_can_move_pcntxt(data) && !irqd_is_setaffinity_pending(data)) {
ret = irq_try_set_affinity(data, mask, force);
} else {
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 16c8c605f4b0..bb14e64f62a4 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -644,19 +644,20 @@ static inline int kallsyms_for_perf(void)
* Otherwise, require CAP_SYSLOG (assuming kptr_restrict isn't set to
* block even that).
*/
-int kallsyms_show_value(void)
+bool kallsyms_show_value(const struct cred *cred)
{
switch (kptr_restrict) {
case 0:
if (kallsyms_for_perf())
- return 1;
+ return true;
/* fallthrough */
case 1:
- if (has_capability_noaudit(current, CAP_SYSLOG))
- return 1;
+ if (security_capable(cred, &init_user_ns, CAP_SYSLOG,
+ CAP_OPT_NOAUDIT) == 0)
+ return true;
/* fallthrough */
default:
- return 0;
+ return false;
}
}
@@ -673,7 +674,11 @@ static int kallsyms_open(struct inode *inode, struct file *file)
return -ENOMEM;
reset_iter(iter, 0);
- iter->show_value = kallsyms_show_value();
+ /*
+ * Instead of checking this on every s_show() call, cache
+ * the result here at open time.
+ */
+ iter->show_value = kallsyms_show_value(file->f_cred);
return 0;
}
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index bb05fd52de85..09cc78df53c6 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -181,34 +181,19 @@ void kimage_file_post_load_cleanup(struct kimage *image)
static int
kimage_validate_signature(struct kimage *image)
{
- const char *reason;
int ret;
ret = arch_kexec_kernel_verify_sig(image, image->kernel_buf,
image->kernel_buf_len);
- switch (ret) {
- case 0:
- break;
+ if (ret) {
- /* Certain verification errors are non-fatal if we're not
- * checking errors, provided we aren't mandating that there
- * must be a valid signature.
- */
- case -ENODATA:
- reason = "kexec of unsigned image";
- goto decide;
- case -ENOPKG:
- reason = "kexec of image with unsupported crypto";
- goto decide;
- case -ENOKEY:
- reason = "kexec of image with unavailable key";
- decide:
if (IS_ENABLED(CONFIG_KEXEC_SIG_FORCE)) {
- pr_notice("%s rejected\n", reason);
+ pr_notice("Enforced kernel signature verification failed (%d).\n", ret);
return ret;
}
- /* If IMA is guaranteed to appraise a signature on the kexec
+ /*
+ * If IMA is guaranteed to appraise a signature on the kexec
* image, permit it even if the kernel is otherwise locked
* down.
*/
@@ -216,17 +201,10 @@ kimage_validate_signature(struct kimage *image)
security_locked_down(LOCKDOWN_KEXEC))
return -EPERM;
- return 0;
-
- /* All other errors are fatal, including nomem, unparseable
- * signatures and signature check failures - even if signatures
- * aren't required.
- */
- default:
- pr_notice("kernel signature verification failed (%d).\n", ret);
+ pr_debug("kernel signature verification failed (%d).\n", ret);
}
- return ret;
+ return 0;
}
#endif
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 50cd84f53df0..2e97febeef77 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -46,6 +46,11 @@
static int kprobes_initialized;
+/* kprobe_table can be accessed by
+ * - Normal hlist traversal and RCU add/del under kprobe_mutex is held.
+ * Or
+ * - RCU hlist traversal under disabling preempt (breakpoint handlers)
+ */
static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
@@ -326,7 +331,8 @@ struct kprobe *get_kprobe(void *addr)
struct kprobe *p;
head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)];
- hlist_for_each_entry_rcu(p, head, hlist) {
+ hlist_for_each_entry_rcu(p, head, hlist,
+ lockdep_is_held(&kprobe_mutex)) {
if (p->addr == addr)
return p;
}
@@ -586,11 +592,12 @@ static void kprobe_optimizer(struct work_struct *work)
mutex_unlock(&module_mutex);
mutex_unlock(&text_mutex);
cpus_read_unlock();
- mutex_unlock(&kprobe_mutex);
/* Step 5: Kick optimizer again if needed */
if (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list))
kick_kprobe_optimizer();
+
+ mutex_unlock(&kprobe_mutex);
}
/* Wait for completing optimization and unoptimization */
@@ -668,8 +675,6 @@ static void force_unoptimize_kprobe(struct optimized_kprobe *op)
lockdep_assert_cpus_held();
arch_unoptimize_kprobe(op);
op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
- if (kprobe_disabled(&op->kp))
- arch_disarm_kprobe(&op->kp);
}
/* Unoptimize a kprobe if p is optimized */
@@ -849,7 +854,7 @@ static void optimize_all_kprobes(void)
kprobes_allow_optimization = true;
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
- hlist_for_each_entry_rcu(p, head, hlist)
+ hlist_for_each_entry(p, head, hlist)
if (!kprobe_disabled(p))
optimize_kprobe(p);
}
@@ -876,7 +881,7 @@ static void unoptimize_all_kprobes(void)
kprobes_allow_optimization = false;
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
- hlist_for_each_entry_rcu(p, head, hlist) {
+ hlist_for_each_entry(p, head, hlist) {
if (!kprobe_disabled(p))
unoptimize_kprobe(p, false);
}
@@ -1236,6 +1241,26 @@ __releases(hlist_lock)
}
NOKPROBE_SYMBOL(kretprobe_table_unlock);
+struct kprobe kprobe_busy = {
+ .addr = (void *) get_kprobe,
+};
+
+void kprobe_busy_begin(void)
+{
+ struct kprobe_ctlblk *kcb;
+
+ preempt_disable();
+ __this_cpu_write(current_kprobe, &kprobe_busy);
+ kcb = get_kprobe_ctlblk();
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+}
+
+void kprobe_busy_end(void)
+{
+ __this_cpu_write(current_kprobe, NULL);
+ preempt_enable();
+}
+
/*
* This function is called from finish_task_switch when task tk becomes dead,
* so that we can recycle any function-return probe instances associated
@@ -1253,6 +1278,8 @@ void kprobe_flush_task(struct task_struct *tk)
/* Early boot. kretprobe_table_locks not yet initialized. */
return;
+ kprobe_busy_begin();
+
INIT_HLIST_HEAD(&empty_rp);
hash = hash_ptr(tk, KPROBE_HASH_BITS);
head = &kretprobe_inst_table[hash];
@@ -1266,6 +1293,8 @@ void kprobe_flush_task(struct task_struct *tk)
hlist_del(&ri->hlist);
kfree(ri);
}
+
+ kprobe_busy_end();
}
NOKPROBE_SYMBOL(kprobe_flush_task);
@@ -1499,12 +1528,14 @@ static struct kprobe *__get_valid_kprobe(struct kprobe *p)
{
struct kprobe *ap, *list_p;
+ lockdep_assert_held(&kprobe_mutex);
+
ap = get_kprobe(p->addr);
if (unlikely(!ap))
return NULL;
if (p != ap) {
- list_for_each_entry_rcu(list_p, &ap->list, list)
+ list_for_each_entry(list_p, &ap->list, list)
if (list_p == p)
/* kprobe p is a valid probe */
goto valid;
@@ -1669,7 +1700,9 @@ static int aggr_kprobe_disabled(struct kprobe *ap)
{
struct kprobe *kp;
- list_for_each_entry_rcu(kp, &ap->list, list)
+ lockdep_assert_held(&kprobe_mutex);
+
+ list_for_each_entry(kp, &ap->list, list)
if (!kprobe_disabled(kp))
/*
* There is an active probe on the list.
@@ -1748,7 +1781,7 @@ static int __unregister_kprobe_top(struct kprobe *p)
else {
/* If disabling probe has special handlers, update aggrprobe */
if (p->post_handler && !kprobe_gone(p)) {
- list_for_each_entry_rcu(list_p, &ap->list, list) {
+ list_for_each_entry(list_p, &ap->list, list) {
if ((list_p != p) && (list_p->post_handler))
goto noclean;
}
@@ -2062,13 +2095,15 @@ static void kill_kprobe(struct kprobe *p)
{
struct kprobe *kp;
+ lockdep_assert_held(&kprobe_mutex);
+
p->flags |= KPROBE_FLAG_GONE;
if (kprobe_aggrprobe(p)) {
/*
* If this is an aggr_kprobe, we have to list all the
* chained probes and mark them GONE.
*/
- list_for_each_entry_rcu(kp, &p->list, list)
+ list_for_each_entry(kp, &p->list, list)
kp->flags |= KPROBE_FLAG_GONE;
p->post_handler = NULL;
kill_optimized_kprobe(p);
@@ -2312,7 +2347,7 @@ static int kprobes_module_callback(struct notifier_block *nb,
mutex_lock(&kprobe_mutex);
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
- hlist_for_each_entry_rcu(p, head, hlist)
+ hlist_for_each_entry(p, head, hlist)
if (within_module_init((unsigned long)p->addr, mod) ||
(checkcore &&
within_module_core((unsigned long)p->addr, mod))) {
@@ -2413,7 +2448,7 @@ static void report_probe(struct seq_file *pi, struct kprobe *p,
else
kprobe_type = "k";
- if (!kallsyms_show_value())
+ if (!kallsyms_show_value(pi->file->f_cred))
addr = NULL;
if (sym)
@@ -2505,7 +2540,7 @@ static int kprobe_blacklist_seq_show(struct seq_file *m, void *v)
* If /proc/kallsyms is not showing kernel addresses, we won't
* show them here either.
*/
- if (!kallsyms_show_value())
+ if (!kallsyms_show_value(m->file->f_cred))
seq_printf(m, "0x%px-0x%px\t%ps\n", NULL, NULL,
(void *)ent->start_addr);
else
@@ -2550,7 +2585,7 @@ static int arm_all_kprobes(void)
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
/* Arm all kprobes on a best-effort basis */
- hlist_for_each_entry_rcu(p, head, hlist) {
+ hlist_for_each_entry(p, head, hlist) {
if (!kprobe_disabled(p)) {
err = arm_kprobe(p);
if (err) {
@@ -2593,7 +2628,7 @@ static int disarm_all_kprobes(void)
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
/* Disarm all kprobes on a best-effort basis */
- hlist_for_each_entry_rcu(p, head, hlist) {
+ hlist_for_each_entry(p, head, hlist) {
if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p)) {
err = disarm_kprobe(p, false);
if (err) {
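
The kprobes hunks above converge on one locking idiom: where kprobe_mutex is held, plain hlist_for_each_entry() plus lockdep_assert_held() replaces RCU iteration, and where both RCU readers and mutex holders may walk the list, the four-argument hlist_for_each_entry_rcu() documents the mutex in its lockdep expression. A minimal sketch of that idiom, with invented names (obj, obj_mutex, obj_list are illustrative only, not from this patch):

	static DEFINE_MUTEX(obj_mutex);
	static HLIST_HEAD(obj_list);

	struct obj {
		int id;
		bool dead;
		struct hlist_node node;
	};

	/* Reader: legal under rcu_read_lock() *or* obj_mutex. */
	static struct obj *obj_lookup(int id)
	{
		struct obj *o;

		hlist_for_each_entry_rcu(o, &obj_list, node,
					 lockdep_is_held(&obj_mutex)) {
			if (o->id == id)
				return o;
		}
		return NULL;
	}

	/* Writer: always under obj_mutex, so plain iteration suffices. */
	static void obj_kill_all(void)
	{
		struct obj *o;

		lockdep_assert_held(&obj_mutex);
		hlist_for_each_entry(o, &obj_list, node)
			o->dead = true;
	}
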
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 8e3d2d7fdf5e..132f84a5fde3 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -201,7 +201,7 @@ void *kthread_probe_data(struct task_struct *task)
struct kthread *kthread = to_kthread(task);
void *data = NULL;
- probe_kernel_read(&data, &kthread->data, sizeof(data));
+ copy_from_kernel_nofault(&data, &kthread->data, sizeof(data));
return data;
}
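
The rename from probe_kernel_read() to copy_from_kernel_nofault() recurs throughout this pull; the semantics are unchanged: copy from a possibly unmapped kernel address without taking a fault, returning 0 on success and a negative error otherwise. A hedged sketch of the calling convention (peek_word is an invented helper):

	#include <linux/uaccess.h>

	static unsigned long peek_word(const void *addr)
	{
		unsigned long val = 0;

		/* 0 on success, negative (-EFAULT) if @addr is unreadable. */
		if (copy_from_kernel_nofault(&val, addr, sizeof(val)))
			return 0;	/* treat unreadable memory as zero */
		return val;
	}
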
diff --git a/kernel/module.c b/kernel/module.c
index e8a198588f26..aa183c9ac0a2 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1510,8 +1510,7 @@ static inline bool sect_empty(const Elf_Shdr *sect)
}
struct module_sect_attr {
- struct module_attribute mattr;
- char *name;
+ struct bin_attribute battr;
unsigned long address;
};
@@ -1521,13 +1520,18 @@ struct module_sect_attrs {
struct module_sect_attr attrs[];
};
-static ssize_t module_sect_show(struct module_attribute *mattr,
- struct module_kobject *mk, char *buf)
+static ssize_t module_sect_read(struct file *file, struct kobject *kobj,
+ struct bin_attribute *battr,
+ char *buf, loff_t pos, size_t count)
{
struct module_sect_attr *sattr =
- container_of(mattr, struct module_sect_attr, mattr);
- return sprintf(buf, "0x%px\n", kptr_restrict < 2 ?
- (void *)sattr->address : NULL);
+ container_of(battr, struct module_sect_attr, battr);
+
+ if (pos != 0)
+ return -EINVAL;
+
+ return sprintf(buf, "0x%px\n",
+ kallsyms_show_value(file->f_cred) ? (void *)sattr->address : NULL);
}
static void free_sect_attrs(struct module_sect_attrs *sect_attrs)
@@ -1535,7 +1539,7 @@ static void free_sect_attrs(struct module_sect_attrs *sect_attrs)
unsigned int section;
for (section = 0; section < sect_attrs->nsections; section++)
- kfree(sect_attrs->attrs[section].name);
+ kfree(sect_attrs->attrs[section].battr.attr.name);
kfree(sect_attrs);
}
@@ -1544,42 +1548,41 @@ static void add_sect_attrs(struct module *mod, const struct load_info *info)
unsigned int nloaded = 0, i, size[2];
struct module_sect_attrs *sect_attrs;
struct module_sect_attr *sattr;
- struct attribute **gattr;
+ struct bin_attribute **gattr;
/* Count loaded sections and allocate structures */
for (i = 0; i < info->hdr->e_shnum; i++)
if (!sect_empty(&info->sechdrs[i]))
nloaded++;
size[0] = ALIGN(struct_size(sect_attrs, attrs, nloaded),
- sizeof(sect_attrs->grp.attrs[0]));
- size[1] = (nloaded + 1) * sizeof(sect_attrs->grp.attrs[0]);
+ sizeof(sect_attrs->grp.bin_attrs[0]));
+ size[1] = (nloaded + 1) * sizeof(sect_attrs->grp.bin_attrs[0]);
sect_attrs = kzalloc(size[0] + size[1], GFP_KERNEL);
if (sect_attrs == NULL)
return;
/* Setup section attributes. */
sect_attrs->grp.name = "sections";
- sect_attrs->grp.attrs = (void *)sect_attrs + size[0];
+ sect_attrs->grp.bin_attrs = (void *)sect_attrs + size[0];
sect_attrs->nsections = 0;
sattr = &sect_attrs->attrs[0];
- gattr = &sect_attrs->grp.attrs[0];
+ gattr = &sect_attrs->grp.bin_attrs[0];
for (i = 0; i < info->hdr->e_shnum; i++) {
Elf_Shdr *sec = &info->sechdrs[i];
if (sect_empty(sec))
continue;
+ sysfs_bin_attr_init(&sattr->battr);
sattr->address = sec->sh_addr;
- sattr->name = kstrdup(info->secstrings + sec->sh_name,
- GFP_KERNEL);
- if (sattr->name == NULL)
+ sattr->battr.attr.name =
+ kstrdup(info->secstrings + sec->sh_name, GFP_KERNEL);
+ if (sattr->battr.attr.name == NULL)
goto out;
sect_attrs->nsections++;
- sysfs_attr_init(&sattr->mattr.attr);
- sattr->mattr.show = module_sect_show;
- sattr->mattr.store = NULL;
- sattr->mattr.attr.name = sattr->name;
- sattr->mattr.attr.mode = S_IRUSR;
- *(gattr++) = &(sattr++)->mattr.attr;
+ sattr->battr.read = module_sect_read;
+ sattr->battr.size = 3 /* "0x", "\n" */ + (BITS_PER_LONG / 4);
+ sattr->battr.attr.mode = 0400;
+ *(gattr++) = &(sattr++)->battr;
}
*gattr = NULL;
@@ -1669,7 +1672,7 @@ static void add_notes_attrs(struct module *mod, const struct load_info *info)
continue;
if (info->sechdrs[i].sh_type == SHT_NOTE) {
sysfs_bin_attr_init(nattr);
- nattr->attr.name = mod->sect_attrs->attrs[loaded].name;
+ nattr->attr.name = mod->sect_attrs->attrs[loaded].battr.attr.name;
nattr->attr.mode = S_IRUGO;
nattr->size = info->sechdrs[i].sh_size;
nattr->private = (void *) info->sechdrs[i].sh_addr;
@@ -2783,7 +2786,9 @@ static void dynamic_debug_remove(struct module *mod, struct _ddebug *debug)
void * __weak module_alloc(unsigned long size)
{
- return vmalloc_exec(size);
+ return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
+ GFP_KERNEL, PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS,
+ NUMA_NO_NODE, __builtin_return_address(0));
}
bool __weak module_init_section(const char *name)
@@ -4377,7 +4382,7 @@ static int modules_open(struct inode *inode, struct file *file)
if (!err) {
struct seq_file *m = file->private_data;
- m->private = kallsyms_show_value() ? NULL : (void *)8ul;
+ m->private = kallsyms_show_value(file->f_cred) ? NULL : (void *)8ul;
}
return err;
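
Switching the per-section files from module_attribute to bin_attribute is what makes the credential check possible: a binary attribute's ->read() receives the struct file, and file->f_cred holds the credentials captured at open() time, so a later privilege change cannot be exploited. A compressed sketch of the shape, with invented names (demo_read, demo_attr):

	static ssize_t demo_read(struct file *file, struct kobject *kobj,
				 struct bin_attribute *battr,
				 char *buf, loff_t pos, size_t count)
	{
		/* Single-shot value: only offset 0 makes sense. */
		if (pos != 0)
			return -EINVAL;

		/* f_cred: the opener's credentials, not current's. */
		return sprintf(buf, "%u\n", file->f_cred->euid.val);
	}

	static struct bin_attribute demo_attr = {
		.attr = { .name = "demo", .mode = 0400 },
		.size = 16,		/* upper bound advertised to userspace */
		.read = demo_read,
	};
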
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index b03df67621d0..cd356630a311 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -531,7 +531,7 @@ SYSCALL_DEFINE2(setns, int, fd, int, flags)
} else if (!IS_ERR(pidfd_pid(file))) {
err = check_setns_flags(flags);
} else {
- err = -EBADF;
+ err = -EINVAL;
}
if (err)
goto out;
diff --git a/kernel/padata.c b/kernel/padata.c
index 29fc5d87a4cd..4373f7adaa40 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -335,7 +335,7 @@ static void padata_reorder(struct parallel_data *pd)
*
* Ensure reorder queue is read after pd->lock is dropped so we see
* new objects from another task in padata_do_serial. Pairs with
- * smp_mb__after_atomic in padata_do_serial.
+ * smp_mb in padata_do_serial.
*/
smp_mb();
@@ -418,7 +418,7 @@ void padata_do_serial(struct padata_priv *padata)
* with the trylock of pd->lock in padata_reorder. Pairs with smp_mb
* in padata_reorder.
*/
- smp_mb__after_atomic();
+ smp_mb();
padata_reorder(pd);
}
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 8c14835be46c..b71eaf5f5a86 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -974,16 +974,6 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence)
user->idx = log_next_idx;
user->seq = log_next_seq;
break;
- case SEEK_CUR:
- /*
- * It isn't supported due to the record nature of this
- * interface: _SET _DATA and _END point to very specific
- * record positions, while _CUR would be more useful in case
- * of a byte-based log. Because of that, return the default
- * errno value for invalid seek operation.
- */
- ret = -ESPIPE;
- break;
default:
ret = -EINVAL;
}
diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c
index 16dd1e6b7c09..9eb39c20082c 100644
--- a/kernel/rcu/rcuperf.c
+++ b/kernel/rcu/rcuperf.c
@@ -723,7 +723,7 @@ kfree_perf_init(void)
schedule_timeout_uninterruptible(1);
}
- pr_alert("kfree object size=%lu\n", kfree_mult * sizeof(struct kfree_obj));
+ pr_alert("kfree object size=%zu\n", kfree_mult * sizeof(struct kfree_obj));
kfree_reader_tasks = kcalloc(kfree_nrealthreads, sizeof(kfree_reader_tasks[0]),
GFP_KERNEL);
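
The format fix matters because sizeof() yields size_t, whose underlying type differs between 32-bit and 64-bit builds; %zu is the one specifier that is correct everywhere, while %lu triggers -Wformat on configurations where size_t is not unsigned long. The idiom in one line:

	size_t n = kfree_mult * sizeof(struct kfree_obj);

	pr_alert("kfree object size=%zu\n", n);	/* %zu: portable for size_t */
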
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index c716eadc7617..6c6569e0586c 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -250,7 +250,7 @@ static noinstr void rcu_dynticks_eqs_enter(void)
* next idle sojourn.
*/
rcu_dynticks_task_trace_enter(); // Before ->dynticks update!
- seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);
+ seq = arch_atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);
// RCU is no longer watching. Better be in extended quiescent state!
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
(seq & RCU_DYNTICK_CTRL_CTR));
@@ -274,13 +274,13 @@ static noinstr void rcu_dynticks_eqs_exit(void)
* and we also must force ordering with the next RCU read-side
* critical section.
*/
- seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);
+ seq = arch_atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);
// RCU is now watching. Better not be in an extended quiescent state!
rcu_dynticks_task_trace_exit(); // After ->dynticks update!
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
!(seq & RCU_DYNTICK_CTRL_CTR));
if (seq & RCU_DYNTICK_CTRL_MASK) {
- atomic_andnot(RCU_DYNTICK_CTRL_MASK, &rdp->dynticks);
+ arch_atomic_andnot(RCU_DYNTICK_CTRL_MASK, &rdp->dynticks);
smp_mb__after_atomic(); /* _exit after clearing mask. */
}
}
@@ -313,7 +313,7 @@ static __always_inline bool rcu_dynticks_curr_cpu_in_eqs(void)
{
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
- return !(atomic_read(&rdp->dynticks) & RCU_DYNTICK_CTRL_CTR);
+ return !(arch_atomic_read(&rdp->dynticks) & RCU_DYNTICK_CTRL_CTR);
}
/*
@@ -633,6 +633,10 @@ static noinstr void rcu_eqs_enter(bool user)
do_nocb_deferred_wakeup(rdp);
rcu_prepare_for_idle();
rcu_preempt_deferred_qs(current);
+
+ // instrumentation for the noinstr rcu_dynticks_eqs_enter()
+ instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks));
+
instrumentation_end();
WRITE_ONCE(rdp->dynticks_nesting, 0); /* Avoid irq-access tearing. */
// RCU is watching here ...
@@ -692,6 +696,7 @@ noinstr void rcu_nmi_exit(void)
{
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
+ instrumentation_begin();
/*
* Check for ->dynticks_nmi_nesting underflow and bad ->dynticks.
* (We are exiting an NMI handler, so RCU better be paying attention
@@ -705,7 +710,6 @@ noinstr void rcu_nmi_exit(void)
* leave it in non-RCU-idle state.
*/
if (rdp->dynticks_nmi_nesting != 1) {
- instrumentation_begin();
trace_rcu_dyntick(TPS("--="), rdp->dynticks_nmi_nesting, rdp->dynticks_nmi_nesting - 2,
atomic_read(&rdp->dynticks));
WRITE_ONCE(rdp->dynticks_nmi_nesting, /* No store tearing. */
@@ -714,13 +718,15 @@ noinstr void rcu_nmi_exit(void)
return;
}
- instrumentation_begin();
/* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */
trace_rcu_dyntick(TPS("Startirq"), rdp->dynticks_nmi_nesting, 0, atomic_read(&rdp->dynticks));
WRITE_ONCE(rdp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */
if (!in_nmi())
rcu_prepare_for_idle();
+
+ // instrumentation for the noinstr rcu_dynticks_eqs_enter()
+ instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks));
instrumentation_end();
// RCU is watching here ...
@@ -838,6 +844,10 @@ static void noinstr rcu_eqs_exit(bool user)
rcu_dynticks_eqs_exit();
// ... but is watching here.
instrumentation_begin();
+
+ // instrumentation for the noinstr rcu_dynticks_eqs_exit()
+ instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks));
+
rcu_cleanup_after_idle();
trace_rcu_dyntick(TPS("End"), rdp->dynticks_nesting, 1, atomic_read(&rdp->dynticks));
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
@@ -983,13 +993,21 @@ noinstr void rcu_nmi_enter(void)
if (!in_nmi())
rcu_cleanup_after_idle();
+ instrumentation_begin();
+ // instrumentation for the noinstr rcu_dynticks_curr_cpu_in_eqs()
+ instrument_atomic_read(&rdp->dynticks, sizeof(rdp->dynticks));
+ // instrumentation for the noinstr rcu_dynticks_eqs_exit()
+ instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks));
+
incby = 1;
} else if (!in_nmi()) {
instrumentation_begin();
rcu_irq_enter_check_tick();
instrumentation_end();
+ } else {
+ instrumentation_begin();
}
- instrumentation_begin();
+
trace_rcu_dyntick(incby == 1 ? TPS("Endirq") : TPS("++="),
rdp->dynticks_nmi_nesting,
rdp->dynticks_nmi_nesting + incby, atomic_read(&rdp->dynticks));
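
The recurring pattern in these RCU hunks: a noinstr function must stick to the arch_atomic_*() primitives (which carry no KASAN/KCSAN hooks), and an instrumentation_begin()/instrumentation_end() section in its instrumentable caller then replays the access for the sanitizers via instrument_atomic_read()/instrument_atomic_write(). A reduced sketch under those assumptions (enter_quiet and state are invented):

	static atomic_t state;

	static noinstr int enter_quiet(void)
	{
		/* noinstr: no sanitizer or tracing hooks may run here. */
		return arch_atomic_add_return(1, &state);
	}

	void caller(void)
	{
		int seq = enter_quiet();

		instrumentation_begin();
		/* Report to KASAN/KCSAN the access enter_quiet() performed. */
		instrument_atomic_write(&state, sizeof(state));
		pr_debug("seq=%d\n", seq);
		instrumentation_end();
	}
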
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8f360326861e..2142c6767682 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1311,9 +1311,6 @@ static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
void activate_task(struct rq *rq, struct task_struct *p, int flags)
{
- if (task_contributes_to_load(p))
- rq->nr_uninterruptible--;
-
enqueue_task(rq, p, flags);
p->on_rq = TASK_ON_RQ_QUEUED;
@@ -1323,9 +1320,6 @@ void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
{
p->on_rq = (flags & DEQUEUE_SLEEP) ? 0 : TASK_ON_RQ_MIGRATING;
- if (task_contributes_to_load(p))
- rq->nr_uninterruptible++;
-
dequeue_task(rq, p, flags);
}
@@ -1637,7 +1631,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
goto out;
}
- if (cpumask_equal(p->cpus_ptr, new_mask))
+ if (cpumask_equal(&p->cpus_mask, new_mask))
goto out;
/*
@@ -2236,10 +2230,10 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
lockdep_assert_held(&rq->lock);
-#ifdef CONFIG_SMP
if (p->sched_contributes_to_load)
rq->nr_uninterruptible--;
+#ifdef CONFIG_SMP
if (wake_flags & WF_MIGRATED)
en_flags |= ENQUEUE_MIGRATED;
#endif
@@ -2293,8 +2287,15 @@ void sched_ttwu_pending(void *arg)
rq_lock_irqsave(rq, &rf);
update_rq_clock(rq);
- llist_for_each_entry_safe(p, t, llist, wake_entry)
+ llist_for_each_entry_safe(p, t, llist, wake_entry.llist) {
+ if (WARN_ON_ONCE(p->on_cpu))
+ smp_cond_load_acquire(&p->on_cpu, !VAL);
+
+ if (WARN_ON_ONCE(task_cpu(p) != cpu_of(rq)))
+ set_task_cpu(p, cpu_of(rq));
+
ttwu_do_activate(rq, p, p->sched_remote_wakeup ? WF_MIGRATED : 0, &rf);
+ }
rq_unlock_irqrestore(rq, &rf);
}
@@ -2322,7 +2323,7 @@ static void __ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags
p->sched_remote_wakeup = !!(wake_flags & WF_MIGRATED);
WRITE_ONCE(rq->ttwu_pending, 1);
- __smp_call_single_queue(cpu, &p->wake_entry);
+ __smp_call_single_queue(cpu, &p->wake_entry.llist);
}
void wake_up_if_idle(int cpu)
@@ -2369,7 +2370,7 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags)
* the soon-to-be-idle CPU as the current CPU is likely busy.
* nr_running is checked to avoid unnecessary task stacking.
*/
- if ((wake_flags & WF_ON_RQ) && cpu_rq(cpu)->nr_running <= 1)
+ if ((wake_flags & WF_ON_CPU) && cpu_rq(cpu)->nr_running <= 1)
return true;
return false;
@@ -2378,6 +2379,9 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags)
static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags)
{
if (sched_feat(TTWU_QUEUE) && ttwu_queue_cond(cpu, wake_flags)) {
+ if (WARN_ON_ONCE(cpu == smp_processor_id()))
+ return false;
+
sched_clock_cpu(cpu); /* Sync clocks across CPUs */
__ttwu_queue_wakelist(p, cpu, wake_flags);
return true;
@@ -2528,7 +2532,6 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
goto out;
success = 1;
- cpu = task_cpu(p);
trace_sched_waking(p);
p->state = TASK_RUNNING;
trace_sched_wakeup(p);
@@ -2550,7 +2553,6 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
/* We're going to change ->state: */
success = 1;
- cpu = task_cpu(p);
/*
* Ensure we load p->on_rq _after_ p->state, otherwise it would
@@ -2575,7 +2577,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
* A similar smp_rmb() lives in try_invoke_on_locked_down_task().
*/
smp_rmb();
- if (p->on_rq && ttwu_remote(p, wake_flags))
+ if (READ_ONCE(p->on_rq) && ttwu_remote(p, wake_flags))
goto unlock;
if (p->in_iowait) {
@@ -2584,9 +2586,6 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
}
#ifdef CONFIG_SMP
- p->sched_contributes_to_load = !!task_contributes_to_load(p);
- p->state = TASK_WAKING;
-
/*
* Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be
* possible to, falsely, observe p->on_cpu == 0.
@@ -2605,8 +2604,20 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
*
* Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in
* __schedule(). See the comment for smp_mb__after_spinlock().
+ *
+ * Form a control-dep-acquire with p->on_rq == 0 above, to ensure
+ * schedule()'s deactivate_task() has 'happened' and p will no longer
+ * care about its own p->state. See the comment in __schedule().
*/
- smp_rmb();
+ smp_acquire__after_ctrl_dep();
+
+ /*
+ * We're doing the wakeup (@success == 1), they did a dequeue (p->on_rq
+ * == 0), which means we need to do an enqueue, change p->state to
+ * TASK_WAKING such that we can unlock p->pi_lock before doing the
+ * enqueue, such as ttwu_queue_wakelist().
+ */
+ p->state = TASK_WAKING;
/*
* If the owning (remote) CPU is still in the middle of schedule() with
@@ -2614,8 +2625,21 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
* which potentially sends an IPI instead of spinning on p->on_cpu to
* let the waker make forward progress. This is safe because IRQs are
* disabled and the IPI will deliver after on_cpu is cleared.
+ *
+ * Ensure we load task_cpu(p) after p->on_cpu:
+ *
+ * set_task_cpu(p, cpu);
+ * STORE p->cpu = @cpu
+ * __schedule() (switch to task 'p')
+ * LOCK rq->lock
+ * smp_mb__after_spin_lock() smp_cond_load_acquire(&p->on_cpu)
+ * STORE p->on_cpu = 1 LOAD p->cpu
+ *
+ * to ensure we observe the correct CPU on which the task is currently
+ * scheduling.
*/
- if (READ_ONCE(p->on_cpu) && ttwu_queue_wakelist(p, cpu, wake_flags | WF_ON_RQ))
+ if (smp_load_acquire(&p->on_cpu) &&
+ ttwu_queue_wakelist(p, task_cpu(p), wake_flags | WF_ON_CPU))
goto unlock;
/*
@@ -2635,6 +2659,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
psi_ttwu_dequeue(p);
set_task_cpu(p, cpu);
}
+#else
+ cpu = task_cpu(p);
#endif /* CONFIG_SMP */
ttwu_queue(p, cpu, wake_flags);
@@ -2642,7 +2668,7 @@ unlock:
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
out:
if (success)
- ttwu_stat(p, cpu, wake_flags);
+ ttwu_stat(p, task_cpu(p), wake_flags);
preempt_enable();
return success;
@@ -2763,7 +2789,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
#endif
init_numa_balancing(clone_flags, p);
#ifdef CONFIG_SMP
- p->wake_entry_type = CSD_TYPE_TTWU;
+ p->wake_entry.u_flags = CSD_TYPE_TTWU;
#endif
}
@@ -2939,6 +2965,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
* Silence PROVE_RCU.
*/
raw_spin_lock_irqsave(&p->pi_lock, flags);
+ rseq_migrate(p);
/*
* We're setting the CPU for the first time, we don't migrate,
* so use __set_task_cpu().
@@ -3003,6 +3030,7 @@ void wake_up_new_task(struct task_struct *p)
* as we're not fully set-up yet.
*/
p->recent_used_cpu = task_cpu(p);
+ rseq_migrate(p);
__set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0));
#endif
rq = __task_rq_lock(p, &rf);
@@ -4074,6 +4102,7 @@ static void __sched notrace __schedule(bool preempt)
{
struct task_struct *prev, *next;
unsigned long *switch_count;
+ unsigned long prev_state;
struct rq_flags rf;
struct rq *rq;
int cpu;
@@ -4093,9 +4122,16 @@ static void __sched notrace __schedule(bool preempt)
/*
* Make sure that signal_pending_state()->signal_pending() below
* can't be reordered with __set_current_state(TASK_INTERRUPTIBLE)
- * done by the caller to avoid the race with signal_wake_up().
+ * done by the caller to avoid the race with signal_wake_up():
*
- * The membarrier system call requires a full memory barrier
+ * __set_current_state(@state) signal_wake_up()
+ * schedule() set_tsk_thread_flag(p, TIF_SIGPENDING)
+ * wake_up_state(p, state)
* LOCK rq->lock LOCK p->pi_lock
+ * smp_mb__after_spinlock() smp_mb__after_spinlock()
+ * if (signal_pending_state()) if (p->state & @state)
+ *
+ * Also, the membarrier system call requires a full memory barrier
* after coming from user-space, before storing to rq->curr.
*/
rq_lock(rq, &rf);
@@ -4106,10 +4142,38 @@ static void __sched notrace __schedule(bool preempt)
update_rq_clock(rq);
switch_count = &prev->nivcsw;
- if (!preempt && prev->state) {
- if (signal_pending_state(prev->state, prev)) {
+
+ /*
+ * We must load prev->state once (task_struct::state is volatile), such
+ * that:
+ *
+ * - we form a control dependency vs deactivate_task() below.
+ * - ptrace_{,un}freeze_traced() can change ->state underneath us.
+ */
+ prev_state = prev->state;
+ if (!preempt && prev_state) {
+ if (signal_pending_state(prev_state, prev)) {
prev->state = TASK_RUNNING;
} else {
+ prev->sched_contributes_to_load =
+ (prev_state & TASK_UNINTERRUPTIBLE) &&
+ !(prev_state & TASK_NOLOAD) &&
+ !(prev->flags & PF_FROZEN);
+
+ if (prev->sched_contributes_to_load)
+ rq->nr_uninterruptible++;
+
+ /*
+ * __schedule() ttwu()
+ * prev_state = prev->state; if (p->on_rq && ...)
+ * if (prev_state) goto out;
+ * p->on_rq = 0; smp_acquire__after_ctrl_dep();
+ * p->state = TASK_WAKING
+ *
+ * Where __schedule() and ttwu() have matching control dependencies.
+ *
+ * After this, schedule() must not care about p->state any more.
+ */
deactivate_task(rq, prev, DEQUEUE_SLEEP | DEQUEUE_NOCLOCK);
if (prev->in_iowait) {
@@ -4421,6 +4485,7 @@ asmlinkage __visible void __sched preempt_schedule_irq(void)
int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags,
void *key)
{
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_SCHED_DEBUG) && wake_flags & ~WF_SYNC);
return try_to_wake_up(curr->private, mode, wake_flags);
}
EXPORT_SYMBOL(default_wake_function);
@@ -4533,7 +4598,8 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
*/
if (dl_prio(prio)) {
if (!dl_prio(p->normal_prio) ||
- (pi_task && dl_entity_preempt(&pi_task->dl, &p->dl))) {
+ (pi_task && dl_prio(pi_task->prio) &&
+ dl_entity_preempt(&pi_task->dl, &p->dl))) {
p->dl.dl_boosted = 1;
queue_flag |= ENQUEUE_REPLENISH;
} else
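
Two ordering tools carry the ttwu() rework: smp_load_acquire() on ->on_cpu guarantees task_cpu(p) is read afterwards, and smp_acquire__after_ctrl_dep() upgrades the p->on_rq == 0 control dependency to acquire semantics. Reduced to a toy release/acquire pairing (publish/consume/payload/flag are invented):

	static int payload;
	static int flag;

	void publish(void)
	{
		payload = 42;			/* A: prepare data */
		smp_store_release(&flag, 1);	/* B: then publish */
	}

	int consume(void)
	{
		if (smp_load_acquire(&flag))	/* pairs with B */
			return payload;		/* guaranteed to observe A */
		return -1;			/* not published yet */
	}
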
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 504d2f51b0d6..f63f337c7147 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -2692,6 +2692,7 @@ void __dl_clear_params(struct task_struct *p)
dl_se->dl_bw = 0;
dl_se->dl_density = 0;
+ dl_se->dl_boosted = 0;
dl_se->dl_throttled = 0;
dl_se->dl_yielded = 0;
dl_se->dl_non_contending = 0;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index cbcb2f71599b..04fa8dbcfa4d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -806,7 +806,7 @@ void post_init_entity_util_avg(struct task_struct *p)
}
}
- sa->runnable_avg = cpu_scale;
+ sa->runnable_avg = sa->util_avg;
if (p->sched_class != &fair_sched_class) {
/*
@@ -4039,7 +4039,11 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
return;
}
- rq->misfit_task_load = task_h_load(p);
+ /*
+ * Make sure that misfit_task_load will not be zero even if
+ * task_h_load() returns 0.
+ */
+ rq->misfit_task_load = max_t(unsigned long, task_h_load(p), 1);
}
#else /* CONFIG_SMP */
@@ -7638,7 +7642,14 @@ static int detach_tasks(struct lb_env *env)
switch (env->migration_type) {
case migrate_load:
- load = task_h_load(p);
+ /*
+ * Depending on the number of CPUs and tasks and the
+ * cgroup hierarchy, task_h_load() can return zero.
+ * Make sure that env->imbalance decreases,
+ * otherwise detach_tasks() will stop only after
+ * detaching up to loop_max tasks.
+ */
+ load = max_t(unsigned long, task_h_load(p), 1);
if (sched_feat(LB_MIN) &&
load < 16 && !env->sd->nr_balance_failed)
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 05deb81bb3e3..1ae95b9150d3 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -96,6 +96,15 @@ void __cpuidle default_idle_call(void)
}
}
+static int call_cpuidle_s2idle(struct cpuidle_driver *drv,
+ struct cpuidle_device *dev)
+{
+ if (current_clr_polling_and_test())
+ return -EBUSY;
+
+ return cpuidle_enter_s2idle(drv, dev);
+}
+
static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev,
int next_state)
{
@@ -171,11 +180,9 @@ static void cpuidle_idle_call(void)
if (idle_should_enter_s2idle()) {
rcu_idle_enter();
- entered_state = cpuidle_enter_s2idle(drv, dev);
- if (entered_state > 0) {
- local_irq_enable();
+ entered_state = call_cpuidle_s2idle(drv, dev);
+ if (entered_state > 0)
goto exit_idle;
- }
rcu_idle_exit();
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 1d4e94c1e5fe..877fb08eb1b0 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1682,7 +1682,7 @@ static inline int task_on_rq_migrating(struct task_struct *p)
#define WF_SYNC 0x01 /* Waker goes to sleep after wakeup */
#define WF_FORK 0x02 /* Child wakeup after fork */
#define WF_MIGRATED 0x04 /* Internal use, task got migrated */
-#define WF_ON_RQ 0x08 /* Wakee is on_rq */
+#define WF_ON_CPU 0x08 /* Wakee is on_cpu */
/*
* To aid in avoiding the subversion of "niceness" due to uneven distribution
diff --git a/kernel/signal.c b/kernel/signal.c
index 5ca48cc5da76..ee22ec78fd6d 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2529,9 +2529,6 @@ bool get_signal(struct ksignal *ksig)
struct signal_struct *signal = current->signal;
int signr;
- if (unlikely(current->task_works))
- task_work_run();
-
if (unlikely(uprobe_deny_signal()))
return false;
@@ -2544,6 +2541,13 @@ bool get_signal(struct ksignal *ksig)
relock:
spin_lock_irq(&sighand->siglock);
+ current->jobctl &= ~JOBCTL_TASK_WORK;
+ if (unlikely(current->task_works)) {
+ spin_unlock_irq(&sighand->siglock);
+ task_work_run();
+ goto relock;
+ }
+
/*
* Every stopped thread goes here after wakeup. Check to see if
* we should notify the parent, prepare_signal(SIGCONT) encodes
diff --git a/kernel/smp.c b/kernel/smp.c
index 472c2b274c65..aa17eedff5be 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -669,24 +669,6 @@ void __init smp_init(void)
{
int num_nodes, num_cpus;
- /*
- * Ensure struct irq_work layout matches so that
- * flush_smp_call_function_queue() can do horrible things.
- */
- BUILD_BUG_ON(offsetof(struct irq_work, llnode) !=
- offsetof(struct __call_single_data, llist));
- BUILD_BUG_ON(offsetof(struct irq_work, func) !=
- offsetof(struct __call_single_data, func));
- BUILD_BUG_ON(offsetof(struct irq_work, flags) !=
- offsetof(struct __call_single_data, flags));
-
- /*
- * Assert the CSD_TYPE_TTWU layout is similar enough
- * for task_struct to be on the @call_single_queue.
- */
- BUILD_BUG_ON(offsetof(struct task_struct, wake_entry_type) - offsetof(struct task_struct, wake_entry) !=
- offsetof(struct __call_single_data, flags) - offsetof(struct __call_single_data, llist));
-
idle_threads_init();
cpuhp_threads_init();
diff --git a/kernel/task_work.c b/kernel/task_work.c
index 825f28259a19..5c0848ca1287 100644
--- a/kernel/task_work.c
+++ b/kernel/task_work.c
@@ -25,9 +25,10 @@ static struct callback_head work_exited; /* all we need is ->next == NULL */
* 0 if it succeeds or -ESRCH.
*/
int
-task_work_add(struct task_struct *task, struct callback_head *work, bool notify)
+task_work_add(struct task_struct *task, struct callback_head *work, int notify)
{
struct callback_head *head;
+ unsigned long flags;
do {
head = READ_ONCE(task->task_works);
@@ -36,8 +37,19 @@ task_work_add(struct task_struct *task, struct callback_head *work, bool notify)
work->next = head;
} while (cmpxchg(&task->task_works, head, work) != head);
- if (notify)
+ switch (notify) {
+ case TWA_RESUME:
set_notify_resume(task);
+ break;
+ case TWA_SIGNAL:
+ if (lock_task_sighand(task, &flags)) {
+ task->jobctl |= JOBCTL_TASK_WORK;
+ signal_wake_up(task, 0);
+ unlock_task_sighand(task, &flags);
+ }
+ break;
+ }
+
return 0;
}
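
With notify now an int, callers choose TWA_RESUME (run the callback on the next return to user space) or the new TWA_SIGNAL (take the signal path so the callback runs promptly, see the get_signal() hunk above). A hedged usage sketch, with invented names (my_work_fn, queue_it):

	#include <linux/task_work.h>

	static void my_work_fn(struct callback_head *head)
	{
		/* Executes in the context of the target task. */
	}

	static struct callback_head my_work;

	static int queue_it(struct task_struct *task)
	{
		init_task_work(&my_work, my_work_fn);
		/* TWA_SIGNAL: sets JOBCTL_TASK_WORK and calls signal_wake_up(). */
		return task_work_add(task, &my_work, TWA_SIGNAL);	/* 0 or -ESRCH */
	}
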
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 398e6eadb861..df1ff803acc4 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -521,8 +521,8 @@ static int calc_wheel_index(unsigned long expires, unsigned long clk)
* Force obscenely large timeouts to expire at the
* capacity limit of the wheel.
*/
- if (expires >= WHEEL_TIMEOUT_CUTOFF)
- expires = WHEEL_TIMEOUT_MAX;
+ if (delta >= WHEEL_TIMEOUT_CUTOFF)
+ expires = clk + WHEEL_TIMEOUT_MAX;
idx = calc_index(expires, LVL_DEPTH - 1);
}
@@ -584,7 +584,15 @@ trigger_dyntick_cpu(struct timer_base *base, struct timer_list *timer)
* Set the next expiry time and kick the CPU so it can reevaluate the
* wheel:
*/
- base->next_expiry = timer->expires;
+ if (time_before(timer->expires, base->clk)) {
+ /*
+ * Prevent forward_timer_base() from moving base->clk
+ * backward.
+ */
+ base->next_expiry = base->clk;
+ } else {
+ base->next_expiry = timer->expires;
+ }
wake_up_nohz_cpu(base->cpu);
}
@@ -896,10 +904,13 @@ static inline void forward_timer_base(struct timer_base *base)
* If the next expiry value is > jiffies, then we fast forward to
* jiffies, otherwise we forward to the next expiry value.
*/
- if (time_after(base->next_expiry, jnow))
+ if (time_after(base->next_expiry, jnow)) {
base->clk = jnow;
- else
+ } else {
+ if (WARN_ON_ONCE(time_before(base->next_expiry, base->clk)))
+ return;
base->clk = base->next_expiry;
+ }
#endif
}
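
Both timer fixes lean on the wrap-safe jiffies helpers: time_before()/time_after() compare via signed subtraction, so they stay correct across a jiffies wraparound where a raw < or > would invert. The idiom in brief:

	unsigned long deadline = jiffies + 2 * HZ;

	/* Correct even if jiffies wrapped since @deadline was computed. */
	if (time_after(jiffies, deadline))
		pr_info("deadline passed\n");
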
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 5773f0ba7e76..5ef0484513ec 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -3,6 +3,9 @@
* Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
*
*/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/blktrace_api.h>
@@ -344,7 +347,8 @@ static int __blk_trace_remove(struct request_queue *q)
{
struct blk_trace *bt;
- bt = xchg(&q->blk_trace, NULL);
+ bt = rcu_replace_pointer(q->blk_trace, NULL,
+ lockdep_is_held(&q->blk_trace_mutex));
if (!bt)
return -EINVAL;
@@ -494,6 +498,17 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
*/
strreplace(buts->name, '/', '_');
+ /*
+ * bdev can be NULL, as with scsi-generic; this is as helpful as
+ * we can be.
+ */
+ if (rcu_dereference_protected(q->blk_trace,
+ lockdep_is_held(&q->blk_trace_mutex))) {
+ pr_warn("Concurrent blktraces are not allowed on %s\n",
+ buts->name);
+ return -EBUSY;
+ }
+
bt = kzalloc(sizeof(*bt), GFP_KERNEL);
if (!bt)
return -ENOMEM;
@@ -543,10 +558,7 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
bt->pid = buts->pid;
bt->trace_state = Blktrace_setup;
- ret = -EBUSY;
- if (cmpxchg(&q->blk_trace, NULL, bt))
- goto err;
-
+ rcu_assign_pointer(q->blk_trace, bt);
get_probe_ref();
ret = 0;
@@ -1629,7 +1641,8 @@ static int blk_trace_remove_queue(struct request_queue *q)
{
struct blk_trace *bt;
- bt = xchg(&q->blk_trace, NULL);
+ bt = rcu_replace_pointer(q->blk_trace, NULL,
+ lockdep_is_held(&q->blk_trace_mutex));
if (bt == NULL)
return -EINVAL;
@@ -1661,10 +1674,7 @@ static int blk_trace_setup_queue(struct request_queue *q,
blk_trace_setup_lba(bt, bdev);
- ret = -EBUSY;
- if (cmpxchg(&q->blk_trace, NULL, bt))
- goto free_bt;
-
+ rcu_assign_pointer(q->blk_trace, bt);
get_probe_ref();
return 0;
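
The cmpxchg()-based publication gives way to RCU pointer primitives serialized by q->blk_trace_mutex, which is both more readable and lockdep-checkable. The general shape of the pattern, with invented names (cfg, cfg_mutex, active_cfg):

	struct cfg { int val; };

	static DEFINE_MUTEX(cfg_mutex);
	static struct cfg __rcu *active_cfg;

	static int install_cfg(struct cfg *new)
	{
		mutex_lock(&cfg_mutex);
		if (rcu_dereference_protected(active_cfg,
					      lockdep_is_held(&cfg_mutex))) {
			mutex_unlock(&cfg_mutex);
			return -EBUSY;		/* one instance at a time */
		}
		rcu_assign_pointer(active_cfg, new);
		mutex_unlock(&cfg_mutex);
		return 0;
	}

	static struct cfg *remove_cfg(void)
	{
		struct cfg *old;

		mutex_lock(&cfg_mutex);
		old = rcu_replace_pointer(active_cfg, NULL,
					  lockdep_is_held(&cfg_mutex));
		mutex_unlock(&cfg_mutex);
		return old;	/* caller waits for readers before freeing */
	}
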
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index e729c9e587a0..7bc3d6175868 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -141,7 +141,7 @@ bpf_probe_read_user_common(void *dst, u32 size, const void __user *unsafe_ptr)
{
int ret;
- ret = probe_user_read(dst, unsafe_ptr, size);
+ ret = copy_from_user_nofault(dst, unsafe_ptr, size);
if (unlikely(ret < 0))
memset(dst, 0, size);
return ret;
@@ -196,7 +196,7 @@ bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr)
if (unlikely(ret < 0))
goto fail;
- ret = probe_kernel_read(dst, unsafe_ptr, size);
+ ret = copy_from_kernel_nofault(dst, unsafe_ptr, size);
if (unlikely(ret < 0))
goto fail;
return ret;
@@ -241,7 +241,7 @@ bpf_probe_read_kernel_str_common(void *dst, u32 size, const void *unsafe_ptr)
if (unlikely(ret < 0))
goto fail;
- return 0;
+ return ret;
fail:
memset(dst, 0, size);
return ret;
@@ -326,7 +326,7 @@ BPF_CALL_3(bpf_probe_write_user, void __user *, unsafe_ptr, const void *, src,
if (unlikely(!nmi_uaccess_okay()))
return -EPERM;
- return probe_user_write(unsafe_ptr, src, size);
+ return copy_to_user_nofault(unsafe_ptr, src, size);
}
static const struct bpf_func_proto bpf_probe_write_user_proto = {
@@ -661,7 +661,7 @@ BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
copy_size = (fmt[i + 2] == '4') ? 4 : 16;
- err = probe_kernel_read(bufs->buf[memcpy_cnt],
+ err = copy_from_kernel_nofault(bufs->buf[memcpy_cnt],
(void *) (long) args[fmt_cnt],
copy_size);
if (err < 0)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index c163c3531faf..1903b80db6eb 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2260,7 +2260,7 @@ ftrace_find_tramp_ops_next(struct dyn_ftrace *rec,
if (hash_contains_ip(ip, op->func_hash))
return op;
- }
+ }
return NULL;
}
@@ -3599,7 +3599,7 @@ static int t_show(struct seq_file *m, void *v)
if (direct)
seq_printf(m, "\n\tdirect-->%pS", (void *)direct);
}
- }
+ }
seq_putc(m, '\n');
@@ -7151,6 +7151,10 @@ static int pid_open(struct inode *inode, struct file *file, int type)
case TRACE_NO_PIDS:
seq_ops = &ftrace_no_pid_sops;
break;
+ default:
+ trace_array_put(tr);
+ WARN_ON_ONCE(1);
+ return -EINVAL;
}
ret = seq_open(file, seq_ops);
@@ -7229,6 +7233,10 @@ pid_write(struct file *filp, const char __user *ubuf,
other_pids = rcu_dereference_protected(tr->function_pids,
lockdep_is_held(&ftrace_lock));
break;
+ default:
+ ret = -EINVAL;
+ WARN_ON_ONCE(1);
+ goto out;
}
ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index b8e1ca48be50..00867ff82412 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2427,7 +2427,7 @@ rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
if (unlikely(info->add_timestamp)) {
bool abs = ring_buffer_time_stamp_abs(cpu_buffer->buffer);
- event = rb_add_time_stamp(event, info->delta, abs);
+ event = rb_add_time_stamp(event, abs ? info->delta : delta, abs);
length -= RB_LEN_TIME_EXTEND;
delta = 0;
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ec44b0e2a19c..bb62269724d5 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3570,7 +3570,6 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos)
void tracing_iter_reset(struct trace_iterator *iter, int cpu)
{
- struct ring_buffer_event *event;
struct ring_buffer_iter *buf_iter;
unsigned long entries = 0;
u64 ts;
@@ -3588,7 +3587,7 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu)
* that a reset never took place on a cpu. This is evident
* from the timestamp being before the start of the buffer.
*/
- while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
+ while (ring_buffer_iter_peek(buf_iter, &ts)) {
if (ts >= iter->array_buffer->time_start)
break;
entries++;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index def769df5bf1..13db4000af3f 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -61,6 +61,9 @@ enum trace_type {
#undef __field_desc
#define __field_desc(type, container, item)
+#undef __field_packed
+#define __field_packed(type, container, item)
+
#undef __array
#define __array(type, item, size) type item[size];
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
index 9de29bb45a27..fa0fc08c6ef8 100644
--- a/kernel/trace/trace_boot.c
+++ b/kernel/trace/trace_boot.c
@@ -101,12 +101,16 @@ trace_boot_add_kprobe_event(struct xbc_node *node, const char *event)
kprobe_event_cmd_init(&cmd, buf, MAX_BUF_LEN);
ret = kprobe_event_gen_cmd_start(&cmd, event, val);
- if (ret)
+ if (ret) {
+ pr_err("Failed to generate probe: %s\n", buf);
break;
+ }
ret = kprobe_event_gen_cmd_end(&cmd);
- if (ret)
+ if (ret) {
pr_err("Failed to add probe: %s\n", buf);
+ break;
+ }
}
return ret;
@@ -120,7 +124,7 @@ trace_boot_add_kprobe_event(struct xbc_node *node, const char *event)
}
#endif
-#ifdef CONFIG_HIST_TRIGGERS
+#ifdef CONFIG_SYNTH_EVENTS
static int __init
trace_boot_add_synth_event(struct xbc_node *node, const char *event)
{
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index a523da0dae0a..18c4a58aff79 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -78,8 +78,8 @@ FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,
F_STRUCT(
__field_struct( struct ftrace_graph_ent, graph_ent )
- __field_desc( unsigned long, graph_ent, func )
- __field_desc( int, graph_ent, depth )
+ __field_packed( unsigned long, graph_ent, func )
+ __field_packed( int, graph_ent, depth )
),
F_printk("--> %ps (%d)", (void *)__entry->func, __entry->depth)
@@ -92,11 +92,11 @@ FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
F_STRUCT(
__field_struct( struct ftrace_graph_ret, ret )
- __field_desc( unsigned long, ret, func )
- __field_desc( unsigned long, ret, overrun )
- __field_desc( unsigned long long, ret, calltime)
- __field_desc( unsigned long long, ret, rettime )
- __field_desc( int, ret, depth )
+ __field_packed( unsigned long, ret, func )
+ __field_packed( unsigned long, ret, overrun )
+ __field_packed( unsigned long long, ret, calltime)
+ __field_packed( unsigned long long, ret, rettime )
+ __field_packed( int, ret, depth )
),
F_printk("<-- %ps (%d) (start: %llx end: %llx) over: %d",
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index 3a74736da363..f725802160c0 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -216,11 +216,17 @@ static int event_trigger_regex_open(struct inode *inode, struct file *file)
int trigger_process_regex(struct trace_event_file *file, char *buff)
{
- char *command, *next = buff;
+ char *command, *next;
struct event_command *p;
int ret = -EINVAL;
+ next = buff = skip_spaces(buff);
command = strsep(&next, ": \t");
+ if (next) {
+ next = skip_spaces(next);
+ if (!*next)
+ next = NULL;
+ }
command = (command[0] != '!') ? command : command + 1;
mutex_lock(&trigger_cmd_mutex);
@@ -630,8 +636,14 @@ event_trigger_callback(struct event_command *cmd_ops,
int ret;
/* separate the trigger from the filter (t:n [if filter]) */
- if (param && isdigit(param[0]))
+ if (param && isdigit(param[0])) {
trigger = strsep(&param, " \t");
+ if (param) {
+ param = skip_spaces(param);
+ if (!*param)
+ param = NULL;
+ }
+ }
trigger_ops = cmd_ops->get_trigger_ops(cmd, trigger);
@@ -1368,6 +1380,11 @@ int event_enable_trigger_func(struct event_command *cmd_ops,
trigger = strsep(&param, " \t");
if (!trigger)
return -EINVAL;
+ if (param) {
+ param = skip_spaces(param);
+ if (!*param)
+ param = NULL;
+ }
system = strsep(&trigger, ":");
if (!trigger)
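
All three trigger-parsing hunks apply the same normalization after strsep(): strip leading whitespace from the remainder and fold an all-whitespace remainder to NULL, so downstream "if (param)" tests mean "there really is a filter". Extracted into a standalone helper it might look like this (a sketch, not in the patch):

	/*
	 * Return @rest with leading spaces skipped, or NULL if only
	 * whitespace (or nothing) is left.
	 */
	static char *normalize_remainder(char *rest)
	{
		if (!rest)
			return NULL;
		rest = skip_spaces(rest);
		return *rest ? rest : NULL;
	}
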
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 77ce5a3b6773..70d3d0a09053 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -45,6 +45,9 @@ static int ftrace_event_register(struct trace_event_call *call,
#undef __field_desc
#define __field_desc(type, container, item) type item;
+#undef __field_packed
+#define __field_packed(type, container, item) type item;
+
#undef __array
#define __array(type, item, size) type item[size];
@@ -85,6 +88,13 @@ static void __always_unused ____ftrace_check_##name(void) \
.size = sizeof(_type), .align = __alignof__(_type), \
is_signed_type(_type), .filter_type = _filter_type },
+
+#undef __field_ext_packed
+#define __field_ext_packed(_type, _item, _filter_type) { \
+ .type = #_type, .name = #_item, \
+ .size = sizeof(_type), .align = 1, \
+ is_signed_type(_type), .filter_type = _filter_type },
+
#undef __field
#define __field(_type, _item) __field_ext(_type, _item, FILTER_OTHER)
@@ -94,6 +104,9 @@ static void __always_unused ____ftrace_check_##name(void) \
#undef __field_desc
#define __field_desc(_type, _container, _item) __field_ext(_type, _item, FILTER_OTHER)
+#undef __field_packed
+#define __field_packed(_type, _container, _item) __field_ext_packed(_type, _item, FILTER_OTHER)
+
#undef __array
#define __array(_type, _item, _len) { \
.type = #_type"["__stringify(_len)"]", .name = #_item, \
@@ -129,6 +142,9 @@ static struct trace_event_fields ftrace_event_fields_##name[] = { \
#undef __field_desc
#define __field_desc(type, container, item)
+#undef __field_packed
+#define __field_packed(type, container, item)
+
#undef __array
#define __array(type, item, len)
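
The __field_packed plumbing is an instance of the classic X-macro technique: a single field list is expanded repeatedly under different #define bindings, once to lay out the struct and again to generate metadata describing it. A toy version of the technique in plain C (FIELDS, record, record_fields are invented):

	#define FIELDS(X)		\
		X(int,  pid)		\
		X(long, addr)

	/* Expansion 1: the struct itself. */
	#define AS_MEMBER(type, name) type name;
	struct record { FIELDS(AS_MEMBER) };
	#undef AS_MEMBER

	/* Expansion 2: a name/size table describing the struct. */
	#define AS_DESC(type, name) { #name, sizeof(type) },
	static const struct {
		const char *name;
		unsigned int size;
	} record_fields[] = {
		FIELDS(AS_DESC)
	};
	#undef AS_DESC
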
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 8a4c8d5c2c98..dd4dff71d89a 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -42,7 +42,7 @@ static int allocate_ftrace_ops(struct trace_array *tr)
if (!ops)
return -ENOMEM;
- /* Currently only the non stack verision is supported */
+ /* Currently only the non stack version is supported */
ops->func = function_trace_call;
ops->flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_PID;
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 6048f1be26d2..aefb6065b508 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1222,7 +1222,7 @@ fetch_store_strlen(unsigned long addr)
#endif
do {
- ret = probe_kernel_read(&c, (u8 *)addr + len, 1);
+ ret = copy_from_kernel_nofault(&c, (u8 *)addr + len, 1);
len++;
} while (c && ret == 0 && len < MAX_STRING_SIZE);
@@ -1290,7 +1290,7 @@ probe_mem_read_user(void *dest, void *src, size_t size)
{
const void __user *uaddr = (__force const void __user *)src;
- return probe_user_read(dest, uaddr, size);
+ return copy_from_user_nofault(dest, uaddr, size);
}
static nokprobe_inline int
@@ -1300,7 +1300,7 @@ probe_mem_read(void *dest, void *src, size_t size)
if ((unsigned long)src < TASK_SIZE)
return probe_mem_read_user(dest, src, size);
#endif
- return probe_kernel_read(dest, src, size);
+ return copy_from_kernel_nofault(dest, src, size);
}
/* Note that we don't verify it, since the code does not come from user space */
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index b8a928e925c7..d2867ccc6aca 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -639,8 +639,8 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size,
ret = -EINVAL;
goto fail;
}
- if ((code->op == FETCH_OP_IMM || code->op == FETCH_OP_COMM) ||
- parg->count) {
+ if ((code->op == FETCH_OP_IMM || code->op == FETCH_OP_COMM ||
+ code->op == FETCH_OP_DATA) || parg->count) {
/*
* IMM, DATA and COMM is pointing actual address, those
* must be kept, and if parg->count != 0, this is an
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index a0ff9e200ef6..a22b62813f8c 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -236,7 +236,7 @@ struct trace_probe_event {
struct trace_event_call call;
struct list_head files;
struct list_head probes;
- struct trace_uprobe_filter filter[0];
+ struct trace_uprobe_filter filter[];
};
struct trace_probe {
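
Replacing "[0]" with "[]" converts a GCC zero-length array into a C99 flexible array member, which gives the compiler and bounds checkers real size information. Allocation typically pairs with struct_size() from <linux/overflow.h> for overflow-checked arithmetic; a sketch with invented names (pkt, pkt_alloc):

	struct pkt {
		unsigned int len;
		u8 data[];		/* flexible array member, must be last */
	};

	static struct pkt *pkt_alloc(unsigned int len)
	{
		/* struct_size(): sizeof(*p) + len * sizeof(p->data[0]),
		 * with overflow checking. */
		struct pkt *p = kzalloc(struct_size(p, data, len), GFP_KERNEL);

		if (p)
			p->len = len;
		return p;
	}
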
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 9fbe1e237563..c41c3c17b86a 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -4638,11 +4638,11 @@ void print_worker_info(const char *log_lvl, struct task_struct *task)
* Carefully copy the associated workqueue's workfn, name and desc.
* Keep the original last '\0' in case the original is garbage.
*/
- probe_kernel_read(&fn, &worker->current_func, sizeof(fn));
- probe_kernel_read(&pwq, &worker->current_pwq, sizeof(pwq));
- probe_kernel_read(&wq, &pwq->wq, sizeof(wq));
- probe_kernel_read(name, wq->name, sizeof(name) - 1);
- probe_kernel_read(desc, worker->desc, sizeof(desc) - 1);
+ copy_from_kernel_nofault(&fn, &worker->current_func, sizeof(fn));
+ copy_from_kernel_nofault(&pwq, &worker->current_pwq, sizeof(pwq));
+ copy_from_kernel_nofault(&wq, &pwq->wq, sizeof(wq));
+ copy_from_kernel_nofault(name, wq->name, sizeof(name) - 1);
+ copy_from_kernel_nofault(desc, worker->desc, sizeof(desc) - 1);
if (fn || name[0] || desc[0]) {
printk("%sWorkqueue: %s %ps", log_lvl, name, fn);